From ea9a54421a94bf3d029df139d1ed5fe352e2755b Mon Sep 17 00:00:00 2001
From: Lars <Lars@stommer.de>
Date: Mon, 29 Dec 2025 07:51:07 +0100
Subject: [PATCH 01/33] ui_graph.old Version

---
 app/frontend/ui_graph_service.py | 311 ++++++++++++-------------------
 1 file changed, 124 insertions(+), 187 deletions(-)

diff --git a/app/frontend/ui_graph_service.py b/app/frontend/ui_graph_service.py
index fc2d3e2..bcaa0a3 100644
--- a/app/frontend/ui_graph_service.py
+++ b/app/frontend/ui_graph_service.py
@@ -1,30 +1,25 @@
 """
 FILE: app/frontend/ui_graph_service.py
 DESCRIPTION: Data Layer für den Graphen. Greift direkt auf Qdrant zu (Performance), um Knoten/Kanten zu laden und Texte zu rekonstruieren ("Stitching").
-VERSION: 2.6.1 (Fix: Anchor-Link & Fragment Resolution)
+VERSION: 2.6.0
 STATUS: Active
 DEPENDENCIES: qdrant_client, streamlit_agraph, ui_config, re
-LAST_ANALYSIS: 2025-12-28
+LAST_ANALYSIS: 2025-12-15
 """
 
 import re
 from qdrant_client import QdrantClient, models
 from streamlit_agraph import Node, Edge
-from ui_config import COLLECTION_PREFIX, GRAPH_COLORS, get_edge_color, SYSTEM_EDGES
+from ui_config import GRAPH_COLORS, get_edge_color, SYSTEM_EDGES
 
 class GraphExplorerService:
-    def __init__(self, url, api_key=None, prefix=None):
-        """
-        Initialisiert den Service. Nutzt COLLECTION_PREFIX aus der Config, 
-        sofern kein spezifischer Prefix übergeben wurde.
-        """
+    def __init__(self, url, api_key=None, prefix="mindnet"):
         self.client = QdrantClient(url=url, api_key=api_key)
-        self.prefix = prefix if prefix else COLLECTION_PREFIX
-        self.notes_col = f"{self.prefix}_notes"
-        self.chunks_col = f"{self.prefix}_chunks"
-        self.edges_col = f"{self.prefix}_edges"
+        self.prefix = prefix
+        self.notes_col = f"{prefix}_notes"
+        self.chunks_col = f"{prefix}_chunks"
+        self.edges_col = f"{prefix}_edges"
         self._note_cache = {} 
-        self._ref_resolution_cache = {} 
 
     def get_note_with_full_content(self, note_id):
         """
@@ -38,7 +33,8 @@ class GraphExplorerService:
         # 2. Volltext aus Chunks bauen
         full_text = self._fetch_full_text_stitched(note_id)
         
-        # 3. Ergebnis kombinieren (Kopie zurückgeben)
+        # 3. Ergebnis kombinieren (Wir überschreiben das 'fulltext' Feld mit dem frischen Stitching)
+        # Wir geben eine Kopie zurück, um den Cache nicht zu verfälschen
         complete_note = meta.copy()
         if full_text:
             complete_note['fulltext'] = full_text
@@ -61,7 +57,7 @@ class GraphExplorerService:
         # Initialset für Suche
         level_1_ids = {center_note_id}
         
-        # Suche Kanten für Center (L1) inkl. Titel für Anchor-Suche
+        # Suche Kanten für Center (L1)
         l1_edges = self._find_connected_edges([center_note_id], center_note.get("title"))
         
         for edge_data in l1_edges:
@@ -84,6 +80,7 @@ class GraphExplorerService:
         if center_note_id in nodes_dict:
             orig_title = nodes_dict[center_note_id].title
             clean_full = self._clean_markdown(center_text[:2000])
+            # Wir packen den Text in den Tooltip (title attribute)
             nodes_dict[center_note_id].title = f"{orig_title}\n\n📄 INHALT:\n{clean_full}..."
 
         # B. Previews für alle Nachbarn holen (Batch)
@@ -103,6 +100,8 @@ class GraphExplorerService:
             prov = data['provenance']
             color = get_edge_color(kind)
             is_smart = (prov != "explicit" and prov != "rule")
+            
+            # Label Logik
             label_text = kind if show_labels else " "
             
             final_edges.append(Edge(
@@ -113,11 +112,15 @@ class GraphExplorerService:
         return list(nodes_dict.values()), final_edges
 
     def _clean_markdown(self, text):
-        """Entfernt Markdown-Sonderzeichen für saubere Tooltips."""
+        """Entfernt Markdown-Sonderzeichen für saubere Tooltips im Browser."""
         if not text: return ""
+        # Entferne Header Marker (## )
         text = re.sub(r'#+\s', '', text)
+        # Entferne Bold/Italic (** oder *)
         text = re.sub(r'\*\*|__|\*|_', '', text)
+        # Entferne Links [Text](Url) -> Text
         text = re.sub(r'\[([^\]]+)\]\([^\)]+\)', r'\1', text)
+        # Entferne Wikilinks [[Link]] -> Link
         text = re.sub(r'\[\[([^\]]+)\]\]', r'\1', text)
         return text
 
@@ -127,252 +130,186 @@ class GraphExplorerService:
             scroll_filter = models.Filter(
                 must=[models.FieldCondition(key="note_id", match=models.MatchValue(value=note_id))]
             )
+            # Limit hoch genug setzen
             chunks, _ = self.client.scroll(self.chunks_col, scroll_filter=scroll_filter, limit=100, with_payload=True)
+            # Sortieren nach 'ord' (Reihenfolge im Dokument)
             chunks.sort(key=lambda x: x.payload.get('ord', 999))
-            full_text = [c.payload.get('text', '') for c in chunks if c.payload.get('text')]
+            
+            full_text = []
+            for c in chunks:
+                # 'text' ist der reine Inhalt ohne Overlap
+                txt = c.payload.get('text', '')
+                if txt: full_text.append(txt)
+            
             return "\n\n".join(full_text)
         except:
             return "Fehler beim Laden des Volltexts."
+
     def _fetch_previews_for_nodes(self, node_ids):
-        """
-        Holt Batch-weise den ersten relevanten Textabschnitt für eine Liste von Nodes.
-        Optimiert die Ladezeit durch Reduzierung der API-Calls.
-        """
-        if not node_ids: 
-            return {}
+        """Holt Batch-weise den ersten Chunk für eine Liste von Nodes."""
+        if not node_ids: return {}
         previews = {}
         try:
-            scroll_filter = models.Filter(
-                must=[models.FieldCondition(key="note_id", match=models.MatchAny(any=node_ids))]
-            )
-            # Genügend Chunks laden, um für jede ID eine Vorschau zu finden
+            scroll_filter = models.Filter(must=[models.FieldCondition(key="note_id", match=models.MatchAny(any=node_ids))])
+            # Limit = Anzahl Nodes * 3 (Puffer)
             chunks, _ = self.client.scroll(self.chunks_col, scroll_filter=scroll_filter, limit=len(node_ids)*3, with_payload=True)
             
             for c in chunks:
                 nid = c.payload.get("note_id")
-                # Wir nehmen den ersten gefundenen Chunk
+                # Nur den ersten gefundenen Chunk pro Note nehmen
                 if nid and nid not in previews:
                     previews[nid] = c.payload.get("window") or c.payload.get("text") or ""
-        except Exception: 
-            pass
+        except: pass
         return previews
 
     def _find_connected_edges(self, note_ids, note_title=None):
-        """
-        Findet ein- und ausgehende Kanten für eine Liste von IDs.
-        Implementiert den Fix für Anker-Links [[Titel#Abschnitt]] durch Präfix-Suche in der target_id.
-        """
+        """Findet eingehende und ausgehende Kanten."""
+        
         results = []
-        if not note_ids:
-            return results
         
-        # 1. AUSGEHENDE KANTEN (Outgoing)
-        # Suche über 'note_id' als Besitzer der Kante.
-        out_filter = models.Filter(must=[
-            models.FieldCondition(key="note_id", match=models.MatchAny(any=note_ids)),
-            models.FieldCondition(key="kind", match=models.MatchExcept(**{"except": SYSTEM_EDGES}))
-        ])
-        res_out, _ = self.client.scroll(self.edges_col, scroll_filter=out_filter, limit=2000, with_payload=True)
-        results.extend(res_out)
+        # 1. OUTGOING EDGES (Der "Owner"-Fix)
+        # Wir suchen Kanten, die im Feld 'note_id' (Owner) eine unserer Notizen haben.
+        # Das findet ALLE ausgehenden Kanten, egal ob sie an einem Chunk oder der Note hängen.
+        if note_ids:
+            out_filter = models.Filter(must=[
+                models.FieldCondition(key="note_id", match=models.MatchAny(any=note_ids)),
+                models.FieldCondition(key="kind", match=models.MatchExcept(**{"except": SYSTEM_EDGES}))
+            ])
+            # Limit hoch, um alles zu finden
+            res_out, _ = self.client.scroll(self.edges_col, scroll_filter=out_filter, limit=500, with_payload=True)
+            results.extend(res_out)
 
-        # 2. EINGEHENDE KANTEN (Incoming)
-        # Suche über target_id (Ziel der Kante).
+        # 2. INCOMING EDGES (Ziel = Chunk ID oder Titel oder Note ID)
+        # Hier müssen wir Chunks auflösen, um Treffer auf Chunks zu finden.
         
-        # Sammele alle Chunk-IDs für exakte Treffer auf Segment-Ebene
-        c_filter = models.Filter(must=[models.FieldCondition(key="note_id", match=models.MatchAny(any=note_ids))])
-        chunks, _ = self.client.scroll(self.chunks_col, scroll_filter=c_filter, limit=1000, with_payload=False)
-        chunk_ids = [c.id for c in chunks]
+        # Chunk IDs der aktuellen Notes holen
+        chunk_ids = []
+        if note_ids:
+            c_filter = models.Filter(must=[models.FieldCondition(key="note_id", match=models.MatchAny(any=note_ids))])
+            chunks, _ = self.client.scroll(self.chunks_col, scroll_filter=c_filter, limit=300)
+            chunk_ids = [c.id for c in chunks]
 
-        should_conditions = []
+        shoulds = []
+        # Case A: Edge zeigt auf einen unserer Chunks
         if chunk_ids: 
-            should_conditions.append(models.FieldCondition(key="target_id", match=models.MatchAny(any=chunk_ids)))
-        should_conditions.append(models.FieldCondition(key="target_id", match=models.MatchAny(any=note_ids)))
-            
-        # TITEL-BASIERTE SUCHE (Inkl. Anker-Fix)
-        titles_to_check = []
-        if note_title:
-            titles_to_check.append(note_title)
-            # Aliase laden für robuste Verlinkung
-            for nid in note_ids:
-                note = self._fetch_note_cached(nid)
-                if note:
-                    aliases = note.get("aliases", [])
-                    if isinstance(aliases, str): aliases = [aliases]
-                    titles_to_check.extend([a for a in aliases if a not in titles_to_check])
-
-        # Exakte Titel-Matches hinzufügen
-        for t in titles_to_check:
-            should_conditions.append(models.FieldCondition(key="target_id", match=models.MatchValue(value=t)))
+            shoulds.append(models.FieldCondition(key="target_id", match=models.MatchAny(any=chunk_ids)))
         
-        if should_conditions:
+        # Case B: Edge zeigt direkt auf unsere Note ID
+        if note_ids:
+            shoulds.append(models.FieldCondition(key="target_id", match=models.MatchAny(any=note_ids)))
+            
+        # Case C: Edge zeigt auf unseren Titel (Wikilinks)
+        if note_title: 
+            shoulds.append(models.FieldCondition(key="target_id", match=models.MatchValue(value=note_title)))
+        
+        if shoulds:
             in_filter = models.Filter(
                 must=[models.FieldCondition(key="kind", match=models.MatchExcept(**{"except": SYSTEM_EDGES}))],
-                should=should_conditions
+                should=shoulds
             )
-            res_in, _ = self.client.scroll(self.edges_col, scroll_filter=in_filter, limit=2000, with_payload=True)
+            res_in, _ = self.client.scroll(self.edges_col, scroll_filter=in_filter, limit=500, with_payload=True)
             results.extend(res_in)
-
-        # FIX FÜR [[Titel#Abschnitt]]: Suche nach Fragmenten
-        if titles_to_check:
-            for t in titles_to_check:
-                anchor_filter = models.Filter(must=[
-                    models.FieldCondition(key="target_id", match=models.MatchText(text=t)),
-                    models.FieldCondition(key="kind", match=models.MatchExcept(**{"except": SYSTEM_EDGES}))
-                ])
-                res_anchor, _ = self.client.scroll(self.edges_col, scroll_filter=anchor_filter, limit=1000, with_payload=True)
-                
-                existing_ids = {r.id for r in results}
-                for edge in res_anchor:
-                    tgt = edge.payload.get("target_id", "")
-                    # Client-seitige Filterung: Nur Kanten nehmen, die mit Titel# beginnen
-                    if edge.id not in existing_ids and (tgt == t or tgt.startswith(f"{t}#")):
-                        results.append(edge)
-        
+            
         return results
 
     def _find_connected_edges_batch(self, note_ids):
-        """Wrapper für die Suche in tieferen Ebenen des Graphen."""
-        first_note = self._fetch_note_cached(note_ids[0]) if note_ids else None
-        title = first_note.get("title") if first_note else None
-        return self._find_connected_edges(note_ids, note_title=title)
+        # Wrapper für Level 2 Suche
+        return self._find_connected_edges(note_ids)
 
     def _process_edge(self, record, nodes_dict, unique_edges, current_depth):
-        """
-        Verarbeitet eine rohe Kante, löst Quell- und Ziel-Referenzen auf 
-        und fügt sie den Dictionaries für den Graphen hinzu.
-        """
-        if not record or not record.payload:
-            return None, None
-            
+        """Verarbeitet eine rohe Edge, löst IDs auf und fügt sie den Dictionaries hinzu."""
         payload = record.payload
         src_ref = payload.get("source_id")
         tgt_ref = payload.get("target_id")
         kind = payload.get("kind")
         provenance = payload.get("provenance", "explicit")
 
-        if not src_ref or not tgt_ref:
-            return None, None
-
-        # IDs zu Notes auflösen (Hier greift der Fragment-Fix)
+        # IDs zu Notes auflösen
         src_note = self._resolve_note_from_ref(src_ref)
         tgt_note = self._resolve_note_from_ref(tgt_ref)
 
         if src_note and tgt_note:
-            src_id = src_note.get('note_id')
-            tgt_id = tgt_note.get('note_id')
+            src_id = src_note['note_id']
+            tgt_id = tgt_note['note_id']
             
-            if src_id and tgt_id and src_id != tgt_id:
-                # Knoten zum Set hinzufügen
+            if src_id != tgt_id:
+                # Nodes hinzufügen
                 self._add_node_to_dict(nodes_dict, src_note, level=current_depth)
                 self._add_node_to_dict(nodes_dict, tgt_note, level=current_depth)
                 
-                # Kante registrieren (Deduplizierung)
+                # Kante hinzufügen (mit Deduplizierung)
                 key = (src_id, tgt_id)
                 existing = unique_edges.get(key)
                 
-                is_current_explicit = (provenance in ["explicit", "rule"])
                 should_update = True
-                
+                # Bevorzuge explizite Kanten vor Smart Kanten
+                is_current_explicit = (provenance in ["explicit", "rule"])
                 if existing:
-                    is_existing_explicit = (existing.get('provenance', '') in ["explicit", "rule"])
+                    is_existing_explicit = (existing['provenance'] in ["explicit", "rule"])
                     if is_existing_explicit and not is_current_explicit:
                         should_update = False
                 
                 if should_update:
-                    unique_edges[key] = {
-                        "source": src_id, 
-                        "target": tgt_id, 
-                        "kind": kind, 
-                        "provenance": provenance
-                    }
+                    unique_edges[key] = {"source": src_id, "target": tgt_id, "kind": kind, "provenance": provenance}
                 return src_id, tgt_id
         return None, None
 
     def _fetch_note_cached(self, note_id):
-        """Lädt eine Note aus Qdrant mit Session-Caching."""
-        if not note_id:
-            return None
-        if note_id in self._note_cache:
-            return self._note_cache[note_id]
-        
-        try:
-            res, _ = self.client.scroll(
-                collection_name=self.notes_col,
-                scroll_filter=models.Filter(must=[
-                    models.FieldCondition(key="note_id", match=models.MatchValue(value=note_id))
-                ]),
-                limit=1, with_payload=True
-            )
-            if res and res[0].payload:
-                payload = res[0].payload
-                self._note_cache[note_id] = payload
-                return payload
-        except Exception: 
-            pass
+        if note_id in self._note_cache: return self._note_cache[note_id]
+        res, _ = self.client.scroll(
+            collection_name=self.notes_col,
+            scroll_filter=models.Filter(must=[models.FieldCondition(key="note_id", match=models.MatchValue(value=note_id))]),
+            limit=1, with_payload=True
+        )
+        if res:
+            self._note_cache[note_id] = res[0].payload
+            return res[0].payload
         return None
 
     def _resolve_note_from_ref(self, ref_str):
-        """
-        Löst eine Referenz (ID, Chunk-ID oder Wikilink mit Anker) auf eine Note auf.
-        Bereinigt Anker (#) vor der Suche.
-        """
-        if not ref_str: 
-            return None
+        """Löst eine ID (Chunk, Note oder Titel) zu einer Note Payload auf."""
+        if not ref_str: return None
         
-        if ref_str in self._ref_resolution_cache:
-            return self._ref_resolution_cache[ref_str]
-        
-        # Fragment-Behandlung: Trenne Anker ab
-        base_ref = ref_str.split("#")[0].strip()
-        
-        # 1. Versuch: Direkte Note-ID Suche
-        note = self._fetch_note_cached(base_ref)
-        if note:
-            self._ref_resolution_cache[ref_str] = note
-            return note
-            
-        # 2. Versuch: Titel-Suche (Keyword-Match)
-        try:
-            res, _ = self.client.scroll(
-                collection_name=self.notes_col,
-                scroll_filter=models.Filter(must=[
-                    models.FieldCondition(key="title", match=models.MatchValue(value=base_ref))
-                ]),
-                limit=1, with_payload=True
-            )
-            if res and res[0].payload:
-                payload = res[0].payload
-                self._ref_resolution_cache[ref_str] = payload
-                return payload
-        except Exception: 
-            pass
-
-        # 3. Versuch: Auflösung über Chunks
+        # Fall A: Chunk ID (enthält #)
         if "#" in ref_str:
             try:
-                res_chunk = self.client.retrieve(self.chunks_col, ids=[ref_str], with_payload=True)
-                if res_chunk and res_chunk[0].payload:
-                    note_id = res_chunk[0].payload.get("note_id")
-                    note = self._fetch_note_cached(note_id)
-                    if note:
-                        self._ref_resolution_cache[ref_str] = note
-                        return note
-            except Exception: 
-                pass
+                # Versuch 1: Chunk ID direkt
+                res = self.client.retrieve(self.chunks_col, ids=[ref_str], with_payload=True)
+                if res: return self._fetch_note_cached(res[0].payload.get("note_id"))
+            except: pass
+            
+            # Versuch 2: NoteID#Section (Hash abtrennen)
+            possible_note_id = ref_str.split("#")[0]
+            if self._fetch_note_cached(possible_note_id): return self._fetch_note_cached(possible_note_id)
+
+        # Fall B: Note ID direkt
+        if self._fetch_note_cached(ref_str): return self._fetch_note_cached(ref_str)
         
+        # Fall C: Titel
+        res, _ = self.client.scroll(
+            collection_name=self.notes_col,
+            scroll_filter=models.Filter(must=[models.FieldCondition(key="title", match=models.MatchValue(value=ref_str))]),
+            limit=1, with_payload=True
+        )
+        if res:
+            self._note_cache[res[0].payload['note_id']] = res[0].payload
+            return res[0].payload
         return None
 
     def _add_node_to_dict(self, node_dict, note_payload, level=1):
-        """Erstellt ein Node-Objekt für streamlit-agraph mit Styling."""
         nid = note_payload.get("note_id")
-        if not nid or nid in node_dict: 
-            return
+        if nid in node_dict: return
         
         ntype = note_payload.get("type", "default")
-        color = GRAPH_COLORS.get(ntype, GRAPH_COLORS.get("default", "#8395a7"))
+        color = GRAPH_COLORS.get(ntype, GRAPH_COLORS["default"])
+        
+        # Basis-Tooltip (wird später erweitert)
         tooltip = f"Titel: {note_payload.get('title')}\nTyp: {ntype}"
         
-        size = 45 if level == 0 else (25 if level == 1 else 15)
+        if level == 0: size = 45 
+        elif level == 1: size = 25
+        else: size = 15
+
         node_dict[nid] = Node(
             id=nid,
             label=note_payload.get('title', nid),

From feeb7c2d9276693c4d69e06f84e4c1552d118439 Mon Sep 17 00:00:00 2001
From: Lars <Lars@stommer.de>
Date: Mon, 29 Dec 2025 07:58:20 +0100
Subject: [PATCH 02/33] Initial WP4d

---
 app/core/graph/graph_derive_edges.py | 69 ++++++++++++++++++++--------
 app/core/graph/graph_utils.py        | 24 +++++++++-
 app/models/dto.py                    |  5 +-
 3 files changed, 77 insertions(+), 21 deletions(-)

diff --git a/app/core/graph/graph_derive_edges.py b/app/core/graph/graph_derive_edges.py
index 284e789..d12c5e8 100644
--- a/app/core/graph/graph_derive_edges.py
+++ b/app/core/graph/graph_derive_edges.py
@@ -1,10 +1,11 @@
 """
 FILE: app/core/graph/graph_derive_edges.py
 DESCRIPTION: Hauptlogik zur Kanten-Aggregation und De-Duplizierung.
+             AUDIT: Integriert parse_link_target für saubere Graphen-Topologie.
 """
 from typing import List, Optional, Dict, Tuple
 from .graph_utils import (
-    _get, _edge, _mk_edge_id, _dedupe_seq, 
+    _get, _edge, _mk_edge_id, _dedupe_seq, parse_link_target,
     PROVENANCE_PRIORITY, load_types_registry, get_edge_defaults_for
 )
 from .graph_extractors import (
@@ -53,47 +54,79 @@ def build_edges_for_note(
 
         # Typed & Candidate Pool (WP-15b Integration)
         typed, rem = extract_typed_relations(raw)
-        for k, t in typed:
-            edges.append(_edge(k, "chunk", cid, t, note_id, {
+        for k, raw_t in typed:
+            t, sec = parse_link_target(raw_t, note_id)
+            if not t: continue
+            
+            payload = {
                 "chunk_id": cid, "edge_id": _mk_edge_id(k, cid, t, "chunk", "inline:rel"),
                 "provenance": "explicit", "rule_id": "inline:rel", "confidence": PROVENANCE_PRIORITY["inline:rel"]
-            }))
+            }
+            if sec: payload["target_section"] = sec
+            
+            edges.append(_edge(k, "chunk", cid, t, note_id, payload))
 
         pool = ch.get("candidate_pool") or ch.get("candidate_edges") or []
         for cand in pool:
-            t, k, p = cand.get("to"), cand.get("kind", "related_to"), cand.get("provenance", "semantic_ai")
+            raw_t, k, p = cand.get("to"), cand.get("kind", "related_to"), cand.get("provenance", "semantic_ai")
+            t, sec = parse_link_target(raw_t, note_id)
             if t:
-                edges.append(_edge(k, "chunk", cid, t, note_id, {
+                payload = {
                     "chunk_id": cid, "edge_id": _mk_edge_id(k, cid, t, "chunk", f"candidate:{p}"),
                     "provenance": p, "rule_id": f"candidate:{p}", "confidence": PROVENANCE_PRIORITY.get(p, 0.90)
-                }))
+                }
+                if sec: payload["target_section"] = sec
+                
+                edges.append(_edge(k, "chunk", cid, t, note_id, payload))
 
         # Callouts & Wikilinks
         call_pairs, rem2 = extract_callout_relations(rem)
-        for k, t in call_pairs:
-            edges.append(_edge(k, "chunk", cid, t, note_id, {
+        for k, raw_t in call_pairs:
+            t, sec = parse_link_target(raw_t, note_id)
+            if not t: continue
+            
+            payload = {
                 "chunk_id": cid, "edge_id": _mk_edge_id(k, cid, t, "chunk", "callout:edge"),
                 "provenance": "explicit", "rule_id": "callout:edge", "confidence": PROVENANCE_PRIORITY["callout:edge"]
-            }))
+            }
+            if sec: payload["target_section"] = sec
+            
+            edges.append(_edge(k, "chunk", cid, t, note_id, payload))
 
         refs = extract_wikilinks(rem2)
-        for r in refs:
-            edges.append(_edge("references", "chunk", cid, r, note_id, {
-                "chunk_id": cid, "ref_text": r, "edge_id": _mk_edge_id("references", cid, r, "chunk", "explicit:wikilink"),
+        for raw_r in refs:
+            r, sec = parse_link_target(raw_r, note_id)
+            if not r: continue
+            
+            payload = {
+                "chunk_id": cid, "ref_text": raw_r, "edge_id": _mk_edge_id("references", cid, r, "chunk", "explicit:wikilink"),
                 "provenance": "explicit", "rule_id": "explicit:wikilink", "confidence": PROVENANCE_PRIORITY["explicit:wikilink"]
-            }))
+            }
+            if sec: payload["target_section"] = sec
+            
+            edges.append(_edge("references", "chunk", cid, r, note_id, payload))
+            
             for rel in defaults:
                 if rel != "references":
-                    edges.append(_edge(rel, "chunk", cid, r, note_id, {
+                    def_payload = {
                         "chunk_id": cid, "edge_id": _mk_edge_id(rel, cid, r, "chunk", f"edge_defaults:{rel}"),
                         "provenance": "rule", "rule_id": f"edge_defaults:{rel}", "confidence": PROVENANCE_PRIORITY["edge_defaults"]
-                    }))
-        refs_all.extend(refs)
+                    }
+                    if sec: def_payload["target_section"] = sec
+                    edges.append(_edge(rel, "chunk", cid, r, note_id, def_payload))
+        
+        # Für die Note-Scope-Sammlung nutzen wir die gesäuberten Targets (ohne '#Section') zur Dedup
+        refs_all.extend([parse_link_target(r, note_id)[0] for r in refs])
 
     # 3) Note-Scope & De-Duplizierung
     if include_note_scope_refs:
-        refs_note = _dedupe_seq((refs_all or []) + (note_level_references or []))
+        # refs_all ist jetzt schon gesäubert (nur Targets)
+        # note_level_references müssen auch gesäubert werden
+        cleaned_note_refs = [parse_link_target(r, note_id)[0] for r in (note_level_references or [])]
+        refs_note = _dedupe_seq((refs_all or []) + cleaned_note_refs)
+        
         for r in refs_note:
+            if not r: continue
             edges.append(_edge("references", "note", note_id, r, note_id, {
                 "edge_id": _mk_edge_id("references", note_id, r, "note", "explicit:note_scope"),
                 "provenance": "explicit", "confidence": PROVENANCE_PRIORITY["explicit:note_scope"]
diff --git a/app/core/graph/graph_utils.py b/app/core/graph/graph_utils.py
index 5f295ed..d814ad7 100644
--- a/app/core/graph/graph_utils.py
+++ b/app/core/graph/graph_utils.py
@@ -1,10 +1,11 @@
 """
 FILE: app/core/graph/graph_utils.py
 DESCRIPTION: Basale Werkzeuge, ID-Generierung und Provenance-Konfiguration für den Graphen.
+             AUDIT: Erweitert um parse_link_target für sauberes Section-Splitting (WP-Fix).
 """
 import os
 import hashlib
-from typing import Iterable, List, Optional, Set, Any
+from typing import Iterable, List, Optional, Set, Any, Tuple
 
 try:
     import yaml
@@ -59,6 +60,27 @@ def _edge(kind: str, scope: str, source_id: str, target_id: str, note_id: str, e
     if extra: pl.update(extra)
     return pl
 
+def parse_link_target(raw: str, current_note_id: Optional[str] = None) -> Tuple[str, Optional[str]]:
+    """
+    Zerlegt einen Link (z.B. 'Note#Section') in Target-ID und Section.
+    Behandelt Self-Links ('#Section'), indem current_note_id eingesetzt wird.
+    
+    Returns:
+        (target_id, target_section)
+    """
+    if not raw:
+        return "", None
+    
+    parts = raw.split("#", 1)
+    target = parts[0].strip()
+    section = parts[1].strip() if len(parts) > 1 else None
+    
+    # Handle Self-Link [[#Section]] -> target wird zu current_note_id
+    if not target and section and current_note_id:
+        target = current_note_id
+        
+    return target, section
+
 def load_types_registry() -> dict:
     """Lädt die YAML-Registry."""
     p = os.getenv("MINDNET_TYPES_FILE", "./config/types.yaml")
diff --git a/app/models/dto.py b/app/models/dto.py
index 7d4cb64..4c6dd67 100644
--- a/app/models/dto.py
+++ b/app/models/dto.py
@@ -1,10 +1,10 @@
 """
 FILE: app/models/dto.py
 DESCRIPTION: Pydantic-Modelle (DTOs) für Request/Response Bodies. Definiert das API-Schema.
-VERSION: 0.6.6 (WP-22 Debug & Stability Update)
+VERSION: 0.6.7 (WP-Fix: Target Section Support)
 STATUS: Active
 DEPENDENCIES: pydantic, typing, uuid
-LAST_ANALYSIS: 2025-12-18
+LAST_ANALYSIS: 2025-12-29
 """
 
 from __future__ import annotations
@@ -43,6 +43,7 @@ class EdgeDTO(BaseModel):
     direction: Literal["out", "in", "undirected"] = "out"
     provenance: Optional[Literal["explicit", "rule", "smart", "structure"]] = "explicit"
     confidence: float = 1.0
+    target_section: Optional[str] = None  # Neu: Speichert den Anker ohne führendes '#' (z.B. "Abschnitt")
 
 
 # --- Request Models ---

From 303efefcb737cc411ab2815d9f6f509bb6d9c3aa Mon Sep 17 00:00:00 2001
From: Lars <Lars@stommer.de>
Date: Mon, 29 Dec 2025 08:19:40 +0100
Subject: [PATCH 03/33] bug fix

---
 app/core/graph/graph_extractors.py | 27 +++++++++++++++++----------
 1 file changed, 17 insertions(+), 10 deletions(-)

diff --git a/app/core/graph/graph_extractors.py b/app/core/graph/graph_extractors.py
index 9c1fedf..70d5ae5 100644
--- a/app/core/graph/graph_extractors.py
+++ b/app/core/graph/graph_extractors.py
@@ -1,11 +1,15 @@
 """
 FILE: app/core/graph/graph_extractors.py
 DESCRIPTION: Regex-basierte Extraktion von Relationen aus Text.
+             AUDIT: Regex für Wikilinks liberalisiert (Umlaute, Sonderzeichen Support).
 """
 import re
 from typing import List, Tuple
 
-_WIKILINK_RE = re.compile(r"\[\[(?:[^\|\]]+\|)?([a-zA-Z0-9_\-#:. ]+)\]\]")
+# Fix: Erlaube alle Zeichen außer ']' im Target, statt nur a-z0-9.
+# Das fängt Umlaute, Emojis, '&', '#' und Leerzeichen ab.
+_WIKILINK_RE = re.compile(r"\[\[(?:[^\|\]]+\|)?([^\]]+)\]\]")
+
 _REL_PIPE  = re.compile(r"\[\[\s*rel:(?P<kind>[a-z_]+)\s*\|\s*(?P<target>[^\]]+?)\s*\]\]", re.IGNORECASE)
 _REL_SPACE = re.compile(r"\[\[\s*rel:(?P<kind>[a-z_]+)\s+(?P<target>[^\]]+?)\s*\]\]",   re.IGNORECASE)
 _REL_TEXT  = re.compile(r"rel\s*:\s*(?P<kind>[a-z_]+)\s*\[\[\s*(?P<target>[^\]]+?)\s*\]\]", re.IGNORECASE)
@@ -14,12 +18,16 @@ _CALLOUT_START = re.compile(r"^\s*>\s*\[!edge\]\s*(.*)$", re.IGNORECASE)
 _REL_LINE      = re.compile(r"^(?P<kind>[a-z_]+)\s*:\s*(?P<targets>.+?)\s*$", re.IGNORECASE)
 _WIKILINKS_IN_LINE = re.compile(r"\[\[([^\]]+)\]\]")
 
-def extract_typed_relations(text: str) -> Tuple[List[Tuple[str,str]], str]:
-    """Extrahiert [[rel:KIND|Target]]."""
+def extract_typed_relations(text: str) -> Tuple[List[Tuple[str, str]], str]:
+    """
+    Findet Inline-Relationen wie [[rel:depends_on Target]].
+    Gibt (Liste[(kind, target)], bereinigter_text) zurück.
+    """
+    if not text: return [], ""
     pairs = []
     def _collect(m):
-        k, t = (m.group("kind") or "").strip().lower(), (m.group("target") or "").strip()
-        if k and t: pairs.append((k, t))
+        k, t = m.group("kind").strip().lower(), m.group("target").strip()
+        pairs.append((k, t))
         return ""
     text = _REL_PIPE.sub(_collect, text)
     text = _REL_SPACE.sub(_collect, text)
@@ -45,11 +53,10 @@ def extract_callout_relations(text: str) -> Tuple[List[Tuple[str,str]], str]:
             found = _WIKILINKS_IN_LINE.findall(targets)
             if found:
                 for t in found: out_pairs.append((kind, t.strip()))
-            else:
-                for raw in re.split(r"[,;]", targets):
-                    if raw.strip(): out_pairs.append((kind, raw.strip()))
     return out_pairs, "\n".join(keep_lines)
 
 def extract_wikilinks(text: str) -> List[str]:
-    """Extrahiert Standard-Wikilinks."""
-    return [m.group(1).strip() for m in _WIKILINK_RE.finditer(text or "")]
\ No newline at end of file
+    """Findet Standard-Wikilinks [[Target]] oder [[Alias|Target]]."""
+    if not text: return []
+    # match.group(1) ist jetzt das Target (dank des fixierten Regex)
+    return [m.strip() for m in _WIKILINK_RE.findall(text) if m.strip()]
\ No newline at end of file

From 62b5a8bf6513920661b2558708f17cb9ffac02fc Mon Sep 17 00:00:00 2001
From: Lars <Lars@stommer.de>
Date: Mon, 29 Dec 2025 08:40:05 +0100
Subject: [PATCH 04/33] =?UTF-8?q?Anpassung=20payload=20f=C3=BCr=20neues=20?=
 =?UTF-8?q?Feld=20in=20edges?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 app/core/database/qdrant_points.py | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/app/core/database/qdrant_points.py b/app/core/database/qdrant_points.py
index fd90403..7c36a52 100644
--- a/app/core/database/qdrant_points.py
+++ b/app/core/database/qdrant_points.py
@@ -1,10 +1,10 @@
 """
 FILE: app/core/database/qdrant_points.py
 DESCRIPTION: Object-Mapper für Qdrant. Konvertiert JSON-Payloads (Notes, Chunks, Edges) in PointStructs und generiert deterministische UUIDs.
-VERSION: 1.5.0
+VERSION: 1.5.1 (WP-Fix: Explicit Target Section Support)
 STATUS: Active
 DEPENDENCIES: qdrant_client, uuid, os
-LAST_ANALYSIS: 2025-12-15
+LAST_ANALYSIS: 2025-12-29
 """
 from __future__ import annotations
 import os
@@ -46,16 +46,25 @@ def points_for_chunks(prefix: str, chunk_payloads: List[dict], vectors: List[Lis
     return chunks_col, points
 
 def _normalize_edge_payload(pl: dict) -> dict:
+    """Normalisiert Edge-Felder und sichert Schema-Konformität."""
     kind = pl.get("kind") or pl.get("edge_type") or "edge"
     source_id = pl.get("source_id") or pl.get("src_id") or "unknown-src"
     target_id = pl.get("target_id") or pl.get("dst_id") or "unknown-tgt"
     seq = pl.get("seq") or pl.get("order") or pl.get("index")
+    
+    # WP-Fix: target_section explizit durchreichen
+    target_section = pl.get("target_section")
 
     pl.setdefault("kind", kind)
     pl.setdefault("source_id", source_id)
     pl.setdefault("target_id", target_id)
+    
     if seq is not None and "seq" not in pl:
         pl["seq"] = seq
+        
+    if target_section is not None:
+        pl["target_section"] = target_section
+        
     return pl
 
 def points_for_edges(prefix: str, edge_payloads: List[dict]) -> Tuple[str, List[rest.PointStruct]]:

From ac9956bf000348eae48482938a3b2302241b7431 Mon Sep 17 00:00:00 2001
From: Lars <Lars@stommer.de>
Date: Mon, 29 Dec 2025 10:16:51 +0100
Subject: [PATCH 05/33] Index und Anlage neues Feld in qdrant

---
 app/core/database/qdrant.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/app/core/database/qdrant.py b/app/core/database/qdrant.py
index 163c210..d5ee7bd 100644
--- a/app/core/database/qdrant.py
+++ b/app/core/database/qdrant.py
@@ -3,7 +3,7 @@ FILE: app/core/database/qdrant.py
 DESCRIPTION: Qdrant-Client Factory und Schema-Management. 
              Erstellt Collections und Payload-Indizes.
              MODULARISIERUNG: Verschoben in das database-Paket für WP-14.
-VERSION: 2.2.1
+VERSION: 2.2.2 (WP-Fix: Index für target_section)
 STATUS: Active
 DEPENDENCIES: qdrant_client, dataclasses, os
 """
@@ -124,7 +124,7 @@ def ensure_payload_indexes(client: QdrantClient, prefix: str) -> None:
     Stellt sicher, dass alle benötigten Payload-Indizes für die Suche existieren.
     - notes:  note_id, type, title, updated, tags
     - chunks: note_id, chunk_id, index, type, tags
-    - edges:  note_id, kind, scope, source_id, target_id, chunk_id
+    - edges:  note_id, kind, scope, source_id, target_id, chunk_id, target_section
     """
     notes, chunks, edges = collection_names(prefix)
 
@@ -156,6 +156,8 @@ def ensure_payload_indexes(client: QdrantClient, prefix: str) -> None:
         ("source_id", rest.PayloadSchemaType.KEYWORD),
         ("target_id", rest.PayloadSchemaType.KEYWORD),
         ("chunk_id",  rest.PayloadSchemaType.KEYWORD),
+        # NEU: Index für Section-Links (WP-15b)
+        ("target_section", rest.PayloadSchemaType.KEYWORD),
     ]:
         _ensure_index(client, edges, field, schema)
 

From e180018c993b45f666c9c4c7bbc2ed0dd7dc67f5 Mon Sep 17 00:00:00 2001
From: Lars <Lars@stommer.de>
Date: Mon, 29 Dec 2025 10:31:51 +0100
Subject: [PATCH 06/33] Anpassung gui

---
 app/frontend/ui_graph_service.py | 211 ++++++++++++++++++++++++-------
 1 file changed, 167 insertions(+), 44 deletions(-)

diff --git a/app/frontend/ui_graph_service.py b/app/frontend/ui_graph_service.py
index bcaa0a3..c0164c8 100644
--- a/app/frontend/ui_graph_service.py
+++ b/app/frontend/ui_graph_service.py
@@ -163,31 +163,33 @@ class GraphExplorerService:
         return previews
 
     def _find_connected_edges(self, note_ids, note_title=None):
-        """Findet eingehende und ausgehende Kanten."""
+        """
+        Findet eingehende und ausgehende Kanten.
         
+        WICHTIG: target_id enthält nur den Titel (ohne #Abschnitt).
+        target_section ist ein separates Feld für Abschnitt-Informationen.
+        """
         results = []
+        if not note_ids:
+            return results
         
         # 1. OUTGOING EDGES (Der "Owner"-Fix)
         # Wir suchen Kanten, die im Feld 'note_id' (Owner) eine unserer Notizen haben.
         # Das findet ALLE ausgehenden Kanten, egal ob sie an einem Chunk oder der Note hängen.
-        if note_ids:
-            out_filter = models.Filter(must=[
-                models.FieldCondition(key="note_id", match=models.MatchAny(any=note_ids)),
-                models.FieldCondition(key="kind", match=models.MatchExcept(**{"except": SYSTEM_EDGES}))
-            ])
-            # Limit hoch, um alles zu finden
-            res_out, _ = self.client.scroll(self.edges_col, scroll_filter=out_filter, limit=500, with_payload=True)
-            results.extend(res_out)
+        out_filter = models.Filter(must=[
+            models.FieldCondition(key="note_id", match=models.MatchAny(any=note_ids)),
+            models.FieldCondition(key="kind", match=models.MatchExcept(**{"except": SYSTEM_EDGES}))
+        ])
+        res_out, _ = self.client.scroll(self.edges_col, scroll_filter=out_filter, limit=2000, with_payload=True)
+        results.extend(res_out)
 
-        # 2. INCOMING EDGES (Ziel = Chunk ID oder Titel oder Note ID)
-        # Hier müssen wir Chunks auflösen, um Treffer auf Chunks zu finden.
+        # 2. INCOMING EDGES (Ziel = Chunk ID, Note ID oder Titel)
+        # WICHTIG: target_id enthält nur den Titel, target_section ist separat
         
         # Chunk IDs der aktuellen Notes holen
-        chunk_ids = []
-        if note_ids:
-            c_filter = models.Filter(must=[models.FieldCondition(key="note_id", match=models.MatchAny(any=note_ids))])
-            chunks, _ = self.client.scroll(self.chunks_col, scroll_filter=c_filter, limit=300)
-            chunk_ids = [c.id for c in chunks]
+        c_filter = models.Filter(must=[models.FieldCondition(key="note_id", match=models.MatchAny(any=note_ids))])
+        chunks, _ = self.client.scroll(self.chunks_col, scroll_filter=c_filter, limit=1000, with_payload=False)
+        chunk_ids = [c.id for c in chunks]
 
         shoulds = []
         # Case A: Edge zeigt auf einen unserer Chunks
@@ -195,42 +197,92 @@ class GraphExplorerService:
             shoulds.append(models.FieldCondition(key="target_id", match=models.MatchAny(any=chunk_ids)))
         
         # Case B: Edge zeigt direkt auf unsere Note ID
-        if note_ids:
-            shoulds.append(models.FieldCondition(key="target_id", match=models.MatchAny(any=note_ids)))
-            
-        # Case C: Edge zeigt auf unseren Titel (Wikilinks)
-        if note_title: 
-            shoulds.append(models.FieldCondition(key="target_id", match=models.MatchValue(value=note_title)))
+        shoulds.append(models.FieldCondition(key="target_id", match=models.MatchAny(any=note_ids)))
+        
+        # Case C: Edge zeigt auf unseren Titel
+        # WICHTIG: target_id enthält nur den Titel (z.B. "Meine Prinzipien 2025")
+        # target_section enthält die Abschnitt-Information (z.B. "P3 – Disziplin"), wenn gesetzt
+        
+        # Sammle alle relevanten Titel (inkl. Aliase)
+        titles_to_search = []
+        if note_title:
+            titles_to_search.append(note_title)
+        
+        # Lade auch Titel aus den Notes selbst (falls note_title nicht übergeben wurde)
+        for nid in note_ids:
+            note = self._fetch_note_cached(nid)
+            if note:
+                note_title_from_db = note.get("title")
+                if note_title_from_db and note_title_from_db not in titles_to_search:
+                    titles_to_search.append(note_title_from_db)
+                # Aliase hinzufügen
+                aliases = note.get("aliases", [])
+                if isinstance(aliases, str):
+                    aliases = [aliases]
+                for alias in aliases:
+                    if alias and alias not in titles_to_search:
+                        titles_to_search.append(alias)
+        
+        # Für jeden Titel: Suche nach exaktem Match
+        # target_id enthält nur den Titel, daher reicht MatchValue
+        for title in titles_to_search:
+            shoulds.append(models.FieldCondition(key="target_id", match=models.MatchValue(value=title)))
         
         if shoulds:
             in_filter = models.Filter(
                 must=[models.FieldCondition(key="kind", match=models.MatchExcept(**{"except": SYSTEM_EDGES}))],
                 should=shoulds
             )
-            res_in, _ = self.client.scroll(self.edges_col, scroll_filter=in_filter, limit=500, with_payload=True)
+            res_in, _ = self.client.scroll(self.edges_col, scroll_filter=in_filter, limit=2000, with_payload=True)
             results.extend(res_in)
             
         return results
 
     def _find_connected_edges_batch(self, note_ids):
-        # Wrapper für Level 2 Suche
-        return self._find_connected_edges(note_ids)
+        """
+        Wrapper für Level 2 Suche.
+        Lädt Titel der ersten Note für Titel-basierte Suche.
+        """
+        if not note_ids:
+            return []
+        first_note = self._fetch_note_cached(note_ids[0])
+        note_title = first_note.get("title") if first_note else None
+        return self._find_connected_edges(note_ids, note_title=note_title)
 
     def _process_edge(self, record, nodes_dict, unique_edges, current_depth):
-        """Verarbeitet eine rohe Edge, löst IDs auf und fügt sie den Dictionaries hinzu."""
+        """
+        Verarbeitet eine rohe Edge, löst IDs auf und fügt sie den Dictionaries hinzu.
+        
+        WICHTIG: Beide Richtungen werden unterstützt:
+        - Ausgehende Kanten: source_id gehört zu unserer Note (via note_id Owner)
+        - Eingehende Kanten: target_id zeigt auf unsere Note (via target_id Match)
+        """
+        if not record or not record.payload:
+            return None, None
+            
         payload = record.payload
         src_ref = payload.get("source_id")
         tgt_ref = payload.get("target_id")
         kind = payload.get("kind")
         provenance = payload.get("provenance", "explicit")
 
+        # Prüfe, ob beide Referenzen vorhanden sind
+        if not src_ref or not tgt_ref:
+            return None, None
+
         # IDs zu Notes auflösen
+        # WICHTIG: source_id kann Chunk-ID (note_id#c01), Note-ID oder Titel sein
+        # WICHTIG: target_id kann Chunk-ID, Note-ID oder Titel sein (ohne #Abschnitt)
         src_note = self._resolve_note_from_ref(src_ref)
         tgt_note = self._resolve_note_from_ref(tgt_ref)
 
         if src_note and tgt_note:
-            src_id = src_note['note_id']
-            tgt_id = tgt_note['note_id']
+            src_id = src_note.get('note_id')
+            tgt_id = tgt_note.get('note_id')
+            
+            # Prüfe, ob beide IDs vorhanden sind
+            if not src_id or not tgt_id:
+                return None, None
             
             if src_id != tgt_id:
                 # Nodes hinzufügen
@@ -245,7 +297,7 @@ class GraphExplorerService:
                 # Bevorzuge explizite Kanten vor Smart Kanten
                 is_current_explicit = (provenance in ["explicit", "rule"])
                 if existing:
-                    is_existing_explicit = (existing['provenance'] in ["explicit", "rule"])
+                    is_existing_explicit = (existing.get('provenance', '') in ["explicit", "rule"])
                     if is_existing_explicit and not is_current_explicit:
                         should_update = False
                 
@@ -267,33 +319,104 @@ class GraphExplorerService:
         return None
 
     def _resolve_note_from_ref(self, ref_str):
-        """Löst eine ID (Chunk, Note oder Titel) zu einer Note Payload auf."""
-        if not ref_str: return None
+        """
+        Löst eine Referenz zu einer Note Payload auf.
         
-        # Fall A: Chunk ID (enthält #)
+        WICHTIG: Wenn ref_str ein Titel#Abschnitt Format hat, wird nur der Titel-Teil verwendet.
+        Unterstützt:
+        - Note-ID: "20250101-meine-note"
+        - Chunk-ID: "20250101-meine-note#c01"
+        - Titel: "Meine Prinzipien 2025"
+        - Titel#Abschnitt: "Meine Prinzipien 2025#P3 – Disziplin" (trennt Abschnitt ab, sucht nur nach Titel)
+        """
+        if not ref_str:
+            return None
+        
+        # Fall A: Enthält # (kann Chunk-ID oder Titel#Abschnitt sein)
         if "#" in ref_str:
             try:
-                # Versuch 1: Chunk ID direkt
+                # Versuch 1: Chunk ID direkt (Format: note_id#c01)
                 res = self.client.retrieve(self.chunks_col, ids=[ref_str], with_payload=True)
-                if res: return self._fetch_note_cached(res[0].payload.get("note_id"))
-            except: pass
+                if res and res[0].payload:
+                    note_id = res[0].payload.get("note_id")
+                    if note_id:
+                        return self._fetch_note_cached(note_id)
+            except:
+                pass
             
-            # Versuch 2: NoteID#Section (Hash abtrennen)
-            possible_note_id = ref_str.split("#")[0]
-            if self._fetch_note_cached(possible_note_id): return self._fetch_note_cached(possible_note_id)
+            # Versuch 2: NoteID#Section (Hash abtrennen und als Note-ID versuchen)
+            # z.B. "20250101-meine-note#Abschnitt" -> "20250101-meine-note"
+            possible_note_id = ref_str.split("#")[0].strip()
+            note = self._fetch_note_cached(possible_note_id)
+            if note:
+                return note
+            
+            # Versuch 3: Titel#Abschnitt (Hash abtrennen und als Titel suchen)
+            # z.B. "Meine Prinzipien 2025#P3 – Disziplin" -> "Meine Prinzipien 2025"
+            # WICHTIG: target_id enthält nur den Titel, daher suchen wir nur nach dem Titel-Teil
+            possible_title = ref_str.split("#")[0].strip()
+            if possible_title:
+                res, _ = self.client.scroll(
+                    collection_name=self.notes_col,
+                    scroll_filter=models.Filter(must=[
+                        models.FieldCondition(key="title", match=models.MatchValue(value=possible_title))
+                    ]),
+                    limit=1, with_payload=True
+                )
+                if res and res[0].payload:
+                    payload = res[0].payload
+                    self._note_cache[payload['note_id']] = payload
+                    return payload
+                
+                # Fallback: Text-Suche für Fuzzy-Matching
+                res, _ = self.client.scroll(
+                    collection_name=self.notes_col,
+                    scroll_filter=models.Filter(must=[
+                        models.FieldCondition(key="title", match=models.MatchText(text=possible_title))
+                    ]),
+                    limit=10, with_payload=True
+                )
+                if res:
+                    # Nimm das erste Ergebnis, das exakt oder beginnend mit possible_title übereinstimmt
+                    for r in res:
+                        if r.payload:
+                            note_title = r.payload.get("title", "")
+                            if note_title == possible_title or note_title.startswith(possible_title):
+                                payload = r.payload
+                                self._note_cache[payload['note_id']] = payload
+                                return payload
 
         # Fall B: Note ID direkt
-        if self._fetch_note_cached(ref_str): return self._fetch_note_cached(ref_str)
+        note = self._fetch_note_cached(ref_str)
+        if note:
+            return note
         
-        # Fall C: Titel
+        # Fall C: Titel (exakte Übereinstimmung)
         res, _ = self.client.scroll(
             collection_name=self.notes_col,
-            scroll_filter=models.Filter(must=[models.FieldCondition(key="title", match=models.MatchValue(value=ref_str))]),
+            scroll_filter=models.Filter(must=[
+                models.FieldCondition(key="title", match=models.MatchValue(value=ref_str))
+            ]),
             limit=1, with_payload=True
         )
-        if res:
-            self._note_cache[res[0].payload['note_id']] = res[0].payload
-            return res[0].payload
+        if res and res[0].payload:
+            payload = res[0].payload
+            self._note_cache[payload['note_id']] = payload
+            return payload
+        
+        # Fall D: Titel (Text-Suche für Fuzzy-Matching)
+        res, _ = self.client.scroll(
+            collection_name=self.notes_col,
+            scroll_filter=models.Filter(must=[
+                models.FieldCondition(key="title", match=models.MatchText(text=ref_str))
+            ]),
+            limit=1, with_payload=True
+        )
+        if res and res[0].payload:
+            payload = res[0].payload
+            self._note_cache[payload['note_id']] = payload
+            return payload
+        
         return None
 
     def _add_node_to_dict(self, node_dict, note_payload, level=1):

From 857ba953e336c955d279fd99094aaa578ee2bcb2 Mon Sep 17 00:00:00 2001
From: Lars <Lars@stommer.de>
Date: Mon, 29 Dec 2025 11:00:00 +0100
Subject: [PATCH 07/33] bug fix

---
 app/core/graph/graph_extractors.py | 95 +++++++++++++++++++++++++-----
 app/frontend/ui_graph_service.py   | 18 +++---
 2 files changed, 91 insertions(+), 22 deletions(-)

diff --git a/app/core/graph/graph_extractors.py b/app/core/graph/graph_extractors.py
index 70d5ae5..690e561 100644
--- a/app/core/graph/graph_extractors.py
+++ b/app/core/graph/graph_extractors.py
@@ -1,13 +1,14 @@
 """
 FILE: app/core/graph/graph_extractors.py
 DESCRIPTION: Regex-basierte Extraktion von Relationen aus Text.
-             AUDIT: Regex für Wikilinks liberalisiert (Umlaute, Sonderzeichen Support).
+             AUDIT: 
+             - Regex für Wikilinks liberalisiert (Umlaute, Sonderzeichen).
+             - Callout-Parser erweitert für Multi-Line-Listen und Header-Typen.
 """
 import re
 from typing import List, Tuple
 
-# Fix: Erlaube alle Zeichen außer ']' im Target, statt nur a-z0-9.
-# Das fängt Umlaute, Emojis, '&', '#' und Leerzeichen ab.
+# Erlaube alle Zeichen außer ']' im Target (fängt Umlaute, Emojis, '&', '#' ab)
 _WIKILINK_RE = re.compile(r"\[\[(?:[^\|\]]+\|)?([^\]]+)\]\]")
 
 _REL_PIPE  = re.compile(r"\[\[\s*rel:(?P<kind>[a-z_]+)\s*\|\s*(?P<target>[^\]]+?)\s*\]\]", re.IGNORECASE)
@@ -15,8 +16,10 @@ _REL_SPACE = re.compile(r"\[\[\s*rel:(?P<kind>[a-z_]+)\s+(?P<target>[^\]]+?)\s*\
 _REL_TEXT  = re.compile(r"rel\s*:\s*(?P<kind>[a-z_]+)\s*\[\[\s*(?P<target>[^\]]+?)\s*\]\]", re.IGNORECASE)
 
 _CALLOUT_START = re.compile(r"^\s*>\s*\[!edge\]\s*(.*)$", re.IGNORECASE)
+# Erkennt "kind: targets..."
 _REL_LINE      = re.compile(r"^(?P<kind>[a-z_]+)\s*:\s*(?P<targets>.+?)\s*$", re.IGNORECASE)
-_WIKILINKS_IN_LINE = re.compile(r"\[\[([^\]]+)\]\]")
+# Erkennt reine Typen (z.B. "depends_on" im Header)
+_SIMPLE_KIND   = re.compile(r"^[a-z_]+$", re.IGNORECASE)
 
 def extract_typed_relations(text: str) -> Tuple[List[Tuple[str, str]], str]:
     """
@@ -35,28 +38,90 @@ def extract_typed_relations(text: str) -> Tuple[List[Tuple[str, str]], str]:
     return pairs, text
 
 def extract_callout_relations(text: str) -> Tuple[List[Tuple[str,str]], str]:
-    """Verarbeitet Obsidian [!edge]-Callouts."""
+    """
+    Verarbeitet Obsidian [!edge]-Callouts.
+    Unterstützt zwei Formate:
+    1. Explizit: "kind: [[Target]]"
+    2. Implizit (Header): "> [!edge] kind" gefolgt von "[[Target]]" Zeilen
+    """
     if not text: return [], text
-    lines = text.splitlines(); out_pairs, keep_lines, i = [], [], 0
+    lines = text.splitlines()
+    out_pairs = []
+    keep_lines = []
+    i = 0
+    
     while i < len(lines):
-        m = _CALLOUT_START.match(lines[i])
+        line = lines[i]
+        m = _CALLOUT_START.match(line)
         if not m:
-            keep_lines.append(lines[i]); i += 1; continue
-        block_lines = [m.group(1)] if m.group(1).strip() else []
+            keep_lines.append(line)
+            i += 1
+            continue
+        
+        # Callout-Block gefunden. Wir sammeln alle relevanten Zeilen.
+        block_lines = []
+        
+        # Header Content prüfen (z.B. "type" aus "> [!edge] type")
+        header_raw = m.group(1).strip()
+        if header_raw:
+            block_lines.append(header_raw)
+            
         i += 1
         while i < len(lines) and lines[i].lstrip().startswith('>'):
-            block_lines.append(lines[i].lstrip()[1:].lstrip()); i += 1
+            # Entferne '>' und führende Leerzeichen
+            content = lines[i].lstrip()[1:].lstrip()
+            if content:
+                block_lines.append(content)
+            i += 1
+            
+        # Verarbeitung des Blocks
+        current_kind = None
+        
+        # Heuristik: Ist die allererste Zeile (meist aus dem Header) ein reiner Typ?
+        # Dann setzen wir diesen als Default für den Block.
+        if block_lines:
+            first = block_lines[0]
+            # Wenn es NICHT wie "Key: Value" aussieht, aber wie ein Wort:
+            if not _REL_LINE.match(first) and _SIMPLE_KIND.match(first):
+                current_kind = first.lower()
+                
         for bl in block_lines:
+            # 1. Prüfen auf explizites "Kind: Targets" (überschreibt Header-Typ für diese Zeile)
             mrel = _REL_LINE.match(bl)
-            if not mrel: continue
-            kind, targets = mrel.group("kind").strip().lower(), mrel.group("targets") or ""
-            found = _WIKILINKS_IN_LINE.findall(targets)
+            if mrel:
+                line_kind = mrel.group("kind").strip().lower()
+                targets = mrel.group("targets")
+                
+                # Links extrahieren
+                found = _WIKILINK_RE.findall(targets)
+                if found:
+                    for t in found: out_pairs.append((line_kind, t.strip()))
+                else:
+                    # Fallback für kommagetrennten Plaintext
+                    for raw in re.split(r"[,;]", targets):
+                        if raw.strip(): out_pairs.append((line_kind, raw.strip()))
+                
+                # Wenn wir eine explizite Zeile gefunden haben, aktualisieren wir NICHT 
+                # den current_kind für nachfolgende Zeilen (Design-Entscheidung: lokal scope),
+                # oder wir machen es doch? 
+                # Üblicher ist: Header setzt Default, Zeile überschreibt lokal. 
+                # Wir lassen current_kind also unangetastet.
+                continue
+            
+            # 2. Kein Key:Value Muster -> Prüfen auf Links, die den current_kind nutzen
+            found = _WIKILINK_RE.findall(bl)
             if found:
-                for t in found: out_pairs.append((kind, t.strip()))
+                if current_kind:
+                    for t in found: out_pairs.append((current_kind, t.strip()))
+                else:
+                    # Link ohne Typ und ohne Header-Typ.
+                    # Wird ignoriert oder könnte als 'related_to' fallback dienen.
+                    # Aktuell: Ignorieren, um False Positives zu vermeiden.
+                    pass
+
     return out_pairs, "\n".join(keep_lines)
 
 def extract_wikilinks(text: str) -> List[str]:
     """Findet Standard-Wikilinks [[Target]] oder [[Alias|Target]]."""
     if not text: return []
-    # match.group(1) ist jetzt das Target (dank des fixierten Regex)
     return [m.strip() for m in _WIKILINK_RE.findall(text) if m.strip()]
\ No newline at end of file
diff --git a/app/frontend/ui_graph_service.py b/app/frontend/ui_graph_service.py
index c0164c8..2601d85 100644
--- a/app/frontend/ui_graph_service.py
+++ b/app/frontend/ui_graph_service.py
@@ -10,15 +10,19 @@ LAST_ANALYSIS: 2025-12-15
 import re
 from qdrant_client import QdrantClient, models
 from streamlit_agraph import Node, Edge
-from ui_config import GRAPH_COLORS, get_edge_color, SYSTEM_EDGES
+from ui_config import COLLECTION_PREFIX, GRAPH_COLORS, get_edge_color, SYSTEM_EDGES
 
 class GraphExplorerService:
-    def __init__(self, url, api_key=None, prefix="mindnet"):
+    def __init__(self, url, api_key=None, prefix=None):
+        """
+        Initialisiert den Service. Nutzt COLLECTION_PREFIX aus der Config, 
+        sofern kein spezifischer Prefix übergeben wurde.
+        """
         self.client = QdrantClient(url=url, api_key=api_key)
-        self.prefix = prefix
-        self.notes_col = f"{prefix}_notes"
-        self.chunks_col = f"{prefix}_chunks"
-        self.edges_col = f"{prefix}_edges"
+        self.prefix = prefix if prefix else COLLECTION_PREFIX
+        self.notes_col = f"{self.prefix}_notes"
+        self.chunks_col = f"{self.prefix}_chunks"
+        self.edges_col = f"{self.prefix}_edges"
         self._note_cache = {} 
 
     def get_note_with_full_content(self, note_id):
@@ -421,7 +425,7 @@ class GraphExplorerService:
 
     def _add_node_to_dict(self, node_dict, note_payload, level=1):
         nid = note_payload.get("note_id")
-        if nid in node_dict: return
+        if not nid or nid in node_dict: return
         
         ntype = note_payload.get("type", "default")
         color = GRAPH_COLORS.get(ntype, GRAPH_COLORS["default"])

From 0a429e1f7bec042b2f24bcb52e7551425bf8fa7b Mon Sep 17 00:00:00 2001
From: Lars <Lars@stommer.de>
Date: Mon, 29 Dec 2025 11:45:25 +0100
Subject: [PATCH 08/33] Anpassungen Kantenvergleich

---
 app/core/graph/graph_derive_edges.py | 54 ++++++++++++++++------------
 app/core/graph/graph_utils.py        | 15 ++++++--
 2 files changed, 43 insertions(+), 26 deletions(-)

diff --git a/app/core/graph/graph_derive_edges.py b/app/core/graph/graph_derive_edges.py
index d12c5e8..1f880ff 100644
--- a/app/core/graph/graph_derive_edges.py
+++ b/app/core/graph/graph_derive_edges.py
@@ -1,7 +1,10 @@
 """
 FILE: app/core/graph/graph_derive_edges.py
 DESCRIPTION: Hauptlogik zur Kanten-Aggregation und De-Duplizierung.
-             AUDIT: Integriert parse_link_target für saubere Graphen-Topologie.
+             AUDIT: 
+             - Nutzt parse_link_target
+             - Übergibt Section als 'variant' an ID-Gen
+             - Dedup basiert jetzt auf Edge-ID (erlaubt Multigraph für Sections)
 """
 from typing import List, Optional, Dict, Tuple
 from .graph_utils import (
@@ -18,11 +21,11 @@ def build_edges_for_note(
     note_level_references: Optional[List[str]] = None,
     include_note_scope_refs: bool = False,
 ) -> List[dict]:
-    """Erzeugt und aggregiert alle Kanten für eine Note (WP-15b)."""
+    """Erzeugt und aggregiert alle Kanten für eine Note."""
     edges: List[dict] = []
     note_type = _get(chunks[0], "type") if chunks else "concept"
 
-    # 1) Struktur-Kanten (belongs_to, next/prev)
+    # 1) Struktur-Kanten
     for idx, ch in enumerate(chunks):
         cid = _get(ch, "chunk_id", "id")
         if not cid: continue
@@ -52,76 +55,78 @@ def build_edges_for_note(
         if not cid: continue
         raw = _get(ch, "window") or _get(ch, "text") or ""
 
-        # Typed & Candidate Pool (WP-15b Integration)
+        # Typed
         typed, rem = extract_typed_relations(raw)
         for k, raw_t in typed:
             t, sec = parse_link_target(raw_t, note_id)
             if not t: continue
-            
             payload = {
-                "chunk_id": cid, "edge_id": _mk_edge_id(k, cid, t, "chunk", "inline:rel"),
+                "chunk_id": cid, 
+                # Variant=sec sorgt für eindeutige ID pro Abschnitt
+                "edge_id": _mk_edge_id(k, cid, t, "chunk", "inline:rel", variant=sec),
                 "provenance": "explicit", "rule_id": "inline:rel", "confidence": PROVENANCE_PRIORITY["inline:rel"]
             }
             if sec: payload["target_section"] = sec
-            
             edges.append(_edge(k, "chunk", cid, t, note_id, payload))
 
+        # Semantic AI Candidates
         pool = ch.get("candidate_pool") or ch.get("candidate_edges") or []
         for cand in pool:
             raw_t, k, p = cand.get("to"), cand.get("kind", "related_to"), cand.get("provenance", "semantic_ai")
             t, sec = parse_link_target(raw_t, note_id)
             if t:
                 payload = {
-                    "chunk_id": cid, "edge_id": _mk_edge_id(k, cid, t, "chunk", f"candidate:{p}"),
+                    "chunk_id": cid, 
+                    "edge_id": _mk_edge_id(k, cid, t, "chunk", f"candidate:{p}", variant=sec),
                     "provenance": p, "rule_id": f"candidate:{p}", "confidence": PROVENANCE_PRIORITY.get(p, 0.90)
                 }
                 if sec: payload["target_section"] = sec
-                
                 edges.append(_edge(k, "chunk", cid, t, note_id, payload))
 
-        # Callouts & Wikilinks
+        # Callouts
         call_pairs, rem2 = extract_callout_relations(rem)
         for k, raw_t in call_pairs:
             t, sec = parse_link_target(raw_t, note_id)
             if not t: continue
-            
             payload = {
-                "chunk_id": cid, "edge_id": _mk_edge_id(k, cid, t, "chunk", "callout:edge"),
+                "chunk_id": cid, 
+                "edge_id": _mk_edge_id(k, cid, t, "chunk", "callout:edge", variant=sec),
                 "provenance": "explicit", "rule_id": "callout:edge", "confidence": PROVENANCE_PRIORITY["callout:edge"]
             }
             if sec: payload["target_section"] = sec
-            
             edges.append(_edge(k, "chunk", cid, t, note_id, payload))
 
+        # Wikilinks & Defaults
         refs = extract_wikilinks(rem2)
         for raw_r in refs:
             r, sec = parse_link_target(raw_r, note_id)
             if not r: continue
             
+            # Explicit Reference
             payload = {
-                "chunk_id": cid, "ref_text": raw_r, "edge_id": _mk_edge_id("references", cid, r, "chunk", "explicit:wikilink"),
+                "chunk_id": cid, "ref_text": raw_r, 
+                "edge_id": _mk_edge_id("references", cid, r, "chunk", "explicit:wikilink", variant=sec),
                 "provenance": "explicit", "rule_id": "explicit:wikilink", "confidence": PROVENANCE_PRIORITY["explicit:wikilink"]
             }
             if sec: payload["target_section"] = sec
-            
             edges.append(_edge("references", "chunk", cid, r, note_id, payload))
             
+            # Defaults (nur einmal pro Target, Section hier irrelevant für Typ-Logik, oder?)
+            # Wir erzeugen Defaults auch pro Section, um Konsistenz zu wahren.
             for rel in defaults:
                 if rel != "references":
                     def_payload = {
-                        "chunk_id": cid, "edge_id": _mk_edge_id(rel, cid, r, "chunk", f"edge_defaults:{rel}"),
+                        "chunk_id": cid, 
+                        "edge_id": _mk_edge_id(rel, cid, r, "chunk", f"edge_defaults:{rel}", variant=sec),
                         "provenance": "rule", "rule_id": f"edge_defaults:{rel}", "confidence": PROVENANCE_PRIORITY["edge_defaults"]
                     }
                     if sec: def_payload["target_section"] = sec
                     edges.append(_edge(rel, "chunk", cid, r, note_id, def_payload))
         
-        # Für Note-Scope Sammlung nutzen wir den Original-String zur Dedup, aber gesäubert
         refs_all.extend([parse_link_target(r, note_id)[0] for r in refs])
 
     # 3) Note-Scope & De-Duplizierung
     if include_note_scope_refs:
-        # refs_all ist jetzt schon gesäubert (nur Targets)
-        # note_level_references müssen auch gesäubert werden
         cleaned_note_refs = [parse_link_target(r, note_id)[0] for r in (note_level_references or [])]
         refs_note = _dedupe_seq((refs_all or []) + cleaned_note_refs)
         
@@ -136,10 +141,13 @@ def build_edges_for_note(
                 "provenance": "rule", "confidence": PROVENANCE_PRIORITY["derived:backlink"]
             }))
 
-    unique_map: Dict[Tuple[str, str, str], dict] = {}
+    # Deduplizierung: Wir nutzen jetzt die EDGE-ID als Schlüssel.
+    # Da die Edge-ID nun 'variant' (Section) enthält, bleiben unterschiedliche Sections erhalten.
+    unique_map: Dict[str, dict] = {}
     for e in edges:
-        key = (str(e.get("source_id")), str(e.get("target_id")), str(e.get("kind")))
-        if key not in unique_map or e.get("confidence", 0) > unique_map[key].get("confidence", 0):
-            unique_map[key] = e
+        eid = e["edge_id"]
+        # Bei Konflikt (gleiche ID = exakt gleiche Kante und Section) gewinnt die höhere Confidence
+        if eid not in unique_map or e.get("confidence", 0) > unique_map[eid].get("confidence", 0):
+            unique_map[eid] = e
                 
     return list(unique_map.values())
\ No newline at end of file
diff --git a/app/core/graph/graph_utils.py b/app/core/graph/graph_utils.py
index d814ad7..fbdc51f 100644
--- a/app/core/graph/graph_utils.py
+++ b/app/core/graph/graph_utils.py
@@ -41,10 +41,19 @@ def _dedupe_seq(seq: Iterable[str]) -> List[str]:
             seen.add(s); out.append(s)
     return out
 
-def _mk_edge_id(kind: str, s: str, t: str, scope: str, rule_id: Optional[str] = None) -> str:
-    """Erzeugt eine deterministische 12-Byte ID mittels BLAKE2s."""
+def _mk_edge_id(kind: str, s: str, t: str, scope: str, rule_id: Optional[str] = None, variant: Optional[str] = None) -> str:
+    """
+    Erzeugt eine deterministische 12-Byte ID mittels BLAKE2s.
+    
+    WP-Fix: 'variant' (z.B. Section) fließt in den Hash ein, um mehrere Kanten 
+    zum gleichen Target-Node (aber unterschiedlichen Abschnitten) zu unterscheiden.
+    """
     base = f"{kind}:{s}->{t}#{scope}"
-    if rule_id: base += f"|{rule_id}"
+    if rule_id: 
+        base += f"|{rule_id}"
+    if variant: 
+        base += f"|{variant}"  # <--- Hier entsteht die Eindeutigkeit für verschiedene Sections
+        
     return hashlib.blake2s(base.encode("utf-8"), digest_size=12).hexdigest()
 
 def _edge(kind: str, scope: str, source_id: str, target_id: str, note_id: str, extra: Optional[dict] = None) -> dict:

From 38a61d7b509bbffc678108ab33048e75207faf03 Mon Sep 17 00:00:00 2001
From: Lars <Lars@stommer.de>
Date: Mon, 29 Dec 2025 12:21:57 +0100
Subject: [PATCH 09/33] Fix: Semantische Deduplizierung in
 graph_derive_edges.py

---
 app/core/graph/graph_derive_edges.py | 48 ++++++++++++++++++----------
 1 file changed, 31 insertions(+), 17 deletions(-)

diff --git a/app/core/graph/graph_derive_edges.py b/app/core/graph/graph_derive_edges.py
index 1f880ff..2d20530 100644
--- a/app/core/graph/graph_derive_edges.py
+++ b/app/core/graph/graph_derive_edges.py
@@ -4,7 +4,7 @@ DESCRIPTION: Hauptlogik zur Kanten-Aggregation und De-Duplizierung.
              AUDIT: 
              - Nutzt parse_link_target
              - Übergibt Section als 'variant' an ID-Gen
-             - Dedup basiert jetzt auf Edge-ID (erlaubt Multigraph für Sections)
+             - FIXED: Semantische De-Duplizierung (ignoriert rule_id bei Konflikten)
 """
 from typing import List, Optional, Dict, Tuple
 from .graph_utils import (
@@ -21,11 +21,11 @@ def build_edges_for_note(
     note_level_references: Optional[List[str]] = None,
     include_note_scope_refs: bool = False,
 ) -> List[dict]:
-    """Erzeugt und aggregiert alle Kanten für eine Note."""
+    """Erzeugt und aggregiert alle Kanten für eine Note (WP-15b)."""
     edges: List[dict] = []
     note_type = _get(chunks[0], "type") if chunks else "concept"
 
-    # 1) Struktur-Kanten
+    # 1) Struktur-Kanten (belongs_to, next/prev)
     for idx, ch in enumerate(chunks):
         cid = _get(ch, "chunk_id", "id")
         if not cid: continue
@@ -55,21 +55,21 @@ def build_edges_for_note(
         if not cid: continue
         raw = _get(ch, "window") or _get(ch, "text") or ""
 
-        # Typed
+        # Typed & Candidate Pool (WP-15b Integration)
         typed, rem = extract_typed_relations(raw)
         for k, raw_t in typed:
             t, sec = parse_link_target(raw_t, note_id)
             if not t: continue
+            
             payload = {
                 "chunk_id": cid, 
-                # Variant=sec sorgt für eindeutige ID pro Abschnitt
                 "edge_id": _mk_edge_id(k, cid, t, "chunk", "inline:rel", variant=sec),
                 "provenance": "explicit", "rule_id": "inline:rel", "confidence": PROVENANCE_PRIORITY["inline:rel"]
             }
             if sec: payload["target_section"] = sec
+            
             edges.append(_edge(k, "chunk", cid, t, note_id, payload))
 
-        # Semantic AI Candidates
         pool = ch.get("candidate_pool") or ch.get("candidate_edges") or []
         for cand in pool:
             raw_t, k, p = cand.get("to"), cand.get("kind", "related_to"), cand.get("provenance", "semantic_ai")
@@ -81,38 +81,38 @@ def build_edges_for_note(
                     "provenance": p, "rule_id": f"candidate:{p}", "confidence": PROVENANCE_PRIORITY.get(p, 0.90)
                 }
                 if sec: payload["target_section"] = sec
+                
                 edges.append(_edge(k, "chunk", cid, t, note_id, payload))
 
-        # Callouts
+        # Callouts & Wikilinks
         call_pairs, rem2 = extract_callout_relations(rem)
         for k, raw_t in call_pairs:
             t, sec = parse_link_target(raw_t, note_id)
             if not t: continue
+            
             payload = {
                 "chunk_id": cid, 
                 "edge_id": _mk_edge_id(k, cid, t, "chunk", "callout:edge", variant=sec),
                 "provenance": "explicit", "rule_id": "callout:edge", "confidence": PROVENANCE_PRIORITY["callout:edge"]
             }
             if sec: payload["target_section"] = sec
+            
             edges.append(_edge(k, "chunk", cid, t, note_id, payload))
 
-        # Wikilinks & Defaults
         refs = extract_wikilinks(rem2)
         for raw_r in refs:
             r, sec = parse_link_target(raw_r, note_id)
             if not r: continue
             
-            # Explicit Reference
             payload = {
                 "chunk_id": cid, "ref_text": raw_r, 
                 "edge_id": _mk_edge_id("references", cid, r, "chunk", "explicit:wikilink", variant=sec),
                 "provenance": "explicit", "rule_id": "explicit:wikilink", "confidence": PROVENANCE_PRIORITY["explicit:wikilink"]
             }
             if sec: payload["target_section"] = sec
+            
             edges.append(_edge("references", "chunk", cid, r, note_id, payload))
             
-            # Defaults (nur einmal pro Target, Section hier irrelevant für Typ-Logik, oder?)
-            # Wir erzeugen Defaults auch pro Section, um Konsistenz zu wahren.
             for rel in defaults:
                 if rel != "references":
                     def_payload = {
@@ -141,13 +141,27 @@ def build_edges_for_note(
                 "provenance": "rule", "confidence": PROVENANCE_PRIORITY["derived:backlink"]
             }))
 
-    # Deduplizierung: Wir nutzen jetzt die EDGE-ID als Schlüssel.
-    # Da die Edge-ID nun 'variant' (Section) enthält, bleiben unterschiedliche Sections erhalten.
+    # FIX: Semantische Deduplizierung
+    # Wir nutzen einen Key aus (Source, Target, Kind, Section), um Duplikate 
+    # aus verschiedenen Regeln (z.B. callout vs. wikilink) zusammenzuführen.
     unique_map: Dict[str, dict] = {}
+    
     for e in edges:
-        eid = e["edge_id"]
-        # Bei Konflikt (gleiche ID = exakt gleiche Kante und Section) gewinnt die höhere Confidence
-        if eid not in unique_map or e.get("confidence", 0) > unique_map[eid].get("confidence", 0):
-            unique_map[eid] = e
+        # Semantischer Schlüssel: Unabhängig von rule_id oder edge_id
+        src = e.get("source_id", "")
+        tgt = e.get("target_id", "")
+        kind = e.get("kind", "")
+        sec = e.get("target_section", "")
+        
+        sem_key = f"{src}->{tgt}:{kind}@{sec}"
+        
+        if sem_key not in unique_map:
+            unique_map[sem_key] = e
+        else:
+            # Konfliktlösung: Die Kante mit der höheren Confidence gewinnt
+            curr_conf = unique_map[sem_key].get("confidence", 0.0)
+            new_conf = e.get("confidence", 0.0)
+            if new_conf > curr_conf:
+                unique_map[sem_key] = e
                 
     return list(unique_map.values())
\ No newline at end of file

From 03d3173ca6cbe5c69c87d97988ea62315bac9357 Mon Sep 17 00:00:00 2001
From: Lars <Lars@stommer.de>
Date: Mon, 29 Dec 2025 12:42:26 +0100
Subject: [PATCH 10/33] =?UTF-8?q?Neue=20Deduplizierung=20f=C3=BCr=20callout?=
 =?UTF-8?q?-edges?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 app/core/graph/graph_derive_edges.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/app/core/graph/graph_derive_edges.py b/app/core/graph/graph_derive_edges.py
index 2d20530..05ee59b 100644
--- a/app/core/graph/graph_derive_edges.py
+++ b/app/core/graph/graph_derive_edges.py
@@ -4,7 +4,7 @@ DESCRIPTION: Hauptlogik zur Kanten-Aggregation und De-Duplizierung.
              AUDIT: 
              - Nutzt parse_link_target
              - Übergibt Section als 'variant' an ID-Gen
-             - FIXED: Semantische De-Duplizierung (ignoriert rule_id bei Konflikten)
+             - FIXED: Semantische De-Duplizierung via 'sem_key' (löst das Callout-Problem)
 """
 from typing import List, Optional, Dict, Tuple
 from .graph_utils import (
@@ -63,6 +63,7 @@ def build_edges_for_note(
             
             payload = {
                 "chunk_id": cid, 
+                # Variant=sec sorgt für eindeutige ID pro Abschnitt
                 "edge_id": _mk_edge_id(k, cid, t, "chunk", "inline:rel", variant=sec),
                 "provenance": "explicit", "rule_id": "inline:rel", "confidence": PROVENANCE_PRIORITY["inline:rel"]
             }
@@ -127,6 +128,8 @@ def build_edges_for_note(
 
     # 3) Note-Scope & De-Duplizierung
     if include_note_scope_refs:
+        # refs_all ist jetzt schon gesäubert (nur Targets)
+        # note_level_references müssen auch gesäubert werden
         cleaned_note_refs = [parse_link_target(r, note_id)[0] for r in (note_level_references or [])]
         refs_note = _dedupe_seq((refs_all or []) + cleaned_note_refs)
         
@@ -141,9 +144,12 @@ def build_edges_for_note(
                 "provenance": "rule", "confidence": PROVENANCE_PRIORITY["derived:backlink"]
             }))
 
+    # ----------------------------------------------------------------------------------
     # FIX: Semantische Deduplizierung
+    # Hier lösen wir das Problem, dass Callout-Kanten andere überschreiben.
     # Wir nutzen einen Key aus (Source, Target, Kind, Section), um Duplikate 
     # aus verschiedenen Regeln (z.B. callout vs. wikilink) zusammenzuführen.
+    # ----------------------------------------------------------------------------------
     unique_map: Dict[str, dict] = {}
     
     for e in edges:
@@ -153,6 +159,8 @@ def build_edges_for_note(
         kind = e.get("kind", "")
         sec = e.get("target_section", "")
         
+        # Dieser Key ist für alle Einträge im Callout-Block UNTERSCHIEDLICH,
+        # da 'sec' (1) Integrität, 3) Disziplin...) unterschiedlich ist.
         sem_key = f"{src}->{tgt}:{kind}@{sec}"
         
         if sem_key not in unique_map:

From b7d1bcce3d6fa2cf1b49d3cc815f18d2f98c073e Mon Sep 17 00:00:00 2001
From: Lars <Lars@stommer.de>
Date: Mon, 29 Dec 2025 18:04:14 +0100
Subject: [PATCH 11/33] =?UTF-8?q?R=C3=BCcksprung=20zur=20Vorversion,=20in?=
 =?UTF-8?q?=20der=202=20Kantentypen=20angelegt=20wurden?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 app/core/graph/graph_derive_edges.py | 34 +++++++---------------------
 1 file changed, 8 insertions(+), 26 deletions(-)

diff --git a/app/core/graph/graph_derive_edges.py b/app/core/graph/graph_derive_edges.py
index 05ee59b..687de6d 100644
--- a/app/core/graph/graph_derive_edges.py
+++ b/app/core/graph/graph_derive_edges.py
@@ -4,7 +4,7 @@ DESCRIPTION: Hauptlogik zur Kanten-Aggregation und De-Duplizierung.
              AUDIT: 
              - Nutzt parse_link_target
              - Übergibt Section als 'variant' an ID-Gen
-             - FIXED: Semantische De-Duplizierung via 'sem_key' (löst das Callout-Problem)
+             - Dedup basiert jetzt auf Edge-ID (erlaubt Multigraph für Sections)
 """
 from typing import List, Optional, Dict, Tuple
 from .graph_utils import (
@@ -124,6 +124,7 @@ def build_edges_for_note(
                     if sec: def_payload["target_section"] = sec
                     edges.append(_edge(rel, "chunk", cid, r, note_id, def_payload))
         
+        # Für Note-Scope Sammlung nutzen wir den Original-String zur Dedup, aber gesäubert
         refs_all.extend([parse_link_target(r, note_id)[0] for r in refs])
 
     # 3) Note-Scope & De-Duplizierung
@@ -144,32 +145,13 @@ def build_edges_for_note(
                 "provenance": "rule", "confidence": PROVENANCE_PRIORITY["derived:backlink"]
             }))
 
-    # ----------------------------------------------------------------------------------
-    # FIX: Semantische Deduplizierung
-    # Hier lösen wir das Problem, dass Callout-Kanten andere überschreiben.
-    # Wir nutzen einen Key aus (Source, Target, Kind, Section), um Duplikate 
-    # aus verschiedenen Regeln (z.B. callout vs. wikilink) zusammenzuführen.
-    # ----------------------------------------------------------------------------------
+    # Deduplizierung: Wir nutzen jetzt die EDGE-ID als Schlüssel.
+    # Da die Edge-ID nun 'variant' (Section) enthält, bleiben unterschiedliche Sections erhalten.
     unique_map: Dict[str, dict] = {}
-    
     for e in edges:
-        # Semantischer Schlüssel: Unabhängig von rule_id oder edge_id
-        src = e.get("source_id", "")
-        tgt = e.get("target_id", "")
-        kind = e.get("kind", "")
-        sec = e.get("target_section", "")
-        
-        # Dieser Key ist für alle Einträge im Callout-Block UNTERSCHIEDLICH,
-        # da 'sec' (1) Integrität, 3) Disziplin...) unterschiedlich ist.
-        sem_key = f"{src}->{tgt}:{kind}@{sec}"
-        
-        if sem_key not in unique_map:
-            unique_map[sem_key] = e
-        else:
-            # Konfliktlösung: Die Kante mit der höheren Confidence gewinnt
-            curr_conf = unique_map[sem_key].get("confidence", 0.0)
-            new_conf = e.get("confidence", 0.0)
-            if new_conf > curr_conf:
-                unique_map[sem_key] = e
+        eid = e["edge_id"]
+        # Bei Konflikt (gleiche ID = exakt gleiche Kante und Section) gewinnt die höhere Confidence
+        if eid not in unique_map or e.get("confidence", 0) > unique_map[eid].get("confidence", 0):
+            unique_map[eid] = e
                 
     return list(unique_map.values())
\ No newline at end of file

From 8f5eb36b5fa836109e2a03c111b54fbbb48c4a24 Mon Sep 17 00:00:00 2001
From: Lars <Lars@stommer.de>
Date: Mon, 29 Dec 2025 20:16:23 +0100
Subject: [PATCH 12/33] =?UTF-8?q?neuer=20Chunking=20parser,=20der=20Headin?=
 =?UTF-8?q?gs=20mitf=C3=BChrt=20und=20nicht=20mitten=20im=20Abschnitt=20sc?=
 =?UTF-8?q?hneidet?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 app/core/chunking/chunking_strategies.py | 168 +++++++++++------------
 1 file changed, 84 insertions(+), 84 deletions(-)

diff --git a/app/core/chunking/chunking_strategies.py b/app/core/chunking/chunking_strategies.py
index 8945fee..5e09512 100644
--- a/app/core/chunking/chunking_strategies.py
+++ b/app/core/chunking/chunking_strategies.py
@@ -1,10 +1,9 @@
 """
 FILE: app/core/chunking/chunking_strategies.py
-DESCRIPTION: Mathematische Splitting-Strategien.
-             AUDIT v3.3.2: 100% Konformität zur 'by_heading' Spezifikation.
-             - Implementiert Hybrid-Safety-Net (Sliding Window für Übergrößen).
-             - Breadcrumb-Kontext im Window (H1 > H2).
-             - Sliding Window mit H1-Kontext (Gold-Standard v3.1.0).
+DESCRIPTION: Korrigierte Splitting-Strategien für Mindnet v3.3.3.
+             - Fix: Erhalt von Überschriften im Chunk-Text.
+             - Fix: Atomares Buffering (Blöcke fallen als Ganzes in den nächsten Chunk).
+             - Fix: Korrekte Zuordnung von Sektions-Metadaten.
 """
 from typing import List, Dict, Any, Optional
 from .chunking_models import RawBlock, Chunk
@@ -19,71 +18,9 @@ def _create_context_win(doc_title: str, sec_title: Optional[str], text: str) ->
     prefix = " > ".join(parts)
     return f"{prefix}\n{text}".strip() if prefix else text
 
-def strategy_sliding_window(blocks: List[RawBlock], 
-                            config: Dict[str, Any], 
-                            note_id: str, 
-                            context_prefix: str = "") -> List[Chunk]:
-    """
-    Fasst Blöcke zusammen und schneidet bei 'target' Tokens.
-    Ignoriert H2-Überschriften beim Splitting, um Kontext zu wahren.
-    """
-    target = config.get("target", 400)
-    max_tokens = config.get("max", 600)
-    overlap_val = config.get("overlap", (50, 80))
-    overlap = sum(overlap_val) // 2 if isinstance(overlap_val, tuple) else overlap_val
-    
-    chunks: List[Chunk] = []
-    buf: List[RawBlock] = []
-
-    def _add(txt, sec, path):
-        idx = len(chunks)
-        # H1-Kontext Präfix für das Window-Feld
-        win = f"{context_prefix}\n{txt}".strip() if context_prefix else txt
-        chunks.append(Chunk(
-            id=f"{note_id}#c{idx:02d}", note_id=note_id, index=idx,
-            text=txt, window=win, token_count=estimate_tokens(txt),
-            section_title=sec, section_path=path,
-            neighbors_prev=None, neighbors_next=None
-        ))
-
-    def flush():
-        nonlocal buf
-        if not buf: return
-        text_body = "\n\n".join([b.text for b in buf])
-        sec_title = buf[-1].section_title; sec_path = buf[-1].section_path
-        
-        if estimate_tokens(text_body) <= max_tokens:
-            _add(text_body, sec_title, sec_path)
-        else:
-            sents = split_sentences(text_body); cur_sents = []; cur_len = 0
-            for s in sents:
-                slen = estimate_tokens(s)
-                if cur_len + slen > target and cur_sents:
-                    _add(" ".join(cur_sents), sec_title, sec_path)
-                    ov_s = []; ov_l = 0
-                    for os in reversed(cur_sents):
-                        if ov_l + estimate_tokens(os) < overlap:
-                            ov_s.insert(0, os); ov_l += estimate_tokens(os)
-                        else: break
-                    cur_sents = list(ov_s); cur_sents.append(s); cur_len = ov_l + slen
-                else:
-                    cur_sents.append(s); cur_len += slen
-            if cur_sents:
-                _add(" ".join(cur_sents), sec_title, sec_path)
-        buf = []
-
-    for b in blocks:
-        # H2-Überschriften werden ignoriert, um den Zusammenhang zu wahren
-        if b.kind == "heading": continue 
-        if estimate_tokens("\n\n".join([x.text for x in buf])) + estimate_tokens(b.text) >= target:
-            flush()
-        buf.append(b)
-    flush()
-    return chunks
-
 def strategy_by_heading(blocks: List[RawBlock], config: Dict[str, Any], note_id: str, doc_title: str = "") -> List[Chunk]:
     """
-    Splittet Text basierend auf Markdown-Überschriften mit Hybrid-Safety-Net.
+    Splittet Text basierend auf Markdown-Überschriften mit atomarem Block-Erhalt.
     """
     strict = config.get("strict_heading_split", False)
     target = config.get("target", 400)
@@ -92,7 +29,7 @@ def strategy_by_heading(blocks: List[RawBlock], config: Dict[str, Any], note_id:
     overlap = sum(config.get("overlap", (50, 80))) // 2
     
     chunks: List[Chunk] = []
-    buf: List[str] = []
+    buf: List[RawBlock] = []
     cur_tokens = 0
 
     def _add_to_chunks(txt, title, path):
@@ -105,18 +42,26 @@ def strategy_by_heading(blocks: List[RawBlock], config: Dict[str, Any], note_id:
             neighbors_prev=None, neighbors_next=None
         ))
 
-    def _flush(title, path):
+    def _flush():
         nonlocal buf, cur_tokens
         if not buf: return
-        full_text = "\n\n".join(buf)
+        
+        # Metadaten stammen immer vom ersten Block im Puffer (meist die Überschrift)
+        main_title = buf[0].section_title
+        main_path = buf[0].section_path
+        full_text = "\n\n".join([b.text for b in buf])
+        
+        # Falls der gesamte Puffer in einen Chunk passt
         if estimate_tokens(full_text) <= max_tokens:
-            _add_to_chunks(full_text, title, path)
+            _add_to_chunks(full_text, main_title, main_path)
         else:
-            sents = split_sentences(full_text); cur_sents = []; sub_len = 0
+            # Nur wenn ein einzelner Abschnitt größer als 'max' ist, wird intern gesplittet
+            sents = split_sentences(full_text)
+            cur_sents = []; sub_len = 0
             for s in sents:
                 slen = estimate_tokens(s)
                 if sub_len + slen > target and cur_sents:
-                    _add_to_chunks(" ".join(cur_sents), title, path)
+                    _add_to_chunks(" ".join(cur_sents), main_title, main_path)
                     ov_s = []; ov_l = 0
                     for os in reversed(cur_sents):
                         if ov_l + estimate_tokens(os) < overlap:
@@ -124,19 +69,74 @@ def strategy_by_heading(blocks: List[RawBlock], config: Dict[str, Any], note_id:
                         else: break
                     cur_sents = list(ov_s); cur_sents.append(s); sub_len = ov_l + slen
                 else: cur_sents.append(s); sub_len += slen
-            if cur_sents: _add_to_chunks(" ".join(cur_sents), title, path)
+            if cur_sents: _add_to_chunks(" ".join(cur_sents), main_title, main_path)
+        
         buf = []; cur_tokens = 0
 
     for b in blocks:
+        b_tokens = estimate_tokens(b.text)
+        
+        # Prüfung auf Split-Trigger (Überschriften)
+        is_split_trigger = False
         if b.kind == "heading":
-            if b.level < split_level: _flush(b.section_title, b.section_path)
+            if b.level < split_level:
+                is_split_trigger = True
             elif b.level == split_level:
-                if strict or cur_tokens >= target: _flush(b.section_title, b.section_path)
-            continue
-        bt = estimate_tokens(b.text)
-        if cur_tokens + bt > max_tokens and buf: _flush(b.section_title, b.section_path)
-        buf.append(b.text); cur_tokens += bt
-    if buf:
-        last_b = blocks[-1] if blocks else None
-        _flush(last_b.section_title if last_b else None, last_b.section_path if last_b else "/")
+                if strict or cur_tokens >= target:
+                    is_split_trigger = True
+        
+        if is_split_trigger:
+            _flush() # Vorherigen Puffer leeren
+            buf.append(b) # Neue Überschrift in den neuen Puffer aufnehmen
+            cur_tokens = b_tokens
+        else:
+            # Atomarer Check: Wenn der neue Block den aktuellen Chunk sprengen würde
+            if cur_tokens + b_tokens > max_tokens and buf:
+                _flush() # Puffer leeren, Block 'b' wird Teil des nächsten Chunks
+                
+            buf.append(b)
+            cur_tokens += b_tokens
+
+    _flush() # Letzten Puffer leeren
+    return chunks
+
+def strategy_sliding_window(blocks: List[RawBlock], 
+                            config: Dict[str, Any], 
+                            note_id: str, 
+                            context_prefix: str = "") -> List[Chunk]:
+    """
+    Standard Sliding Window mit Korrektur für Heading-Retention.
+    """
+    target = config.get("target", 400)
+    max_tokens = config.get("max", 600)
+    overlap_val = config.get("overlap", (50, 80))
+    overlap = sum(overlap_val) // 2 if isinstance(overlap_val, tuple) else overlap_val
+    
+    chunks: List[Chunk] = []
+    buf: List[RawBlock] = []
+
+    def _flush_window():
+        nonlocal buf
+        if not buf: return
+        txt = "\n\n".join([b.text for b in buf])
+        idx = len(chunks)
+        win = f"{context_prefix}\n{txt}".strip() if context_prefix else txt
+        chunks.append(Chunk(
+            id=f"{note_id}#c{idx:02d}", note_id=note_id, index=idx,
+            text=txt, window=win, token_count=estimate_tokens(txt),
+            section_title=buf[0].section_title, section_path=buf[0].section_path,
+            neighbors_prev=None, neighbors_next=None
+        ))
+        buf = []
+
+    for b in blocks:
+        # Auch hier: Überschriften mitnehmen
+        b_tokens = estimate_tokens(b.text)
+        current_buf_tokens = estimate_tokens("\n\n".join([x.text for x in buf])) if buf else 0
+        
+        if current_buf_tokens + b_tokens >= target and buf:
+            _flush_window()
+        buf.append(b)
+        
+    _flush_window()
     return chunks
\ No newline at end of file

From 838083b9095710154a6bd7f1d8f3a987b07aafc0 Mon Sep 17 00:00:00 2001
From: Lars <Lars@stommer.de>
Date: Mon, 29 Dec 2025 20:33:43 +0100
Subject: [PATCH 13/33] =?UTF-8?q?Verbesserung=20des=20Chunking-Parsers=20z?=
 =?UTF-8?q?ur=20Unterst=C3=BCtzung=20von=20H1-=C3=9Cberschriften=20und=20A?=
 =?UTF-8?q?npassung=20der=20Metadatenlogik.=20Implementierung=20einer=20at?=
 =?UTF-8?q?omaren=20Sektions-Chunking-Strategie,=20die=20=C3=9Cberschrifte?=
 =?UTF-8?q?n=20und=20deren=20Inhalte=20zusammenh=C3=A4lt.?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 app/core/chunking/chunking_parser.py     | 45 +++++++++--------
 app/core/chunking/chunking_strategies.py | 64 +++++++++++++-----------
 2 files changed, 61 insertions(+), 48 deletions(-)

diff --git a/app/core/chunking/chunking_parser.py b/app/core/chunking/chunking_parser.py
index 3d56f55..2ec45bc 100644
--- a/app/core/chunking/chunking_parser.py
+++ b/app/core/chunking/chunking_parser.py
@@ -17,53 +17,58 @@ def split_sentences(text: str) -> list[str]:
     return [p.strip() for p in _SENT_SPLIT.split(text) if p.strip()]
 
 def parse_blocks(md_text: str) -> Tuple[List[RawBlock], str]:
-    """Zerlegt Text in logische Einheiten."""
+    """Zerlegt Text in logische Einheiten, inklusive H1."""
     blocks = []
-    h1_title = "Dokument"; section_path = "/"; current_h2 = None
+    h1_title = "Dokument"; section_path = "/"; current_section_title = None
     fm, text_without_fm = extract_frontmatter_from_text(md_text)
+    
+    # H1 für Note-Metadaten extrahieren
     h1_match = re.search(r'^#\s+(.*)', text_without_fm, re.MULTILINE)
     if h1_match: h1_title = h1_match.group(1).strip()
+    
     lines = text_without_fm.split('\n')
     buffer = []
     
     for line in lines:
         stripped = line.strip()
         
-        # H1 ignorieren (ist Doc Title)
-        if stripped.startswith('# '): 
-            continue 
-        
-        # Generische Heading-Erkennung (H2 bis H6) für flexible Split-Levels
-        heading_match = re.match(r'^(#{2,6})\s+(.*)', stripped)
+        # Heading-Erkennung (H1 bis H6)
+        heading_match = re.match(r'^(#{1,6})\s+(.*)', stripped)
         if heading_match:
-            # Buffer leeren (vorherigen Text abschließen)
             if buffer:
                 content = "\n".join(buffer).strip()
-                if content: blocks.append(RawBlock("paragraph", content, None, section_path, current_h2))
+                if content: 
+                    blocks.append(RawBlock("paragraph", content, None, section_path, current_section_title))
                 buffer = []
             
             level = len(heading_match.group(1))
             title = heading_match.group(2).strip()
             
-            # Pfad-Logik: H2 setzt den Haupt-Pfad
-            if level == 2:
-                current_h2 = title
-                section_path = f"/{current_h2}"
-            # Bei H3+ bleibt der section_path beim Parent, aber das Level wird korrekt gesetzt
+            # Metadaten-Update
+            if level == 1:
+                current_section_title = title
+                section_path = "/"
+            elif level == 2:
+                current_section_title = title
+                section_path = f"/{current_section_title}"
             
-            blocks.append(RawBlock("heading", stripped, level, section_path, current_h2))
-            
-        elif not stripped:
+            blocks.append(RawBlock("heading", stripped, level, section_path, current_section_title))
+            continue
+
+        if not stripped:
             if buffer:
                 content = "\n".join(buffer).strip()
-                if content: blocks.append(RawBlock("paragraph", content, None, section_path, current_h2))
+                if content: 
+                    blocks.append(RawBlock("paragraph", content, None, section_path, current_section_title))
                 buffer = []
         else: 
             buffer.append(line)
             
     if buffer:
         content = "\n".join(buffer).strip()
-        if content: blocks.append(RawBlock("paragraph", content, None, section_path, current_h2))
+        if content: 
+            blocks.append(RawBlock("paragraph", content, None, section_path, current_section_title))
+            
     return blocks, h1_title
 
 def parse_edges_robust(text: str) -> Set[str]:
diff --git a/app/core/chunking/chunking_strategies.py b/app/core/chunking/chunking_strategies.py
index 5e09512..3c939ec 100644
--- a/app/core/chunking/chunking_strategies.py
+++ b/app/core/chunking/chunking_strategies.py
@@ -20,7 +20,8 @@ def _create_context_win(doc_title: str, sec_title: Optional[str], text: str) ->
 
 def strategy_by_heading(blocks: List[RawBlock], config: Dict[str, Any], note_id: str, doc_title: str = "") -> List[Chunk]:
     """
-    Splittet Text basierend auf Markdown-Überschriften mit atomarem Block-Erhalt.
+    Implementiert atomares Sektions-Chunking. 
+    Hält Überschriften und ihren Inhalt (inkl. Edges) zusammen.
     """
     strict = config.get("strict_heading_split", False)
     target = config.get("target", 400)
@@ -45,23 +46,21 @@ def strategy_by_heading(blocks: List[RawBlock], config: Dict[str, Any], note_id:
     def _flush():
         nonlocal buf, cur_tokens
         if not buf: return
-        
-        # Metadaten stammen immer vom ersten Block im Puffer (meist die Überschrift)
         main_title = buf[0].section_title
         main_path = buf[0].section_path
         full_text = "\n\n".join([b.text for b in buf])
         
-        # Falls der gesamte Puffer in einen Chunk passt
         if estimate_tokens(full_text) <= max_tokens:
             _add_to_chunks(full_text, main_title, main_path)
         else:
-            # Nur wenn ein einzelner Abschnitt größer als 'max' ist, wird intern gesplittet
+            # Fallback: Nur wenn eine Sektion ALLEINE zu groß ist, wird intern gesplittet
             sents = split_sentences(full_text)
             cur_sents = []; sub_len = 0
             for s in sents:
                 slen = estimate_tokens(s)
                 if sub_len + slen > target and cur_sents:
                     _add_to_chunks(" ".join(cur_sents), main_title, main_path)
+                    # Overlap-Logik...
                     ov_s = []; ov_l = 0
                     for os in reversed(cur_sents):
                         if ov_l + estimate_tokens(os) < overlap:
@@ -70,34 +69,43 @@ def strategy_by_heading(blocks: List[RawBlock], config: Dict[str, Any], note_id:
                     cur_sents = list(ov_s); cur_sents.append(s); sub_len = ov_l + slen
                 else: cur_sents.append(s); sub_len += slen
             if cur_sents: _add_to_chunks(" ".join(cur_sents), main_title, main_path)
-        
         buf = []; cur_tokens = 0
 
+    # SCHRITT 1: Gruppierung in atomare Sektions-Einheiten
+    sections = []
+    curr_sec = []
     for b in blocks:
-        b_tokens = estimate_tokens(b.text)
-        
-        # Prüfung auf Split-Trigger (Überschriften)
-        is_split_trigger = False
-        if b.kind == "heading":
-            if b.level < split_level:
-                is_split_trigger = True
-            elif b.level == split_level:
-                if strict or cur_tokens >= target:
-                    is_split_trigger = True
-        
-        if is_split_trigger:
-            _flush() # Vorherigen Puffer leeren
-            buf.append(b) # Neue Überschrift in den neuen Puffer aufnehmen
-            cur_tokens = b_tokens
+        # Ein Split-Trigger startet eine neue Sektion
+        if b.kind == "heading" and b.level <= split_level:
+            if curr_sec: sections.append(curr_sec)
+            curr_sec = [b]
         else:
-            # Atomarer Check: Wenn der neue Block den aktuellen Chunk sprengen würde
-            if cur_tokens + b_tokens > max_tokens and buf:
-                _flush() # Puffer leeren, Block 'b' wird Teil des nächsten Chunks
-                
-            buf.append(b)
-            cur_tokens += b_tokens
+            curr_sec.append(b)
+    if curr_sec: sections.append(curr_sec)
 
-    _flush() # Letzten Puffer leeren
+    # SCHRITT 2: Verarbeitung der Sektionen mit Vorausschau
+    for sec in sections:
+        sec_tokens = sum(estimate_tokens(b.text) for b in sec)
+        
+        if buf:
+            # PRÜFUNG: Passt die gesamte Sektion noch in den aktuellen Chunk?
+            if cur_tokens + sec_tokens > max_tokens:
+                _flush()
+            # PRÜFUNG: Harter Split gefordert?
+            elif strict:
+                _flush()
+            # PRÜFUNG: Weicher Split (Target erreicht)?
+            elif cur_tokens >= target:
+                _flush()
+        
+        buf.extend(sec)
+        cur_tokens += sec_tokens
+        
+        # Falls die Sektion selbst das Limit sprengt, sofort flashen
+        if cur_tokens >= max_tokens:
+            _flush()
+
+    _flush()
     return chunks
 
 def strategy_sliding_window(blocks: List[RawBlock], 

From 7eba1fb487d8a7784f0cccf8662dc51ccff7c242 Mon Sep 17 00:00:00 2001
From: Lars <Lars@stommer.de>
Date: Mon, 29 Dec 2025 20:45:04 +0100
Subject: [PATCH 14/33] =?UTF-8?q?Aktualisierung=20des=20Chunking-Parsers?=
 =?UTF-8?q?=20zur=20Unterst=C3=BCtzung=20aller=20=C3=9Cberschriften=20im?=
 =?UTF-8?q?=20Stream=20und=20Verbesserung=20der=20Metadatenverarbeitung.?=
 =?UTF-8?q?=20Anpassungen=20an=20der=20atomaren=20Sektions-Chunking-Strate?=
 =?UTF-8?q?gie=20zur=20besseren=20Handhabung=20von=20Blockinhalten=20und?=
 =?UTF-8?q?=20Token-Sch=C3=A4tzungen.?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 app/core/chunking/chunking_parser.py     | 15 ++--
 app/core/chunking/chunking_strategies.py | 95 +++++++++++-------------
 2 files changed, 52 insertions(+), 58 deletions(-)

diff --git a/app/core/chunking/chunking_parser.py b/app/core/chunking/chunking_parser.py
index 2ec45bc..95e2fad 100644
--- a/app/core/chunking/chunking_parser.py
+++ b/app/core/chunking/chunking_parser.py
@@ -1,6 +1,6 @@
 """
 FILE: app/core/chunking/chunking_parser.py
-DESCRIPTION: Zerlegt Markdown in Blöcke und extrahiert Kanten-Strings.
+DESCRIPTION: Zerlegt Markdown in Blöcke. Hält H1-Überschriften im Stream.
 """
 import re
 from typing import List, Tuple, Set
@@ -17,12 +17,12 @@ def split_sentences(text: str) -> list[str]:
     return [p.strip() for p in _SENT_SPLIT.split(text) if p.strip()]
 
 def parse_blocks(md_text: str) -> Tuple[List[RawBlock], str]:
-    """Zerlegt Text in logische Einheiten, inklusive H1."""
+    """Zerlegt Text in logische Einheiten, inklusive aller Überschriften."""
     blocks = []
     h1_title = "Dokument"; section_path = "/"; current_section_title = None
     fm, text_without_fm = extract_frontmatter_from_text(md_text)
     
-    # H1 für Note-Metadaten extrahieren
+    # H1 für Note-Titel extrahieren (Metadaten)
     h1_match = re.search(r'^#\s+(.*)', text_without_fm, re.MULTILINE)
     if h1_match: h1_title = h1_match.group(1).strip()
     
@@ -35,6 +35,7 @@ def parse_blocks(md_text: str) -> Tuple[List[RawBlock], str]:
         # Heading-Erkennung (H1 bis H6)
         heading_match = re.match(r'^(#{1,6})\s+(.*)', stripped)
         if heading_match:
+            # Vorherigen Text-Block abschließen
             if buffer:
                 content = "\n".join(buffer).strip()
                 if content: 
@@ -44,7 +45,7 @@ def parse_blocks(md_text: str) -> Tuple[List[RawBlock], str]:
             level = len(heading_match.group(1))
             title = heading_match.group(2).strip()
             
-            # Metadaten-Update
+            # Pfad- und Titel-Update
             if level == 1:
                 current_section_title = title
                 section_path = "/"
@@ -55,7 +56,7 @@ def parse_blocks(md_text: str) -> Tuple[List[RawBlock], str]:
             blocks.append(RawBlock("heading", stripped, level, section_path, current_section_title))
             continue
 
-        if not stripped:
+        if not stripped and not line.startswith('>'): # Leerzeilen (außer in Callouts) trennen Blöcke
             if buffer:
                 content = "\n".join(buffer).strip()
                 if content: 
@@ -79,6 +80,7 @@ def parse_edges_robust(text: str) -> Set[str]:
         k = kind.strip().lower()
         t = target.strip()
         if k and t: found_edges.add(f"{k}:{t}")
+    
     lines = text.split('\n')
     current_edge_type = None
     for line in lines:
@@ -94,5 +96,6 @@ def parse_edges_robust(text: str) -> Set[str]:
             links = re.findall(r'\[\[([^\]]+)\]\]', stripped)
             for l in links: 
                 if "rel:" not in l: found_edges.add(f"{current_edge_type}:{l}")
-        elif not stripped.startswith('>'): current_edge_type = None
+        elif not stripped.startswith('>'): 
+            current_edge_type = None
     return found_edges
\ No newline at end of file
diff --git a/app/core/chunking/chunking_strategies.py b/app/core/chunking/chunking_strategies.py
index 3c939ec..1a15bba 100644
--- a/app/core/chunking/chunking_strategies.py
+++ b/app/core/chunking/chunking_strategies.py
@@ -1,9 +1,6 @@
 """
 FILE: app/core/chunking/chunking_strategies.py
-DESCRIPTION: Korrigierte Splitting-Strategien für Mindnet v3.3.3.
-             - Fix: Erhalt von Überschriften im Chunk-Text.
-             - Fix: Atomares Buffering (Blöcke fallen als Ganzes in den nächsten Chunk).
-             - Fix: Korrekte Zuordnung von Sektions-Metadaten.
+DESCRIPTION: Strategien für atomares Sektions-Chunking (WP-15b konform).
 """
 from typing import List, Dict, Any, Optional
 from .chunking_models import RawBlock, Chunk
@@ -20,8 +17,7 @@ def _create_context_win(doc_title: str, sec_title: Optional[str], text: str) ->
 
 def strategy_by_heading(blocks: List[RawBlock], config: Dict[str, Any], note_id: str, doc_title: str = "") -> List[Chunk]:
     """
-    Implementiert atomares Sektions-Chunking. 
-    Hält Überschriften und ihren Inhalt (inkl. Edges) zusammen.
+    Gruppiert Blöcke zu Sektionen und hält diese atomar zusammen.
     """
     strict = config.get("strict_heading_split", False)
     target = config.get("target", 400)
@@ -43,9 +39,10 @@ def strategy_by_heading(blocks: List[RawBlock], config: Dict[str, Any], note_id:
             neighbors_prev=None, neighbors_next=None
         ))
 
-    def _flush():
+    def _flush_buffer():
         nonlocal buf, cur_tokens
         if not buf: return
+        
         main_title = buf[0].section_title
         main_path = buf[0].section_path
         full_text = "\n\n".join([b.text for b in buf])
@@ -53,14 +50,14 @@ def strategy_by_heading(blocks: List[RawBlock], config: Dict[str, Any], note_id:
         if estimate_tokens(full_text) <= max_tokens:
             _add_to_chunks(full_text, main_title, main_path)
         else:
-            # Fallback: Nur wenn eine Sektion ALLEINE zu groß ist, wird intern gesplittet
+            # Nur wenn eine Sektion ALLEINE zu groß ist, wird intern gesplittet
             sents = split_sentences(full_text)
             cur_sents = []; sub_len = 0
             for s in sents:
                 slen = estimate_tokens(s)
                 if sub_len + slen > target and cur_sents:
                     _add_to_chunks(" ".join(cur_sents), main_title, main_path)
-                    # Overlap-Logik...
+                    # Overlap Logic
                     ov_s = []; ov_l = 0
                     for os in reversed(cur_sents):
                         if ov_l + estimate_tokens(os) < overlap:
@@ -69,11 +66,13 @@ def strategy_by_heading(blocks: List[RawBlock], config: Dict[str, Any], note_id:
                     cur_sents = list(ov_s); cur_sents.append(s); sub_len = ov_l + slen
                 else: cur_sents.append(s); sub_len += slen
             if cur_sents: _add_to_chunks(" ".join(cur_sents), main_title, main_path)
+        
         buf = []; cur_tokens = 0
 
-    # SCHRITT 1: Gruppierung in atomare Sektions-Einheiten
-    sections = []
-    curr_sec = []
+    # SCHRITT 1: Gruppierung in atomare Sektions-Einheiten (Heading + Paragraphs)
+    sections: List[List[RawBlock]] = []
+    curr_sec: List[RawBlock] = []
+    
     for b in blocks:
         # Ein Split-Trigger startet eine neue Sektion
         if b.kind == "heading" and b.level <= split_level:
@@ -85,66 +84,58 @@ def strategy_by_heading(blocks: List[RawBlock], config: Dict[str, Any], note_id:
 
     # SCHRITT 2: Verarbeitung der Sektionen mit Vorausschau
     for sec in sections:
-        sec_tokens = sum(estimate_tokens(b.text) for b in sec)
+        # Token-Schätzung für die gesamte Sektion inkl. Newline-Overhead
+        sec_text = "\n\n".join([b.text for b in sec])
+        sec_tokens = estimate_tokens(sec_text)
         
         if buf:
-            # PRÜFUNG: Passt die gesamte Sektion noch in den aktuellen Chunk?
+            # Passt die Sektion noch in den aktuellen Chunk?
             if cur_tokens + sec_tokens > max_tokens:
-                _flush()
-            # PRÜFUNG: Harter Split gefordert?
-            elif strict:
-                _flush()
-            # PRÜFUNG: Weicher Split (Target erreicht)?
+                _flush_buffer()
+            # Wenn strict: Jede neue Sektion auf split_level erzwingt neuen Chunk
+            elif strict and sec[0].kind == "heading" and sec[0].level == split_level:
+                _flush_buffer()
+            # Wenn target erreicht: Neue Sektion startet neuen Chunk
             elif cur_tokens >= target:
-                _flush()
+                _flush_buffer()
         
         buf.extend(sec)
         cur_tokens += sec_tokens
         
-        # Falls die Sektion selbst das Limit sprengt, sofort flashen
+        # Falls der Puffer (selbst nach flush) durch eine Riesen-Sektion zu groß ist
         if cur_tokens >= max_tokens:
-            _flush()
+            _flush_buffer()
 
-    _flush()
+    _flush_buffer()
     return chunks
 
-def strategy_sliding_window(blocks: List[RawBlock], 
-                            config: Dict[str, Any], 
-                            note_id: str, 
-                            context_prefix: str = "") -> List[Chunk]:
-    """
-    Standard Sliding Window mit Korrektur für Heading-Retention.
-    """
+def strategy_sliding_window(blocks: List[RawBlock], config: Dict[str, Any], note_id: str, context_prefix: str = "") -> List[Chunk]:
+    # (Identische Korrektur wie oben für Sliding Window, falls benötigt)
+    # Hier halten wir es einfach: Blöcke nacheinander bis target.
     target = config.get("target", 400)
     max_tokens = config.get("max", 600)
-    overlap_val = config.get("overlap", (50, 80))
-    overlap = sum(overlap_val) // 2 if isinstance(overlap_val, tuple) else overlap_val
     
     chunks: List[Chunk] = []
     buf: List[RawBlock] = []
-
-    def _flush_window():
-        nonlocal buf
-        if not buf: return
-        txt = "\n\n".join([b.text for b in buf])
-        idx = len(chunks)
-        win = f"{context_prefix}\n{txt}".strip() if context_prefix else txt
-        chunks.append(Chunk(
-            id=f"{note_id}#c{idx:02d}", note_id=note_id, index=idx,
-            text=txt, window=win, token_count=estimate_tokens(txt),
-            section_title=buf[0].section_title, section_path=buf[0].section_path,
-            neighbors_prev=None, neighbors_next=None
-        ))
-        buf = []
-
+    
     for b in blocks:
-        # Auch hier: Überschriften mitnehmen
         b_tokens = estimate_tokens(b.text)
-        current_buf_tokens = estimate_tokens("\n\n".join([x.text for x in buf])) if buf else 0
+        current_tokens = estimate_tokens("\n\n".join([x.text for x in buf])) if buf else 0
         
-        if current_buf_tokens + b_tokens >= target and buf:
-            _flush_window()
+        if current_tokens + b_tokens > max_tokens and buf:
+            txt = "\n\n".join([x.text for x in buf])
+            idx = len(chunks)
+            win = f"{context_prefix}\n{txt}".strip() if context_prefix else txt
+            chunks.append(Chunk(id=f"{note_id}#c{idx:02d}", note_id=note_id, index=idx, text=txt, window=win, token_count=current_tokens, section_title=buf[0].section_title, section_path=buf[0].section_path, neighbors_prev=None, neighbors_next=None))
+            buf = []
+            current_tokens = 0
+            
         buf.append(b)
         
-    _flush_window()
+    if buf:
+        txt = "\n\n".join([x.text for x in buf])
+        idx = len(chunks)
+        win = f"{context_prefix}\n{txt}".strip() if context_prefix else txt
+        chunks.append(Chunk(id=f"{note_id}#c{idx:02d}", note_id=note_id, index=idx, text=txt, window=win, token_count=estimate_tokens(txt), section_title=buf[0].section_title, section_path=buf[0].section_path, neighbors_prev=None, neighbors_next=None))
+        
     return chunks
\ No newline at end of file

From 1b40e29f40084e3d57c278b29a932676aeb91889 Mon Sep 17 00:00:00 2001
From: Lars <Lars@stommer.de>
Date: Mon, 29 Dec 2025 20:57:07 +0100
Subject: [PATCH 15/33] =?UTF-8?q?Optimierung=20des=20Chunking-Parsers=20zu?=
 =?UTF-8?q?r=20Unterst=C3=BCtzung=20atomares=20Chunking=20und=20verbessert?=
 =?UTF-8?q?e=20Block-Trennung.=20Anpassungen=20an=20der=20Sektions-Chunkin?=
 =?UTF-8?q?g-Strategie=20zur=20Wahrung=20von=20Sektionsgrenzen=20und=20Ver?=
 =?UTF-8?q?meidung=20von=20=C3=9Cberh=C3=A4ngen.?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 app/core/chunking/chunking_parser.py     | 12 ++++---
 app/core/chunking/chunking_strategies.py | 43 +++++++++++++++---------
 2 files changed, 36 insertions(+), 19 deletions(-)

diff --git a/app/core/chunking/chunking_parser.py b/app/core/chunking/chunking_parser.py
index 95e2fad..6bc866d 100644
--- a/app/core/chunking/chunking_parser.py
+++ b/app/core/chunking/chunking_parser.py
@@ -1,6 +1,7 @@
 """
 FILE: app/core/chunking/chunking_parser.py
-DESCRIPTION: Zerlegt Markdown in Blöcke. Hält H1-Überschriften im Stream.
+DESCRIPTION: Zerlegt Markdown in Blöcke. Hält H1-Überschriften im Stream 
+             und optimiert die Block-Trennung für atomares Chunking.
 """
 import re
 from typing import List, Tuple, Set
@@ -24,7 +25,8 @@ def parse_blocks(md_text: str) -> Tuple[List[RawBlock], str]:
     
     # H1 für Note-Titel extrahieren (Metadaten)
     h1_match = re.search(r'^#\s+(.*)', text_without_fm, re.MULTILINE)
-    if h1_match: h1_title = h1_match.group(1).strip()
+    if h1_match: 
+        h1_title = h1_match.group(1).strip()
     
     lines = text_without_fm.split('\n')
     buffer = []
@@ -45,7 +47,7 @@ def parse_blocks(md_text: str) -> Tuple[List[RawBlock], str]:
             level = len(heading_match.group(1))
             title = heading_match.group(2).strip()
             
-            # Pfad- und Titel-Update
+            # Pfad- und Titel-Update für die Metadaten der folgenden Blöcke
             if level == 1:
                 current_section_title = title
                 section_path = "/"
@@ -53,10 +55,12 @@ def parse_blocks(md_text: str) -> Tuple[List[RawBlock], str]:
                 current_section_title = title
                 section_path = f"/{current_section_title}"
             
+            # Die Überschrift selbst als Block hinzufügen (Fix: H1 wird nicht mehr gefiltert)
             blocks.append(RawBlock("heading", stripped, level, section_path, current_section_title))
             continue
 
-        if not stripped and not line.startswith('>'): # Leerzeilen (außer in Callouts) trennen Blöcke
+        # Leerzeilen trennen Blöcke, außer innerhalb von Callouts
+        if not stripped and not line.startswith('>'):
             if buffer:
                 content = "\n".join(buffer).strip()
                 if content: 
diff --git a/app/core/chunking/chunking_strategies.py b/app/core/chunking/chunking_strategies.py
index 1a15bba..af19d4d 100644
--- a/app/core/chunking/chunking_strategies.py
+++ b/app/core/chunking/chunking_strategies.py
@@ -1,6 +1,7 @@
 """
 FILE: app/core/chunking/chunking_strategies.py
 DESCRIPTION: Strategien für atomares Sektions-Chunking (WP-15b konform).
+             Fix: Vorausschauende Trennung zur Wahrung von Sektionsgrenzen.
 """
 from typing import List, Dict, Any, Optional
 from .chunking_models import RawBlock, Chunk
@@ -18,6 +19,7 @@ def _create_context_win(doc_title: str, sec_title: Optional[str], text: str) ->
 def strategy_by_heading(blocks: List[RawBlock], config: Dict[str, Any], note_id: str, doc_title: str = "") -> List[Chunk]:
     """
     Gruppiert Blöcke zu Sektionen und hält diese atomar zusammen.
+    Nutzt Look-Ahead, um Sektions-Überhänge zu vermeiden.
     """
     strict = config.get("strict_heading_split", False)
     target = config.get("target", 400)
@@ -47,25 +49,38 @@ def strategy_by_heading(blocks: List[RawBlock], config: Dict[str, Any], note_id:
         main_path = buf[0].section_path
         full_text = "\n\n".join([b.text for b in buf])
         
+        # Falls die gruppierten Sektionen in das Limit passen
         if estimate_tokens(full_text) <= max_tokens:
             _add_to_chunks(full_text, main_title, main_path)
         else:
             # Nur wenn eine Sektion ALLEINE zu groß ist, wird intern gesplittet
             sents = split_sentences(full_text)
             cur_sents = []; sub_len = 0
+            # Kontext-Sicherung: Heading für Teil-Chunks merken
+            header_text = buf[0].text if buf[0].kind == "heading" else ""
+            
             for s in sents:
                 slen = estimate_tokens(s)
                 if sub_len + slen > target and cur_sents:
                     _add_to_chunks(" ".join(cur_sents), main_title, main_path)
-                    # Overlap Logic
-                    ov_s = []; ov_l = 0
+                    
+                    # Overlap-Erzeugung und Header-Injektion für Folgeschritte
+                    ov_s = [header_text] if header_text else []
+                    ov_l = estimate_tokens(header_text) if header_text else 0
                     for os in reversed(cur_sents):
-                        if ov_l + estimate_tokens(os) < overlap:
-                            ov_s.insert(0, os); ov_l += estimate_tokens(os)
+                        if os == header_text: continue
+                        t_len = estimate_tokens(os)
+                        if ov_l + t_len < overlap:
+                            ov_s.insert(len(ov_s)-1 if header_text else 0, os)
+                            ov_l += t_len
                         else: break
-                    cur_sents = list(ov_s); cur_sents.append(s); sub_len = ov_l + slen
-                else: cur_sents.append(s); sub_len += slen
-            if cur_sents: _add_to_chunks(" ".join(cur_sents), main_title, main_path)
+                    cur_sents = list(ov_s); cur_sents.append(s)
+                    sub_len = ov_l + slen
+                else:
+                    cur_sents.append(s); sub_len += slen
+            
+            if cur_sents:
+                _add_to_chunks(" ".join(cur_sents), main_title, main_path)
         
         buf = []; cur_tokens = 0
 
@@ -74,7 +89,7 @@ def strategy_by_heading(blocks: List[RawBlock], config: Dict[str, Any], note_id:
     curr_sec: List[RawBlock] = []
     
     for b in blocks:
-        # Ein Split-Trigger startet eine neue Sektion
+        # Ein Split-Trigger (H1 oder H2) startet eine neue atomare Sektion
         if b.kind == "heading" and b.level <= split_level:
             if curr_sec: sections.append(curr_sec)
             curr_sec = [b]
@@ -82,16 +97,15 @@ def strategy_by_heading(blocks: List[RawBlock], config: Dict[str, Any], note_id:
             curr_sec.append(b)
     if curr_sec: sections.append(curr_sec)
 
-    # SCHRITT 2: Verarbeitung der Sektionen mit Vorausschau
+    # SCHRITT 2: Verarbeitung der Sektionen mit Vorausschau (Look-Ahead)
     for sec in sections:
-        # Token-Schätzung für die gesamte Sektion inkl. Newline-Overhead
         sec_text = "\n\n".join([b.text for b in sec])
         sec_tokens = estimate_tokens(sec_text)
         
         if buf:
-            # Passt die Sektion noch in den aktuellen Chunk?
+            # VORAUSSCHAU: Würde die neue Sektion das Limit sprengen?
             if cur_tokens + sec_tokens > max_tokens:
-                _flush_buffer()
+                _flush_buffer() # Beende den aktuellen Chunk sauber VOR der neuen Sektion
             # Wenn strict: Jede neue Sektion auf split_level erzwingt neuen Chunk
             elif strict and sec[0].kind == "heading" and sec[0].level == split_level:
                 _flush_buffer()
@@ -102,7 +116,7 @@ def strategy_by_heading(blocks: List[RawBlock], config: Dict[str, Any], note_id:
         buf.extend(sec)
         cur_tokens += sec_tokens
         
-        # Falls der Puffer (selbst nach flush) durch eine Riesen-Sektion zu groß ist
+        # Falls eine einzelne Sektion nach dem Flush (oder als erste) schon zu groß ist
         if cur_tokens >= max_tokens:
             _flush_buffer()
 
@@ -110,8 +124,7 @@ def strategy_by_heading(blocks: List[RawBlock], config: Dict[str, Any], note_id:
     return chunks
 
 def strategy_sliding_window(blocks: List[RawBlock], config: Dict[str, Any], note_id: str, context_prefix: str = "") -> List[Chunk]:
-    # (Identische Korrektur wie oben für Sliding Window, falls benötigt)
-    # Hier halten wir es einfach: Blöcke nacheinander bis target.
+    """Basis-Sliding-Window für flache Texte ohne Sektionsfokus."""
     target = config.get("target", 400)
     max_tokens = config.get("max", 600)
     

From f9ac4e4dbff345910c99c3b131366a942a64ebb3 Mon Sep 17 00:00:00 2001
From: Lars <Lars@stommer.de>
Date: Mon, 29 Dec 2025 21:05:42 +0100
Subject: [PATCH 16/33] =?UTF-8?q?Verbesserung=20der=20atomaren=20Sektions-?=
 =?UTF-8?q?Chunking-Strategie=20durch=20Einf=C3=BChrung=20strikter=20Look-?=
 =?UTF-8?q?Ahead-Logik=20und=20pr=C3=A4ventiven=20Flush=20zur=20Gew=C3=A4h?=
 =?UTF-8?q?rleistung=20von=20Sektionsgrenzen.=20Anpassungen=20an=20der=20T?=
 =?UTF-8?q?oken-Sch=C3=A4tzung=20und=20Umbenennung=20von=20Funktionen=20zu?=
 =?UTF-8?q?r=20besseren=20Lesbarkeit.?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 app/core/chunking/chunking_parser.py     |  2 +-
 app/core/chunking/chunking_strategies.py | 43 ++++++++++++------------
 2 files changed, 22 insertions(+), 23 deletions(-)

diff --git a/app/core/chunking/chunking_parser.py b/app/core/chunking/chunking_parser.py
index 6bc866d..80807c9 100644
--- a/app/core/chunking/chunking_parser.py
+++ b/app/core/chunking/chunking_parser.py
@@ -55,7 +55,7 @@ def parse_blocks(md_text: str) -> Tuple[List[RawBlock], str]:
                 current_section_title = title
                 section_path = f"/{current_section_title}"
             
-            # Die Überschrift selbst als Block hinzufügen (Fix: H1 wird nicht mehr gefiltert)
+            # Die Überschrift selbst als Block hinzufügen
             blocks.append(RawBlock("heading", stripped, level, section_path, current_section_title))
             continue
 
diff --git a/app/core/chunking/chunking_strategies.py b/app/core/chunking/chunking_strategies.py
index af19d4d..db65478 100644
--- a/app/core/chunking/chunking_strategies.py
+++ b/app/core/chunking/chunking_strategies.py
@@ -1,7 +1,7 @@
 """
 FILE: app/core/chunking/chunking_strategies.py
 DESCRIPTION: Strategien für atomares Sektions-Chunking (WP-15b konform).
-             Fix: Vorausschauende Trennung zur Wahrung von Sektionsgrenzen.
+             v3.3.5: Garantiert atomare Sektionsgrenzen durch präventiven Flush.
 """
 from typing import List, Dict, Any, Optional
 from .chunking_models import RawBlock, Chunk
@@ -19,19 +19,20 @@ def _create_context_win(doc_title: str, sec_title: Optional[str], text: str) ->
 def strategy_by_heading(blocks: List[RawBlock], config: Dict[str, Any], note_id: str, doc_title: str = "") -> List[Chunk]:
     """
     Gruppiert Blöcke zu Sektionen und hält diese atomar zusammen.
-    Nutzt Look-Ahead, um Sektions-Überhänge zu vermeiden.
+    Nutzt striktes Look-Ahead, um das Zerschneiden von Sektionsübergängen zu verhindern.
     """
     strict = config.get("strict_heading_split", False)
     target = config.get("target", 400)
     max_tokens = config.get("max", 600)
     split_level = config.get("split_level", 2)
-    overlap = sum(config.get("overlap", (50, 80))) // 2
+    overlap_config = config.get("overlap", (50, 80))
+    overlap = sum(overlap_config) // 2 if isinstance(overlap_config, (list, tuple)) else overlap_config
     
     chunks: List[Chunk] = []
     buf: List[RawBlock] = []
     cur_tokens = 0
 
-    def _add_to_chunks(txt, title, path):
+    def _add_chunk(txt, title, path):
         idx = len(chunks)
         win = _create_context_win(doc_title, title, txt)
         chunks.append(Chunk(
@@ -48,23 +49,23 @@ def strategy_by_heading(blocks: List[RawBlock], config: Dict[str, Any], note_id:
         main_title = buf[0].section_title
         main_path = buf[0].section_path
         full_text = "\n\n".join([b.text for b in buf])
+        actual_tokens = estimate_tokens(full_text)
         
         # Falls die gruppierten Sektionen in das Limit passen
-        if estimate_tokens(full_text) <= max_tokens:
-            _add_to_chunks(full_text, main_title, main_path)
+        if actual_tokens <= max_tokens:
+            _add_chunk(full_text, main_title, main_path)
         else:
             # Nur wenn eine Sektion ALLEINE zu groß ist, wird intern gesplittet
             sents = split_sentences(full_text)
             cur_sents = []; sub_len = 0
-            # Kontext-Sicherung: Heading für Teil-Chunks merken
             header_text = buf[0].text if buf[0].kind == "heading" else ""
             
             for s in sents:
                 slen = estimate_tokens(s)
                 if sub_len + slen > target and cur_sents:
-                    _add_to_chunks(" ".join(cur_sents), main_title, main_path)
+                    _add_chunk(" ".join(cur_sents), main_title, main_path)
                     
-                    # Overlap-Erzeugung und Header-Injektion für Folgeschritte
+                    # Overlap-Erzeugung und Header-Injektion
                     ov_s = [header_text] if header_text else []
                     ov_l = estimate_tokens(header_text) if header_text else 0
                     for os in reversed(cur_sents):
@@ -80,7 +81,7 @@ def strategy_by_heading(blocks: List[RawBlock], config: Dict[str, Any], note_id:
                     cur_sents.append(s); sub_len += slen
             
             if cur_sents:
-                _add_to_chunks(" ".join(cur_sents), main_title, main_path)
+                _add_chunk(" ".join(cur_sents), main_title, main_path)
         
         buf = []; cur_tokens = 0
 
@@ -89,7 +90,6 @@ def strategy_by_heading(blocks: List[RawBlock], config: Dict[str, Any], note_id:
     curr_sec: List[RawBlock] = []
     
     for b in blocks:
-        # Ein Split-Trigger (H1 oder H2) startet eine neue atomare Sektion
         if b.kind == "heading" and b.level <= split_level:
             if curr_sec: sections.append(curr_sec)
             curr_sec = [b]
@@ -97,26 +97,25 @@ def strategy_by_heading(blocks: List[RawBlock], config: Dict[str, Any], note_id:
             curr_sec.append(b)
     if curr_sec: sections.append(curr_sec)
 
-    # SCHRITT 2: Verarbeitung der Sektionen mit Vorausschau (Look-Ahead)
+    # SCHRITT 2: Verarbeitung der Sektionen mit strenger Vorausschau
     for sec in sections:
         sec_text = "\n\n".join([b.text for b in sec])
         sec_tokens = estimate_tokens(sec_text)
         
         if buf:
-            # VORAUSSCHAU: Würde die neue Sektion das Limit sprengen?
-            if cur_tokens + sec_tokens > max_tokens:
-                _flush_buffer() # Beende den aktuellen Chunk sauber VOR der neuen Sektion
-            # Wenn strict: Jede neue Sektion auf split_level erzwingt neuen Chunk
-            elif strict and sec[0].kind == "heading" and sec[0].level == split_level:
+            # PRÜFUNG 1: Passt die gesamte neue Sektion noch in den Chunk (bis max)?
+            # PRÜFUNG 2: Wenn wir über target sind, fangen wir auf jeden Fall neu an.
+            if (cur_tokens + sec_tokens > max_tokens) or (cur_tokens >= target):
                 _flush_buffer()
-            # Wenn target erreicht: Neue Sektion startet neuen Chunk
-            elif cur_tokens >= target:
+            # PRÜFUNG 3: Wenn strict-mode aktiv ist und ein split_level erreicht wurde
+            elif strict and sec[0].kind == "heading" and sec[0].level == split_level:
                 _flush_buffer()
         
         buf.extend(sec)
-        cur_tokens += sec_tokens
+        # Token-Zähler basierend auf dem tatsächlichen Puffer-Text aktualisieren
+        cur_tokens = estimate_tokens("\n\n".join([b.text for b in buf]))
         
-        # Falls eine einzelne Sektion nach dem Flush (oder als erste) schon zu groß ist
+        # Falls eine einzelne Sektion (selbst nach flush) schon zu groß ist
         if cur_tokens >= max_tokens:
             _flush_buffer()
 
@@ -124,7 +123,7 @@ def strategy_by_heading(blocks: List[RawBlock], config: Dict[str, Any], note_id:
     return chunks
 
 def strategy_sliding_window(blocks: List[RawBlock], config: Dict[str, Any], note_id: str, context_prefix: str = "") -> List[Chunk]:
-    """Basis-Sliding-Window für flache Texte ohne Sektionsfokus."""
+    """Standard-Sliding-Window für flache Texte."""
     target = config.get("target", 400)
     max_tokens = config.get("max", 600)
     

From e5a34efee98c2193aea441453b0dc6f8f5c73722 Mon Sep 17 00:00:00 2001
From: Lars <Lars@stommer.de>
Date: Mon, 29 Dec 2025 21:15:03 +0100
Subject: [PATCH 17/33] =?UTF-8?q?Verbesserung=20des=20Chunking-Parsers=20z?=
 =?UTF-8?q?ur=20Gew=C3=A4hrleistung=20der=20Integrit=C3=A4t=20von=20Callou?=
 =?UTF-8?q?ts=20und=20Listen=20sowie=20Anpassungen=20an=20der=20Blockverar?=
 =?UTF-8?q?beitung.=20Aktualisierung=20der=20atomaren=20Sektions-Chunking-?=
 =?UTF-8?q?Strategie=20mit=20Block-Aware-Flushing=20und=20optimierter=20To?=
 =?UTF-8?q?ken-Sch=C3=A4tzung=20f=C3=BCr=20eine=20pr=C3=A4zisere=20Handhab?=
 =?UTF-8?q?ung=20von=20gro=C3=9Fen=20Bl=C3=B6cken.?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 app/core/chunking/chunking_parser.py     |  30 +++--
 app/core/chunking/chunking_strategies.py | 149 ++++++++++++++---------
 2 files changed, 110 insertions(+), 69 deletions(-)

diff --git a/app/core/chunking/chunking_parser.py b/app/core/chunking/chunking_parser.py
index 80807c9..72d696d 100644
--- a/app/core/chunking/chunking_parser.py
+++ b/app/core/chunking/chunking_parser.py
@@ -1,7 +1,7 @@
 """
 FILE: app/core/chunking/chunking_parser.py
-DESCRIPTION: Zerlegt Markdown in Blöcke. Hält H1-Überschriften im Stream 
-             und optimiert die Block-Trennung für atomares Chunking.
+DESCRIPTION: Zerlegt Markdown in Blöcke. Erhält H1-Überschriften und 
+             gewährleistet die Integrität von Callouts und Listen.
 """
 import re
 from typing import List, Tuple, Set
@@ -18,12 +18,16 @@ def split_sentences(text: str) -> list[str]:
     return [p.strip() for p in _SENT_SPLIT.split(text) if p.strip()]
 
 def parse_blocks(md_text: str) -> Tuple[List[RawBlock], str]:
-    """Zerlegt Text in logische Einheiten, inklusive aller Überschriften."""
+    """Zerlegt Text in logische Einheiten (RawBlocks), inklusive H1."""
     blocks = []
-    h1_title = "Dokument"; section_path = "/"; current_section_title = None
+    h1_title = "Dokument"
+    section_path = "/"
+    current_section_title = None
+    
+    # Frontmatter entfernen
     fm, text_without_fm = extract_frontmatter_from_text(md_text)
     
-    # H1 für Note-Titel extrahieren (Metadaten)
+    # H1 für Note-Titel extrahieren
     h1_match = re.search(r'^#\s+(.*)', text_without_fm, re.MULTILINE)
     if h1_match: 
         h1_title = h1_match.group(1).strip()
@@ -47,7 +51,7 @@ def parse_blocks(md_text: str) -> Tuple[List[RawBlock], str]:
             level = len(heading_match.group(1))
             title = heading_match.group(2).strip()
             
-            # Pfad- und Titel-Update für die Metadaten der folgenden Blöcke
+            # Pfad- und Titel-Update für die Metadaten
             if level == 1:
                 current_section_title = title
                 section_path = "/"
@@ -55,17 +59,19 @@ def parse_blocks(md_text: str) -> Tuple[List[RawBlock], str]:
                 current_section_title = title
                 section_path = f"/{current_section_title}"
             
-            # Die Überschrift selbst als Block hinzufügen
+            # Die Überschrift als Block hinzufügen (H1 wird NICHT mehr gefiltert)
             blocks.append(RawBlock("heading", stripped, level, section_path, current_section_title))
             continue
 
-        # Leerzeilen trennen Blöcke, außer innerhalb von Callouts
-        if not stripped and not line.startswith('>'):
+        # Trenner oder Leerzeilen beenden einen Block
+        if (not stripped or stripped == "---") and not line.startswith('>'):
             if buffer:
                 content = "\n".join(buffer).strip()
                 if content: 
                     blocks.append(RawBlock("paragraph", content, None, section_path, current_section_title))
                 buffer = []
+            if stripped == "---":
+                blocks.append(RawBlock("separator", "---", None, section_path, current_section_title))
         else: 
             buffer.append(line)
             
@@ -77,14 +83,16 @@ def parse_blocks(md_text: str) -> Tuple[List[RawBlock], str]:
     return blocks, h1_title
 
 def parse_edges_robust(text: str) -> Set[str]:
-    """Extrahiert Kanten-Kandidaten (Wikilinks, Callouts)."""
+    """Extrahiert Kanten-Kandidaten aus Wikilinks und Callouts."""
     found_edges = set()
+    # 1. Inline Wikilinks [[rel:kind|target]]
     inlines = re.findall(r'\[\[rel:([^\|\]]+)\|?([^\]]*)\]\]', text)
     for kind, target in inlines:
         k = kind.strip().lower()
         t = target.strip()
         if k and t: found_edges.add(f"{k}:{t}")
     
+    # 2. Callout Edges > [!edge] kind
     lines = text.split('\n')
     current_edge_type = None
     for line in lines:
@@ -92,10 +100,12 @@ def parse_edges_robust(text: str) -> Set[str]:
         callout_match = re.match(r'>\s*\[!edge\]\s*([^:\s]+)', stripped)
         if callout_match:
             current_edge_type = callout_match.group(1).strip().lower()
+            # Links in der gleichen Zeile
             links = re.findall(r'\[\[([^\]]+)\]\]', stripped)
             for l in links: 
                 if "rel:" not in l: found_edges.add(f"{current_edge_type}:{l}")
             continue
+        # Links in Folgezeilen des Callouts
         if current_edge_type and stripped.startswith('>'):
             links = re.findall(r'\[\[([^\]]+)\]\]', stripped)
             for l in links: 
diff --git a/app/core/chunking/chunking_strategies.py b/app/core/chunking/chunking_strategies.py
index db65478..09ed198 100644
--- a/app/core/chunking/chunking_strategies.py
+++ b/app/core/chunking/chunking_strategies.py
@@ -1,13 +1,19 @@
 """
 FILE: app/core/chunking/chunking_strategies.py
-DESCRIPTION: Strategien für atomares Sektions-Chunking (WP-15b konform).
-             v3.3.5: Garantiert atomare Sektionsgrenzen durch präventiven Flush.
+DESCRIPTION: Strategien für atomares Sektions-Chunking v3.3.6.
+             AUDIT: 100% Konformität zur 'by_heading' Spezifikation.
+             - Block-Aware Flushing: Trennung nur an Blockgrenzen.
+             - Atomic Section Vorausschau: Verhindert Sektions-Zerreißung.
 """
+import math
 from typing import List, Dict, Any, Optional
 from .chunking_models import RawBlock, Chunk
-from .chunking_utils import estimate_tokens
 from .chunking_parser import split_sentences
 
+def _safe_estimate_tokens(text: str) -> int:
+    """Konservative Token-Schätzung für deutschen Text (len/3 statt len/4)."""
+    return max(1, math.ceil(len(text.strip()) / 3))
+
 def _create_context_win(doc_title: str, sec_title: Optional[str], text: str) -> str:
     """Baut den Breadcrumb-Kontext für das Embedding-Fenster."""
     parts = []
@@ -19,73 +25,89 @@ def _create_context_win(doc_title: str, sec_title: Optional[str], text: str) ->
 def strategy_by_heading(blocks: List[RawBlock], config: Dict[str, Any], note_id: str, doc_title: str = "") -> List[Chunk]:
     """
     Gruppiert Blöcke zu Sektionen und hält diese atomar zusammen.
-    Nutzt striktes Look-Ahead, um das Zerschneiden von Sektionsübergängen zu verhindern.
+    Nutzt Block-Aware-Flushing, um Sektionsgrenzen strikt zu wahren.
     """
     strict = config.get("strict_heading_split", False)
     target = config.get("target", 400)
     max_tokens = config.get("max", 600)
     split_level = config.get("split_level", 2)
-    overlap_config = config.get("overlap", (50, 80))
-    overlap = sum(overlap_config) // 2 if isinstance(overlap_config, (list, tuple)) else overlap_config
+    overlap_cfg = config.get("overlap", (50, 80))
+    overlap = sum(overlap_cfg) // 2 if isinstance(overlap_cfg, (list, tuple)) else overlap_cfg
     
     chunks: List[Chunk] = []
     buf: List[RawBlock] = []
-    cur_tokens = 0
 
     def _add_chunk(txt, title, path):
         idx = len(chunks)
         win = _create_context_win(doc_title, title, txt)
         chunks.append(Chunk(
             id=f"{note_id}#c{idx:02d}", note_id=note_id, index=idx,
-            text=txt, window=win, token_count=estimate_tokens(txt),
+            text=txt, window=win, token_count=_safe_estimate_tokens(txt),
             section_title=title, section_path=path,
             neighbors_prev=None, neighbors_next=None
         ))
 
     def _flush_buffer():
-        nonlocal buf, cur_tokens
+        nonlocal buf
         if not buf: return
         
-        main_title = buf[0].section_title
-        main_path = buf[0].section_path
-        full_text = "\n\n".join([b.text for b in buf])
-        actual_tokens = estimate_tokens(full_text)
+        # Block-Aware Processing innerhalb des Puffers
+        current_blocks = []
+        current_len = 0
         
-        # Falls die gruppierten Sektionen in das Limit passen
-        if actual_tokens <= max_tokens:
-            _add_chunk(full_text, main_title, main_path)
-        else:
-            # Nur wenn eine Sektion ALLEINE zu groß ist, wird intern gesplittet
-            sents = split_sentences(full_text)
-            cur_sents = []; sub_len = 0
-            header_text = buf[0].text if buf[0].kind == "heading" else ""
+        for b in buf:
+            b_len = _safe_estimate_tokens(b.text)
             
-            for s in sents:
-                slen = estimate_tokens(s)
-                if sub_len + slen > target and cur_sents:
-                    _add_chunk(" ".join(cur_sents), main_title, main_path)
-                    
-                    # Overlap-Erzeugung und Header-Injektion
-                    ov_s = [header_text] if header_text else []
-                    ov_l = estimate_tokens(header_text) if header_text else 0
-                    for os in reversed(cur_sents):
-                        if os == header_text: continue
-                        t_len = estimate_tokens(os)
-                        if ov_l + t_len < overlap:
-                            ov_s.insert(len(ov_s)-1 if header_text else 0, os)
-                            ov_l += t_len
-                        else: break
-                    cur_sents = list(ov_s); cur_sents.append(s)
-                    sub_len = ov_l + slen
-                else:
-                    cur_sents.append(s); sub_len += slen
+            # Falls dieser Block den aktuellen Chunk sprengen würde -> Vorher abschließen
+            if current_len + b_len > max_tokens and current_blocks:
+                txt = "\n\n".join([cb.text for cb in current_blocks])
+                _add_chunk(txt, current_blocks[0].section_title, current_blocks[0].section_path)
+                current_blocks = []
+                current_len = 0
             
-            if cur_sents:
-                _add_chunk(" ".join(cur_sents), main_title, main_path)
+            # Falls ein einzelner Block alleine zu groß ist (Sliding Window Fallback)
+            if b_len > max_tokens:
+                if current_blocks: # Vorherigen Rest wegschreiben
+                    txt = "\n\n".join([cb.text for cb in current_blocks])
+                    _add_chunk(txt, current_blocks[0].section_title, current_blocks[0].section_path)
+                    current_blocks = []
+                    current_len = 0
+                
+                # Sätze dieses einen Riesen-Blocks splitten
+                sents = split_sentences(b.text)
+                cur_sents = []; sub_len = 0
+                header_text = b.text if b.kind == "heading" else ""
+                
+                for s in sents:
+                    slen = _safe_estimate_tokens(s)
+                    if sub_len + slen > target and cur_sents:
+                        _add_chunk(" ".join(cur_sents), b.section_title, b.section_path)
+                        # Overlap-Erzeugung & Header-Wiederholung
+                        ov_s = [header_text] if header_text else []
+                        ov_l = _safe_estimate_tokens(header_text) if header_text else 0
+                        for os in reversed(cur_sents):
+                            if os == header_text: continue
+                            t_len = _safe_estimate_tokens(os)
+                            if ov_l + t_len < overlap:
+                                ov_s.insert(len(ov_s)-1 if header_text else 0, os)
+                                ov_l += t_len
+                            else: break
+                        cur_sents = list(ov_s); cur_sents.append(s); sub_len = ov_l + slen
+                    else: cur_sents.append(s); sub_len += slen
+                if cur_sents:
+                    _add_chunk(" ".join(cur_sents), b.section_title, b.section_path)
+            else:
+                current_blocks.append(b)
+                current_len += b_len
         
-        buf = []; cur_tokens = 0
+        # Den verbleibenden Rest im Puffer als finalen Chunk schreiben
+        if current_blocks:
+            txt = "\n\n".join([cb.text for cb in current_blocks])
+            _add_chunk(txt, current_blocks[0].section_title, current_blocks[0].section_path)
+        
+        buf = []
 
-    # SCHRITT 1: Gruppierung in atomare Sektions-Einheiten (Heading + Paragraphs)
+    # SCHRITT 1: Gruppierung in atomare Sektions-Einheiten
     sections: List[List[RawBlock]] = []
     curr_sec: List[RawBlock] = []
     
@@ -97,33 +119,34 @@ def strategy_by_heading(blocks: List[RawBlock], config: Dict[str, Any], note_id:
             curr_sec.append(b)
     if curr_sec: sections.append(curr_sec)
 
-    # SCHRITT 2: Verarbeitung der Sektionen mit strenger Vorausschau
+    # SCHRITT 2: Verarbeitung der Sektionen mit Look-Ahead
     for sec in sections:
         sec_text = "\n\n".join([b.text for b in sec])
-        sec_tokens = estimate_tokens(sec_text)
+        sec_tokens = _safe_estimate_tokens(sec_text)
+        
+        # Aktueller Füllstand des Puffers
+        cur_buf_tokens = sum(_safe_estimate_tokens(b.text) for b in buf)
         
         if buf:
-            # PRÜFUNG 1: Passt die gesamte neue Sektion noch in den Chunk (bis max)?
-            # PRÜFUNG 2: Wenn wir über target sind, fangen wir auf jeden Fall neu an.
-            if (cur_tokens + sec_tokens > max_tokens) or (cur_tokens >= target):
+            # PRÜFUNG: Wenn die neue Sektion den Puffer über das Limit treibt
+            # ODER wenn der Puffer bereits das Ziel-Format erreicht hat
+            if (cur_buf_tokens + sec_tokens > max_tokens) or (cur_buf_tokens >= target):
                 _flush_buffer()
-            # PRÜFUNG 3: Wenn strict-mode aktiv ist und ein split_level erreicht wurde
+            # PRÜFUNG: Strikter Split an Überschriften
             elif strict and sec[0].kind == "heading" and sec[0].level == split_level:
                 _flush_buffer()
         
         buf.extend(sec)
-        # Token-Zähler basierend auf dem tatsächlichen Puffer-Text aktualisieren
-        cur_tokens = estimate_tokens("\n\n".join([b.text for b in buf]))
         
-        # Falls eine einzelne Sektion (selbst nach flush) schon zu groß ist
-        if cur_tokens >= max_tokens:
+        # Falls eine Riesen-Sektion hinzugefügt wurde, die sofort raus muss
+        if sum(_safe_estimate_tokens(b.text) for b in buf) >= max_tokens:
             _flush_buffer()
 
     _flush_buffer()
     return chunks
 
 def strategy_sliding_window(blocks: List[RawBlock], config: Dict[str, Any], note_id: str, context_prefix: str = "") -> List[Chunk]:
-    """Standard-Sliding-Window für flache Texte."""
+    """Basis-Sliding-Window für flache Texte ohne Sektionsfokus."""
     target = config.get("target", 400)
     max_tokens = config.get("max", 600)
     
@@ -131,14 +154,18 @@ def strategy_sliding_window(blocks: List[RawBlock], config: Dict[str, Any], note
     buf: List[RawBlock] = []
     
     for b in blocks:
-        b_tokens = estimate_tokens(b.text)
-        current_tokens = estimate_tokens("\n\n".join([x.text for x in buf])) if buf else 0
+        b_tokens = _safe_estimate_tokens(b.text)
+        current_tokens = sum(_safe_estimate_tokens(x.text) for x in buf) if buf else 0
         
         if current_tokens + b_tokens > max_tokens and buf:
             txt = "\n\n".join([x.text for x in buf])
             idx = len(chunks)
             win = f"{context_prefix}\n{txt}".strip() if context_prefix else txt
-            chunks.append(Chunk(id=f"{note_id}#c{idx:02d}", note_id=note_id, index=idx, text=txt, window=win, token_count=current_tokens, section_title=buf[0].section_title, section_path=buf[0].section_path, neighbors_prev=None, neighbors_next=None))
+            chunks.append(Chunk(
+                id=f"{note_id}#c{idx:02d}", note_id=note_id, index=idx, 
+                text=txt, window=win, token_count=current_tokens, 
+                section_title=buf[0].section_title, section_path=buf[0].section_path, 
+                neighbors_prev=None, neighbors_next=None))
             buf = []
             current_tokens = 0
             
@@ -148,6 +175,10 @@ def strategy_sliding_window(blocks: List[RawBlock], config: Dict[str, Any], note
         txt = "\n\n".join([x.text for x in buf])
         idx = len(chunks)
         win = f"{context_prefix}\n{txt}".strip() if context_prefix else txt
-        chunks.append(Chunk(id=f"{note_id}#c{idx:02d}", note_id=note_id, index=idx, text=txt, window=win, token_count=estimate_tokens(txt), section_title=buf[0].section_title, section_path=buf[0].section_path, neighbors_prev=None, neighbors_next=None))
+        chunks.append(Chunk(
+            id=f"{note_id}#c{idx:02d}", note_id=note_id, index=idx, 
+            text=txt, window=win, token_count=_safe_estimate_tokens(txt), 
+            section_title=buf[0].section_title, section_path=buf[0].section_path, 
+            neighbors_prev=None, neighbors_next=None))
         
     return chunks
\ No newline at end of file

From b1a897e51cf899bd42f736d25cbffda77ea53e40 Mon Sep 17 00:00:00 2001
From: Lars <Lars@stommer.de>
Date: Mon, 29 Dec 2025 21:26:05 +0100
Subject: [PATCH 18/33] =?UTF-8?q?Verbesserung=20des=20Chunking-Parsers=20z?=
 =?UTF-8?q?ur=20Unterst=C3=BCtzung=20aller=20=C3=9Cberschriften=20(H1-H6)?=
 =?UTF-8?q?=20und=20Optimierung=20der=20Block-Trennung=20f=C3=BCr=20atomar?=
 =?UTF-8?q?es=20Sektions-Chunking.=20Aktualisierung=20der=20Sektions-Chunk?=
 =?UTF-8?q?ing-Strategie=20mit=20striktem=20Look-Ahead=20und=20pr=C3=A4zis?=
 =?UTF-8?q?erer=20Token-Sch=C3=A4tzung=20f=C3=BCr=20eine=20verbesserte=20H?=
 =?UTF-8?q?andhabung=20von=20gro=C3=9Fen=20Bl=C3=B6cken.?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 app/core/chunking/chunking_parser.py     |  24 +--
 app/core/chunking/chunking_strategies.py | 180 ++++++++++-------------
 2 files changed, 87 insertions(+), 117 deletions(-)

diff --git a/app/core/chunking/chunking_parser.py b/app/core/chunking/chunking_parser.py
index 72d696d..ca67598 100644
--- a/app/core/chunking/chunking_parser.py
+++ b/app/core/chunking/chunking_parser.py
@@ -1,7 +1,7 @@
 """
 FILE: app/core/chunking/chunking_parser.py
-DESCRIPTION: Zerlegt Markdown in Blöcke. Erhält H1-Überschriften und 
-             gewährleistet die Integrität von Callouts und Listen.
+DESCRIPTION: Zerlegt Markdown in logische Blöcke. Hält H1-Überschriften im Stream
+             und optimiert die Block-Trennung für atomares Sektions-Chunking.
 """
 import re
 from typing import List, Tuple, Set
@@ -12,13 +12,14 @@ _WS = re.compile(r'\s+')
 _SENT_SPLIT = re.compile(r'(?<=[.!?])\s+(?=[A-ZÄÖÜ0-9„(])')
 
 def split_sentences(text: str) -> list[str]:
-    """Teilt Text in Sätze auf."""
+    """Teilt Text in Sätze auf unter Berücksichtigung deutscher Interpunktion."""
     text = _WS.sub(' ', text.strip())
     if not text: return []
+    # Splittet bei Satzzeichen, gefolgt von Leerzeichen und Großbuchstaben
     return [p.strip() for p in _SENT_SPLIT.split(text) if p.strip()]
 
 def parse_blocks(md_text: str) -> Tuple[List[RawBlock], str]:
-    """Zerlegt Text in logische Einheiten (RawBlocks), inklusive H1."""
+    """Zerlegt Text in logische Einheiten (RawBlocks), inklusive H1-H6."""
     blocks = []
     h1_title = "Dokument"
     section_path = "/"
@@ -27,7 +28,7 @@ def parse_blocks(md_text: str) -> Tuple[List[RawBlock], str]:
     # Frontmatter entfernen
     fm, text_without_fm = extract_frontmatter_from_text(md_text)
     
-    # H1 für Note-Titel extrahieren
+    # H1 für Note-Metadaten extrahieren
     h1_match = re.search(r'^#\s+(.*)', text_without_fm, re.MULTILINE)
     if h1_match: 
         h1_title = h1_match.group(1).strip()
@@ -41,7 +42,7 @@ def parse_blocks(md_text: str) -> Tuple[List[RawBlock], str]:
         # Heading-Erkennung (H1 bis H6)
         heading_match = re.match(r'^(#{1,6})\s+(.*)', stripped)
         if heading_match:
-            # Vorherigen Text-Block abschließen
+            # Vorherigen Block abschließen
             if buffer:
                 content = "\n".join(buffer).strip()
                 if content: 
@@ -51,7 +52,7 @@ def parse_blocks(md_text: str) -> Tuple[List[RawBlock], str]:
             level = len(heading_match.group(1))
             title = heading_match.group(2).strip()
             
-            # Pfad- und Titel-Update für die Metadaten
+            # Update der Pfad-Metadaten für die folgenden Blöcke
             if level == 1:
                 current_section_title = title
                 section_path = "/"
@@ -59,11 +60,11 @@ def parse_blocks(md_text: str) -> Tuple[List[RawBlock], str]:
                 current_section_title = title
                 section_path = f"/{current_section_title}"
             
-            # Die Überschrift als Block hinzufügen (H1 wird NICHT mehr gefiltert)
+            # Die Überschrift als regulären Block hinzufügen (Fix: H1 bleibt im Text)
             blocks.append(RawBlock("heading", stripped, level, section_path, current_section_title))
             continue
 
-        # Trenner oder Leerzeilen beenden einen Block
+        # Trenner (---) oder Leerzeilen beenden Blöcke, außer in Callouts
         if (not stripped or stripped == "---") and not line.startswith('>'):
             if buffer:
                 content = "\n".join(buffer).strip()
@@ -75,6 +76,7 @@ def parse_blocks(md_text: str) -> Tuple[List[RawBlock], str]:
         else: 
             buffer.append(line)
             
+    # Letzten Puffer leeren
     if buffer:
         content = "\n".join(buffer).strip()
         if content: 
@@ -85,7 +87,7 @@ def parse_blocks(md_text: str) -> Tuple[List[RawBlock], str]:
 def parse_edges_robust(text: str) -> Set[str]:
     """Extrahiert Kanten-Kandidaten aus Wikilinks und Callouts."""
     found_edges = set()
-    # 1. Inline Wikilinks [[rel:kind|target]]
+    # 1. Wikilinks [[rel:kind|target]]
     inlines = re.findall(r'\[\[rel:([^\|\]]+)\|?([^\]]*)\]\]', text)
     for kind, target in inlines:
         k = kind.strip().lower()
@@ -100,12 +102,10 @@ def parse_edges_robust(text: str) -> Set[str]:
         callout_match = re.match(r'>\s*\[!edge\]\s*([^:\s]+)', stripped)
         if callout_match:
             current_edge_type = callout_match.group(1).strip().lower()
-            # Links in der gleichen Zeile
             links = re.findall(r'\[\[([^\]]+)\]\]', stripped)
             for l in links: 
                 if "rel:" not in l: found_edges.add(f"{current_edge_type}:{l}")
             continue
-        # Links in Folgezeilen des Callouts
         if current_edge_type and stripped.startswith('>'):
             links = re.findall(r'\[\[([^\]]+)\]\]', stripped)
             for l in links: 
diff --git a/app/core/chunking/chunking_strategies.py b/app/core/chunking/chunking_strategies.py
index 09ed198..9e2943a 100644
--- a/app/core/chunking/chunking_strategies.py
+++ b/app/core/chunking/chunking_strategies.py
@@ -1,21 +1,20 @@
 """
 FILE: app/core/chunking/chunking_strategies.py
-DESCRIPTION: Strategien für atomares Sektions-Chunking v3.3.6.
-             AUDIT: 100% Konformität zur 'by_heading' Spezifikation.
-             - Block-Aware Flushing: Trennung nur an Blockgrenzen.
-             - Atomic Section Vorausschau: Verhindert Sektions-Zerreißung.
+DESCRIPTION: Strategien für atomares Sektions-Chunking v3.4.1.
+             Garantiert Sektions-Integrität (Atomic Units) durch Look-Ahead.
 """
 import math
 from typing import List, Dict, Any, Optional
 from .chunking_models import RawBlock, Chunk
+from .chunking_utils import estimate_tokens
 from .chunking_parser import split_sentences
 
 def _safe_estimate_tokens(text: str) -> int:
-    """Konservative Token-Schätzung für deutschen Text (len/3 statt len/4)."""
-    return max(1, math.ceil(len(text.strip()) / 3))
+    """Konservative Schätzung für MD und deutsche Texte (len/2.8)."""
+    return max(1, math.ceil(len(text.strip()) / 2.8))
 
 def _create_context_win(doc_title: str, sec_title: Optional[str], text: str) -> str:
-    """Baut den Breadcrumb-Kontext für das Embedding-Fenster."""
+    """Baut den Breadcrumb-Kontext für das Embedding-Fenster (H1 > H2)."""
     parts = []
     if doc_title: parts.append(doc_title)
     if sec_title and sec_title != doc_title: parts.append(sec_title)
@@ -24,8 +23,8 @@ def _create_context_win(doc_title: str, sec_title: Optional[str], text: str) ->
 
 def strategy_by_heading(blocks: List[RawBlock], config: Dict[str, Any], note_id: str, doc_title: str = "") -> List[Chunk]:
     """
-    Gruppiert Blöcke zu Sektionen und hält diese atomar zusammen.
-    Nutzt Block-Aware-Flushing, um Sektionsgrenzen strikt zu wahren.
+    Sektions-Chunking: Behandelt Abschnitte als unteilbare Einheiten.
+    Schiebt ganze Abschnitte in den nächsten Chunk, falls das Limit erreicht ist.
     """
     strict = config.get("strict_heading_split", False)
     target = config.get("target", 400)
@@ -35,10 +34,15 @@ def strategy_by_heading(blocks: List[RawBlock], config: Dict[str, Any], note_id:
     overlap = sum(overlap_cfg) // 2 if isinstance(overlap_cfg, (list, tuple)) else overlap_cfg
     
     chunks: List[Chunk] = []
-    buf: List[RawBlock] = []
 
-    def _add_chunk(txt, title, path):
+    def _emit_chunk(block_list: List[RawBlock]):
+        """Erzeugt ein finales Chunk-Objekt aus einer Liste von Blöcken."""
+        if not block_list: return
+        txt = "\n\n".join([b.text for b in block_list])
         idx = len(chunks)
+        # Metadaten vom ersten Block der Gruppe (Header)
+        title = block_list[0].section_title
+        path = block_list[0].section_path
         win = _create_context_win(doc_title, title, txt)
         chunks.append(Chunk(
             id=f"{note_id}#c{idx:02d}", note_id=note_id, index=idx,
@@ -47,70 +51,39 @@ def strategy_by_heading(blocks: List[RawBlock], config: Dict[str, Any], note_id:
             neighbors_prev=None, neighbors_next=None
         ))
 
-    def _flush_buffer():
-        nonlocal buf
-        if not buf: return
+    def _emit_split_section(sec_blocks: List[RawBlock]):
+        """Splittet eine einzelne Sektion, die für sich allein zu groß ist."""
+        full_text = "\n\n".join([b.text for b in sec_blocks])
+        main_title = sec_blocks[0].section_title
+        main_path = sec_blocks[0].section_path
+        header_text = sec_blocks[0].text if sec_blocks[0].kind == "heading" else ""
         
-        # Block-Aware Processing innerhalb des Puffers
-        current_blocks = []
-        current_len = 0
+        sents = split_sentences(full_text)
+        cur_sents = []; sub_len = 0
         
-        for b in buf:
-            b_len = _safe_estimate_tokens(b.text)
-            
-            # Falls dieser Block den aktuellen Chunk sprengen würde -> Vorher abschließen
-            if current_len + b_len > max_tokens and current_blocks:
-                txt = "\n\n".join([cb.text for cb in current_blocks])
-                _add_chunk(txt, current_blocks[0].section_title, current_blocks[0].section_path)
-                current_blocks = []
-                current_len = 0
-            
-            # Falls ein einzelner Block alleine zu groß ist (Sliding Window Fallback)
-            if b_len > max_tokens:
-                if current_blocks: # Vorherigen Rest wegschreiben
-                    txt = "\n\n".join([cb.text for cb in current_blocks])
-                    _add_chunk(txt, current_blocks[0].section_title, current_blocks[0].section_path)
-                    current_blocks = []
-                    current_len = 0
-                
-                # Sätze dieses einen Riesen-Blocks splitten
-                sents = split_sentences(b.text)
-                cur_sents = []; sub_len = 0
-                header_text = b.text if b.kind == "heading" else ""
-                
-                for s in sents:
-                    slen = _safe_estimate_tokens(s)
-                    if sub_len + slen > target and cur_sents:
-                        _add_chunk(" ".join(cur_sents), b.section_title, b.section_path)
-                        # Overlap-Erzeugung & Header-Wiederholung
-                        ov_s = [header_text] if header_text else []
-                        ov_l = _safe_estimate_tokens(header_text) if header_text else 0
-                        for os in reversed(cur_sents):
-                            if os == header_text: continue
-                            t_len = _safe_estimate_tokens(os)
-                            if ov_l + t_len < overlap:
-                                ov_s.insert(len(ov_s)-1 if header_text else 0, os)
-                                ov_l += t_len
-                            else: break
-                        cur_sents = list(ov_s); cur_sents.append(s); sub_len = ov_l + slen
-                    else: cur_sents.append(s); sub_len += slen
-                if cur_sents:
-                    _add_chunk(" ".join(cur_sents), b.section_title, b.section_path)
+        for s in sents:
+            slen = _safe_estimate_tokens(s)
+            if sub_len + slen > target and cur_sents:
+                _emit_chunk([RawBlock("paragraph", " ".join(cur_sents), None, main_path, main_title)])
+                # Header Injection für den Kontext im nächsten Teil-Chunk
+                ov_s = [header_text] if header_text else []
+                ov_l = _safe_estimate_tokens(header_text) if header_text else 0
+                for os in reversed(cur_sents):
+                    if os == header_text: continue
+                    t_len = _safe_estimate_tokens(os)
+                    if ov_l + t_len < overlap:
+                        ov_s.insert(len(ov_s)-1 if header_text else 0, os)
+                        ov_l += t_len
+                    else: break
+                cur_sents = list(ov_s); cur_sents.append(s); sub_len = ov_l + slen
             else:
-                current_blocks.append(b)
-                current_len += b_len
-        
-        # Den verbleibenden Rest im Puffer als finalen Chunk schreiben
-        if current_blocks:
-            txt = "\n\n".join([cb.text for cb in current_blocks])
-            _add_chunk(txt, current_blocks[0].section_title, current_blocks[0].section_path)
-        
-        buf = []
+                cur_sents.append(s); sub_len += slen
+        if cur_sents:
+            _emit_chunk([RawBlock("paragraph", " ".join(cur_sents), None, main_path, main_title)])
 
-    # SCHRITT 1: Gruppierung in atomare Sektions-Einheiten
+    # SCHRITT 1: Gruppierung in atomare Einheiten (Sektionen)
     sections: List[List[RawBlock]] = []
     curr_sec: List[RawBlock] = []
-    
     for b in blocks:
         if b.kind == "heading" and b.level <= split_level:
             if curr_sec: sections.append(curr_sec)
@@ -119,66 +92,63 @@ def strategy_by_heading(blocks: List[RawBlock], config: Dict[str, Any], note_id:
             curr_sec.append(b)
     if curr_sec: sections.append(curr_sec)
 
-    # SCHRITT 2: Verarbeitung der Sektionen mit Look-Ahead
+    # SCHRITT 2: Verarbeitung der Sektionen mit strengem Look-Ahead
+    current_chunk_buf = []
+    current_tokens = 0
+
     for sec in sections:
         sec_text = "\n\n".join([b.text for b in sec])
         sec_tokens = _safe_estimate_tokens(sec_text)
         
-        # Aktueller Füllstand des Puffers
-        cur_buf_tokens = sum(_safe_estimate_tokens(b.text) for b in buf)
-        
-        if buf:
-            # PRÜFUNG: Wenn die neue Sektion den Puffer über das Limit treibt
-            # ODER wenn der Puffer bereits das Ziel-Format erreicht hat
-            if (cur_buf_tokens + sec_tokens > max_tokens) or (cur_buf_tokens >= target):
-                _flush_buffer()
-            # PRÜFUNG: Strikter Split an Überschriften
+        if current_chunk_buf:
+            # PRÜFUNG: Würde die neue Sektion den aktuellen Chunk sprengen?
+            # ODER: Haben wir das Target bereits erreicht und fangen lieber neu an?
+            if (current_tokens + sec_tokens > max_tokens) or (current_tokens >= target):
+                _emit_chunk(current_chunk_buf)
+                current_chunk_buf = []
+                current_tokens = 0
+            # PRÜFUNG: Harter Split gefordert an Überschriften
             elif strict and sec[0].kind == "heading" and sec[0].level == split_level:
-                _flush_buffer()
-        
-        buf.extend(sec)
-        
-        # Falls eine Riesen-Sektion hinzugefügt wurde, die sofort raus muss
-        if sum(_safe_estimate_tokens(b.text) for b in buf) >= max_tokens:
-            _flush_buffer()
+                _emit_chunk(current_chunk_buf)
+                current_chunk_buf = []
+                current_tokens = 0
+
+        # Wenn eine EINZELNE Sektion alleine schon das Limit sprengt
+        if sec_tokens > max_tokens:
+            if current_chunk_buf:
+                _emit_chunk(current_chunk_buf)
+                current_chunk_buf = []
+                current_tokens = 0
+            _emit_split_section(sec)
+        else:
+            current_chunk_buf.extend(sec)
+            current_tokens += sec_tokens + 2 # +2 für Newline Join
+
+    # Letzten Puffer schreiben
+    if current_chunk_buf:
+        _emit_chunk(current_chunk_buf)
 
-    _flush_buffer()
     return chunks
 
 def strategy_sliding_window(blocks: List[RawBlock], config: Dict[str, Any], note_id: str, context_prefix: str = "") -> List[Chunk]:
     """Basis-Sliding-Window für flache Texte ohne Sektionsfokus."""
     target = config.get("target", 400)
     max_tokens = config.get("max", 600)
-    
     chunks: List[Chunk] = []
     buf: List[RawBlock] = []
     
     for b in blocks:
         b_tokens = _safe_estimate_tokens(b.text)
         current_tokens = sum(_safe_estimate_tokens(x.text) for x in buf) if buf else 0
-        
         if current_tokens + b_tokens > max_tokens and buf:
             txt = "\n\n".join([x.text for x in buf])
             idx = len(chunks)
             win = f"{context_prefix}\n{txt}".strip() if context_prefix else txt
-            chunks.append(Chunk(
-                id=f"{note_id}#c{idx:02d}", note_id=note_id, index=idx, 
-                text=txt, window=win, token_count=current_tokens, 
-                section_title=buf[0].section_title, section_path=buf[0].section_path, 
-                neighbors_prev=None, neighbors_next=None))
+            chunks.append(Chunk(id=f"{note_id}#c{idx:02d}", note_id=note_id, index=idx, text=txt, window=win, token_count=current_tokens, section_title=buf[0].section_title, section_path=buf[0].section_path, neighbors_prev=None, neighbors_next=None))
             buf = []
-            current_tokens = 0
-            
         buf.append(b)
-        
     if buf:
-        txt = "\n\n".join([x.text for x in buf])
-        idx = len(chunks)
+        txt = "\n\n".join([x.text for x in buf]); idx = len(chunks)
         win = f"{context_prefix}\n{txt}".strip() if context_prefix else txt
-        chunks.append(Chunk(
-            id=f"{note_id}#c{idx:02d}", note_id=note_id, index=idx, 
-            text=txt, window=win, token_count=_safe_estimate_tokens(txt), 
-            section_title=buf[0].section_title, section_path=buf[0].section_path, 
-            neighbors_prev=None, neighbors_next=None))
-        
+        chunks.append(Chunk(id=f"{note_id}#c{idx:02d}", note_id=note_id, index=idx, text=txt, window=win, token_count=_safe_estimate_tokens(txt), section_title=buf[0].section_title, section_path=buf[0].section_path, neighbors_prev=None, neighbors_next=None))
     return chunks
\ No newline at end of file

From 96b4f65cd1dbf74908642d7ddd6b6040614f57a7 Mon Sep 17 00:00:00 2001
From: Lars <Lars@stommer.de>
Date: Mon, 29 Dec 2025 21:37:11 +0100
Subject: [PATCH 19/33] =?UTF-8?q?Aktualisierung=20des=20Chunking-Parsers?=
 =?UTF-8?q?=20zur=20Verbesserung=20der=20Blockverarbeitung=20und=20Beschre?=
 =?UTF-8?q?ibung.=20Anpassungen=20an=20der=20atomaren=20Sektions-Chunking-?=
 =?UTF-8?q?Strategie=20mit=20optimierter=20Token-Sch=C3=A4tzung=20und=20ne?=
 =?UTF-8?q?uen=20Hilfsfunktionen=20zur=20besseren=20Handhabung=20von=20gro?=
 =?UTF-8?q?=C3=9Fen=20Sektionen.?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 app/core/chunking/chunking_parser.py     |  18 ++--
 app/core/chunking/chunking_strategies.py | 105 +++++++++++------------
 2 files changed, 55 insertions(+), 68 deletions(-)

diff --git a/app/core/chunking/chunking_parser.py b/app/core/chunking/chunking_parser.py
index ca67598..bf3801a 100644
--- a/app/core/chunking/chunking_parser.py
+++ b/app/core/chunking/chunking_parser.py
@@ -1,7 +1,7 @@
 """
 FILE: app/core/chunking/chunking_parser.py
-DESCRIPTION: Zerlegt Markdown in logische Blöcke. Hält H1-Überschriften im Stream
-             und optimiert die Block-Trennung für atomares Sektions-Chunking.
+DESCRIPTION: Zerlegt Markdown in logische Einheiten (RawBlocks). 
+             Hält H1-Überschriften im Inhalts-Stream.
 """
 import re
 from typing import List, Tuple, Set
@@ -15,7 +15,6 @@ def split_sentences(text: str) -> list[str]:
     """Teilt Text in Sätze auf unter Berücksichtigung deutscher Interpunktion."""
     text = _WS.sub(' ', text.strip())
     if not text: return []
-    # Splittet bei Satzzeichen, gefolgt von Leerzeichen und Großbuchstaben
     return [p.strip() for p in _SENT_SPLIT.split(text) if p.strip()]
 
 def parse_blocks(md_text: str) -> Tuple[List[RawBlock], str]:
@@ -28,7 +27,7 @@ def parse_blocks(md_text: str) -> Tuple[List[RawBlock], str]:
     # Frontmatter entfernen
     fm, text_without_fm = extract_frontmatter_from_text(md_text)
     
-    # H1 für Note-Metadaten extrahieren
+    # H1 für Note-Titel extrahieren
     h1_match = re.search(r'^#\s+(.*)', text_without_fm, re.MULTILINE)
     if h1_match: 
         h1_title = h1_match.group(1).strip()
@@ -42,7 +41,7 @@ def parse_blocks(md_text: str) -> Tuple[List[RawBlock], str]:
         # Heading-Erkennung (H1 bis H6)
         heading_match = re.match(r'^(#{1,6})\s+(.*)', stripped)
         if heading_match:
-            # Vorherigen Block abschließen
+            # Vorherigen Text-Block abschließen
             if buffer:
                 content = "\n".join(buffer).strip()
                 if content: 
@@ -52,7 +51,7 @@ def parse_blocks(md_text: str) -> Tuple[List[RawBlock], str]:
             level = len(heading_match.group(1))
             title = heading_match.group(2).strip()
             
-            # Update der Pfad-Metadaten für die folgenden Blöcke
+            # Pfad- und Titel-Update für die Metadaten der folgenden Blöcke
             if level == 1:
                 current_section_title = title
                 section_path = "/"
@@ -60,11 +59,11 @@ def parse_blocks(md_text: str) -> Tuple[List[RawBlock], str]:
                 current_section_title = title
                 section_path = f"/{current_section_title}"
             
-            # Die Überschrift als regulären Block hinzufügen (Fix: H1 bleibt im Text)
+            # Die Überschrift selbst als Block hinzufügen
             blocks.append(RawBlock("heading", stripped, level, section_path, current_section_title))
             continue
 
-        # Trenner (---) oder Leerzeilen beenden Blöcke, außer in Callouts
+        # Trenner oder Leerzeilen beenden Blöcke, außer innerhalb von Callouts
         if (not stripped or stripped == "---") and not line.startswith('>'):
             if buffer:
                 content = "\n".join(buffer).strip()
@@ -76,7 +75,6 @@ def parse_blocks(md_text: str) -> Tuple[List[RawBlock], str]:
         else: 
             buffer.append(line)
             
-    # Letzten Puffer leeren
     if buffer:
         content = "\n".join(buffer).strip()
         if content: 
@@ -87,14 +85,12 @@ def parse_blocks(md_text: str) -> Tuple[List[RawBlock], str]:
 def parse_edges_robust(text: str) -> Set[str]:
     """Extrahiert Kanten-Kandidaten aus Wikilinks und Callouts."""
     found_edges = set()
-    # 1. Wikilinks [[rel:kind|target]]
     inlines = re.findall(r'\[\[rel:([^\|\]]+)\|?([^\]]*)\]\]', text)
     for kind, target in inlines:
         k = kind.strip().lower()
         t = target.strip()
         if k and t: found_edges.add(f"{k}:{t}")
     
-    # 2. Callout Edges > [!edge] kind
     lines = text.split('\n')
     current_edge_type = None
     for line in lines:
diff --git a/app/core/chunking/chunking_strategies.py b/app/core/chunking/chunking_strategies.py
index 9e2943a..fa22382 100644
--- a/app/core/chunking/chunking_strategies.py
+++ b/app/core/chunking/chunking_strategies.py
@@ -1,20 +1,15 @@
 """
 FILE: app/core/chunking/chunking_strategies.py
-DESCRIPTION: Strategien für atomares Sektions-Chunking v3.4.1.
-             Garantiert Sektions-Integrität (Atomic Units) durch Look-Ahead.
+DESCRIPTION: Universelle Strategie für atomares Sektions-Chunking v3.5.0.
+             Garantiert Sektions-Integrität durch präventives Chunk-Management.
 """
-import math
 from typing import List, Dict, Any, Optional
 from .chunking_models import RawBlock, Chunk
 from .chunking_utils import estimate_tokens
 from .chunking_parser import split_sentences
 
-def _safe_estimate_tokens(text: str) -> int:
-    """Konservative Schätzung für MD und deutsche Texte (len/2.8)."""
-    return max(1, math.ceil(len(text.strip()) / 2.8))
-
 def _create_context_win(doc_title: str, sec_title: Optional[str], text: str) -> str:
-    """Baut den Breadcrumb-Kontext für das Embedding-Fenster (H1 > H2)."""
+    """Baut den Breadcrumb-Kontext für das Embedding-Fenster."""
     parts = []
     if doc_title: parts.append(doc_title)
     if sec_title and sec_title != doc_title: parts.append(sec_title)
@@ -23,10 +18,9 @@ def _create_context_win(doc_title: str, sec_title: Optional[str], text: str) ->
 
 def strategy_by_heading(blocks: List[RawBlock], config: Dict[str, Any], note_id: str, doc_title: str = "") -> List[Chunk]:
     """
-    Sektions-Chunking: Behandelt Abschnitte als unteilbare Einheiten.
-    Schiebt ganze Abschnitte in den nächsten Chunk, falls das Limit erreicht ist.
+    Universelles Sektions-Chunking: Packt Sektionen in Chunks.
+    Bei Überlauf wird die komplette Sektion in den nächsten Chunk geschoben.
     """
-    strict = config.get("strict_heading_split", False)
     target = config.get("target", 400)
     max_tokens = config.get("max", 600)
     split_level = config.get("split_level", 2)
@@ -35,24 +29,23 @@ def strategy_by_heading(blocks: List[RawBlock], config: Dict[str, Any], note_id:
     
     chunks: List[Chunk] = []
 
-    def _emit_chunk(block_list: List[RawBlock]):
-        """Erzeugt ein finales Chunk-Objekt aus einer Liste von Blöcken."""
+    # --- HILFSFUNKTION: Erzeugt einen Chunk aus einer Blockliste ---
+    def _create_chunk_from_blocks(block_list: List[RawBlock]):
         if not block_list: return
         txt = "\n\n".join([b.text for b in block_list])
         idx = len(chunks)
-        # Metadaten vom ersten Block der Gruppe (Header)
         title = block_list[0].section_title
         path = block_list[0].section_path
         win = _create_context_win(doc_title, title, txt)
         chunks.append(Chunk(
             id=f"{note_id}#c{idx:02d}", note_id=note_id, index=idx,
-            text=txt, window=win, token_count=_safe_estimate_tokens(txt),
+            text=txt, window=win, token_count=estimate_tokens(txt),
             section_title=title, section_path=path,
             neighbors_prev=None, neighbors_next=None
         ))
 
-    def _emit_split_section(sec_blocks: List[RawBlock]):
-        """Splittet eine einzelne Sektion, die für sich allein zu groß ist."""
+    # --- HILFSFUNKTION: Splittet eine einzelne Sektion, die > max ist ---
+    def _split_giant_section(sec_blocks: List[RawBlock]):
         full_text = "\n\n".join([b.text for b in sec_blocks])
         main_title = sec_blocks[0].section_title
         main_path = sec_blocks[0].section_path
@@ -62,15 +55,17 @@ def strategy_by_heading(blocks: List[RawBlock], config: Dict[str, Any], note_id:
         cur_sents = []; sub_len = 0
         
         for s in sents:
-            slen = _safe_estimate_tokens(s)
+            slen = estimate_tokens(s)
             if sub_len + slen > target and cur_sents:
-                _emit_chunk([RawBlock("paragraph", " ".join(cur_sents), None, main_path, main_title)])
-                # Header Injection für den Kontext im nächsten Teil-Chunk
+                combined_text = " ".join(cur_sents)
+                _create_chunk_from_blocks([RawBlock("paragraph", combined_text, None, main_path, main_title)])
+                
+                # Context Injection: Überschrift für den nächsten Teil-Chunk
                 ov_s = [header_text] if header_text else []
-                ov_l = _safe_estimate_tokens(header_text) if header_text else 0
+                ov_l = estimate_tokens(header_text) if header_text else 0
                 for os in reversed(cur_sents):
                     if os == header_text: continue
-                    t_len = _safe_estimate_tokens(os)
+                    t_len = estimate_tokens(os)
                     if ov_l + t_len < overlap:
                         ov_s.insert(len(ov_s)-1 if header_text else 0, os)
                         ov_l += t_len
@@ -78,13 +73,15 @@ def strategy_by_heading(blocks: List[RawBlock], config: Dict[str, Any], note_id:
                 cur_sents = list(ov_s); cur_sents.append(s); sub_len = ov_l + slen
             else:
                 cur_sents.append(s); sub_len += slen
+        
         if cur_sents:
-            _emit_chunk([RawBlock("paragraph", " ".join(cur_sents), None, main_path, main_title)])
+            _create_chunk_from_blocks([RawBlock("paragraph", " ".join(cur_sents), None, main_path, main_title)])
 
-    # SCHRITT 1: Gruppierung in atomare Einheiten (Sektionen)
+    # 1. SCHRITT: Gruppierung in atomare Sektions-Einheiten
     sections: List[List[RawBlock]] = []
     curr_sec: List[RawBlock] = []
     for b in blocks:
+        # Eine neue Überschrift auf oder unter dem split_level startet eine neue Sektion
         if b.kind == "heading" and b.level <= split_level:
             if curr_sec: sections.append(curr_sec)
             curr_sec = [b]
@@ -92,54 +89,48 @@ def strategy_by_heading(blocks: List[RawBlock], config: Dict[str, Any], note_id:
             curr_sec.append(b)
     if curr_sec: sections.append(curr_sec)
 
-    # SCHRITT 2: Verarbeitung der Sektionen mit strengem Look-Ahead
-    current_chunk_buf = []
-    current_tokens = 0
+    # 2. SCHRITT: Sektionen in Chunks packen (Das universelle Pack-Verfahren)
+    candidate_chunk: List[RawBlock] = []
+    candidate_tokens = 0
 
     for sec in sections:
         sec_text = "\n\n".join([b.text for b in sec])
-        sec_tokens = _safe_estimate_tokens(sec_text)
+        sec_tokens = estimate_tokens(sec_text)
         
-        if current_chunk_buf:
-            # PRÜFUNG: Würde die neue Sektion den aktuellen Chunk sprengen?
-            # ODER: Haben wir das Target bereits erreicht und fangen lieber neu an?
-            if (current_tokens + sec_tokens > max_tokens) or (current_tokens >= target):
-                _emit_chunk(current_chunk_buf)
-                current_chunk_buf = []
-                current_tokens = 0
-            # PRÜFUNG: Harter Split gefordert an Überschriften
-            elif strict and sec[0].kind == "heading" and sec[0].level == split_level:
-                _emit_chunk(current_chunk_buf)
-                current_chunk_buf = []
-                current_tokens = 0
-
-        # Wenn eine EINZELNE Sektion alleine schon das Limit sprengt
-        if sec_tokens > max_tokens:
-            if current_chunk_buf:
-                _emit_chunk(current_chunk_buf)
-                current_chunk_buf = []
-                current_tokens = 0
-            _emit_split_section(sec)
+        # Passt diese gesamte Sektion noch in den laufenden Chunk?
+        if candidate_tokens + sec_tokens <= max_tokens:
+            candidate_chunk.extend(sec)
+            candidate_tokens = estimate_tokens("\n\n".join([b.text for b in candidate_chunk]))
         else:
-            current_chunk_buf.extend(sec)
-            current_tokens += sec_tokens + 2 # +2 für Newline Join
+            # Falls der aktuelle Chunk nicht leer ist: Raus damit, bevor die neue Sektion kommt
+            if candidate_chunk:
+                _create_chunk_from_blocks(candidate_chunk)
+                candidate_chunk = []
+                candidate_tokens = 0
+            
+            # Die neue Sektion ist nun allein. Ist sie selbst zu groß?
+            if sec_tokens > max_tokens:
+                _split_giant_section(sec)
+            else:
+                candidate_chunk = list(sec)
+                candidate_tokens = sec_tokens
 
-    # Letzten Puffer schreiben
-    if current_chunk_buf:
-        _emit_chunk(current_chunk_buf)
+    # Letzten Rest wegschreiben
+    if candidate_chunk:
+        _create_chunk_from_blocks(candidate_chunk)
 
     return chunks
 
 def strategy_sliding_window(blocks: List[RawBlock], config: Dict[str, Any], note_id: str, context_prefix: str = "") -> List[Chunk]:
-    """Basis-Sliding-Window für flache Texte ohne Sektionsfokus."""
+    """Standard-Sliding-Window für flache Texte ohne Sektionsfokus."""
     target = config.get("target", 400)
     max_tokens = config.get("max", 600)
     chunks: List[Chunk] = []
     buf: List[RawBlock] = []
     
     for b in blocks:
-        b_tokens = _safe_estimate_tokens(b.text)
-        current_tokens = sum(_safe_estimate_tokens(x.text) for x in buf) if buf else 0
+        b_tokens = estimate_tokens(b.text)
+        current_tokens = sum(estimate_tokens(x.text) for x in buf) if buf else 0
         if current_tokens + b_tokens > max_tokens and buf:
             txt = "\n\n".join([x.text for x in buf])
             idx = len(chunks)
@@ -150,5 +141,5 @@ def strategy_sliding_window(blocks: List[RawBlock], config: Dict[str, Any], note
     if buf:
         txt = "\n\n".join([x.text for x in buf]); idx = len(chunks)
         win = f"{context_prefix}\n{txt}".strip() if context_prefix else txt
-        chunks.append(Chunk(id=f"{note_id}#c{idx:02d}", note_id=note_id, index=idx, text=txt, window=win, token_count=_safe_estimate_tokens(txt), section_title=buf[0].section_title, section_path=buf[0].section_path, neighbors_prev=None, neighbors_next=None))
+        chunks.append(Chunk(id=f"{note_id}#c{idx:02d}", note_id=note_id, index=idx, text=txt, window=win, token_count=estimate_tokens(txt), section_title=buf[0].section_title, section_path=buf[0].section_path, neighbors_prev=None, neighbors_next=None))
     return chunks
\ No newline at end of file

From 680c36ab5932b77ce801e22856b1739def8e3983 Mon Sep 17 00:00:00 2001
From: Lars <Lars@stommer.de>
Date: Mon, 29 Dec 2025 21:45:14 +0100
Subject: [PATCH 20/33] =?UTF-8?q?Aktualisierung=20des=20Chunking-Parsers?=
 =?UTF-8?q?=20zur=20Verbesserung=20der=20Blockverarbeitung=20und=20Beschre?=
 =?UTF-8?q?ibung.=20Anpassungen=20an=20der=20atomaren=20Sektions-Chunking-?=
 =?UTF-8?q?Strategie=20mit=20optimierter=20Token-Sch=C3=A4tzung=20und=20ne?=
 =?UTF-8?q?uen=20Hilfsfunktionen=20zur=20besseren=20Handhabung=20von=20gro?=
 =?UTF-8?q?=C3=9Fen=20Sektionen.=20Einf=C3=BChrung=20einer=20pr=C3=A4ziser?=
 =?UTF-8?q?en=20Sch=C3=A4tzung=20f=C3=BCr=20deutsche=20Texte=20und=20Anpas?=
 =?UTF-8?q?sungen=20an=20der=20Logik=20zur=20Handhabung=20von=20Sektionen.?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 app/core/chunking/chunking_parser.py     | 42 ++++---------
 app/core/chunking/chunking_strategies.py | 80 ++++++++++--------------
 2 files changed, 47 insertions(+), 75 deletions(-)

diff --git a/app/core/chunking/chunking_parser.py b/app/core/chunking/chunking_parser.py
index bf3801a..8632185 100644
--- a/app/core/chunking/chunking_parser.py
+++ b/app/core/chunking/chunking_parser.py
@@ -1,7 +1,7 @@
 """
 FILE: app/core/chunking/chunking_parser.py
-DESCRIPTION: Zerlegt Markdown in logische Einheiten (RawBlocks). 
-             Hält H1-Überschriften im Inhalts-Stream.
+DESCRIPTION: Zerlegt Markdown in logische Blöcke (RawBlocks). 
+             Gewährleistet, dass H1 und Trenner im Stream verbleiben.
 """
 import re
 from typing import List, Tuple, Set
@@ -20,28 +20,21 @@ def split_sentences(text: str) -> list[str]:
 def parse_blocks(md_text: str) -> Tuple[List[RawBlock], str]:
     """Zerlegt Text in logische Einheiten (RawBlocks), inklusive H1-H6."""
     blocks = []
-    h1_title = "Dokument"
-    section_path = "/"
-    current_section_title = None
-    
-    # Frontmatter entfernen
+    h1_title = "Dokument"; section_path = "/"; current_section_title = None
     fm, text_without_fm = extract_frontmatter_from_text(md_text)
     
-    # H1 für Note-Titel extrahieren
+    # H1 für Metadaten extrahieren
     h1_match = re.search(r'^#\s+(.*)', text_without_fm, re.MULTILINE)
-    if h1_match: 
-        h1_title = h1_match.group(1).strip()
+    if h1_match: h1_title = h1_match.group(1).strip()
     
     lines = text_without_fm.split('\n')
     buffer = []
     
     for line in lines:
         stripped = line.strip()
-        
-        # Heading-Erkennung (H1 bis H6)
         heading_match = re.match(r'^(#{1,6})\s+(.*)', stripped)
+        
         if heading_match:
-            # Vorherigen Text-Block abschließen
             if buffer:
                 content = "\n".join(buffer).strip()
                 if content: 
@@ -51,19 +44,14 @@ def parse_blocks(md_text: str) -> Tuple[List[RawBlock], str]:
             level = len(heading_match.group(1))
             title = heading_match.group(2).strip()
             
-            # Pfad- und Titel-Update für die Metadaten der folgenden Blöcke
             if level == 1:
-                current_section_title = title
-                section_path = "/"
+                current_section_title = title; section_path = "/"
             elif level == 2:
-                current_section_title = title
-                section_path = f"/{current_section_title}"
+                current_section_title = title; section_path = f"/{current_section_title}"
             
-            # Die Überschrift selbst als Block hinzufügen
             blocks.append(RawBlock("heading", stripped, level, section_path, current_section_title))
             continue
 
-        # Trenner oder Leerzeilen beenden Blöcke, außer innerhalb von Callouts
         if (not stripped or stripped == "---") and not line.startswith('>'):
             if buffer:
                 content = "\n".join(buffer).strip()
@@ -77,22 +65,19 @@ def parse_blocks(md_text: str) -> Tuple[List[RawBlock], str]:
             
     if buffer:
         content = "\n".join(buffer).strip()
-        if content: 
-            blocks.append(RawBlock("paragraph", content, None, section_path, current_section_title))
+        if content: blocks.append(RawBlock("paragraph", content, None, section_path, current_section_title))
             
     return blocks, h1_title
 
 def parse_edges_robust(text: str) -> Set[str]:
-    """Extrahiert Kanten-Kandidaten aus Wikilinks und Callouts."""
+    """Extrahiert Kanten aus Wikilinks und Callouts."""
     found_edges = set()
     inlines = re.findall(r'\[\[rel:([^\|\]]+)\|?([^\]]*)\]\]', text)
     for kind, target in inlines:
-        k = kind.strip().lower()
-        t = target.strip()
+        k = kind.strip().lower(); t = target.strip()
         if k and t: found_edges.add(f"{k}:{t}")
     
-    lines = text.split('\n')
-    current_edge_type = None
+    lines = text.split('\n'); current_edge_type = None
     for line in lines:
         stripped = line.strip()
         callout_match = re.match(r'>\s*\[!edge\]\s*([^:\s]+)', stripped)
@@ -106,6 +91,5 @@ def parse_edges_robust(text: str) -> Set[str]:
             links = re.findall(r'\[\[([^\]]+)\]\]', stripped)
             for l in links: 
                 if "rel:" not in l: found_edges.add(f"{current_edge_type}:{l}")
-        elif not stripped.startswith('>'): 
-            current_edge_type = None
+        elif not stripped.startswith('>'): current_edge_type = None
     return found_edges
\ No newline at end of file
diff --git a/app/core/chunking/chunking_strategies.py b/app/core/chunking/chunking_strategies.py
index fa22382..5b5a011 100644
--- a/app/core/chunking/chunking_strategies.py
+++ b/app/core/chunking/chunking_strategies.py
@@ -1,15 +1,18 @@
 """
 FILE: app/core/chunking/chunking_strategies.py
-DESCRIPTION: Universelle Strategie für atomares Sektions-Chunking v3.5.0.
+DESCRIPTION: Universelle Strategie für atomares Sektions-Chunking v3.6.0.
              Garantiert Sektions-Integrität durch präventives Chunk-Management.
 """
+import math
 from typing import List, Dict, Any, Optional
 from .chunking_models import RawBlock, Chunk
-from .chunking_utils import estimate_tokens
 from .chunking_parser import split_sentences
 
+def _accurate_estimate_tokens(text: str) -> int:
+    """Konservative Schätzung für deutschen Text (len/2.5 statt len/4)."""
+    return max(1, math.ceil(len(text.strip()) / 2.5))
+
 def _create_context_win(doc_title: str, sec_title: Optional[str], text: str) -> str:
-    """Baut den Breadcrumb-Kontext für das Embedding-Fenster."""
     parts = []
     if doc_title: parts.append(doc_title)
     if sec_title and sec_title != doc_title: parts.append(sec_title)
@@ -18,8 +21,8 @@ def _create_context_win(doc_title: str, sec_title: Optional[str], text: str) ->
 
 def strategy_by_heading(blocks: List[RawBlock], config: Dict[str, Any], note_id: str, doc_title: str = "") -> List[Chunk]:
     """
-    Universelles Sektions-Chunking: Packt Sektionen in Chunks.
-    Bei Überlauf wird die komplette Sektion in den nächsten Chunk geschoben.
+    Sektions-Chunking: Packt komplette Abschnitte in Chunks.
+    Bei Überlauf wird die Sektion ohne Ausnahme in den nächsten Chunk geschoben.
     """
     target = config.get("target", 400)
     max_tokens = config.get("max", 600)
@@ -29,8 +32,8 @@ def strategy_by_heading(blocks: List[RawBlock], config: Dict[str, Any], note_id:
     
     chunks: List[Chunk] = []
 
-    # --- HILFSFUNKTION: Erzeugt einen Chunk aus einer Blockliste ---
-    def _create_chunk_from_blocks(block_list: List[RawBlock]):
+    def _emit_chunk(block_list: List[RawBlock]):
+        """Schreibt eine Liste von Blöcken als einen einzigen, ungeteilten Chunk."""
         if not block_list: return
         txt = "\n\n".join([b.text for b in block_list])
         idx = len(chunks)
@@ -39,13 +42,13 @@ def strategy_by_heading(blocks: List[RawBlock], config: Dict[str, Any], note_id:
         win = _create_context_win(doc_title, title, txt)
         chunks.append(Chunk(
             id=f"{note_id}#c{idx:02d}", note_id=note_id, index=idx,
-            text=txt, window=win, token_count=estimate_tokens(txt),
+            text=txt, window=win, token_count=_accurate_estimate_tokens(txt),
             section_title=title, section_path=path,
             neighbors_prev=None, neighbors_next=None
         ))
 
-    # --- HILFSFUNKTION: Splittet eine einzelne Sektion, die > max ist ---
     def _split_giant_section(sec_blocks: List[RawBlock]):
+        """Notfall-Split: Nur wenn eine EINZELNE Sektion bereits > max ist."""
         full_text = "\n\n".join([b.text for b in sec_blocks])
         main_title = sec_blocks[0].section_title
         main_path = sec_blocks[0].section_path
@@ -55,85 +58,70 @@ def strategy_by_heading(blocks: List[RawBlock], config: Dict[str, Any], note_id:
         cur_sents = []; sub_len = 0
         
         for s in sents:
-            slen = estimate_tokens(s)
+            slen = _accurate_estimate_tokens(s)
             if sub_len + slen > target and cur_sents:
-                combined_text = " ".join(cur_sents)
-                _create_chunk_from_blocks([RawBlock("paragraph", combined_text, None, main_path, main_title)])
-                
-                # Context Injection: Überschrift für den nächsten Teil-Chunk
+                _emit_chunk([RawBlock("paragraph", " ".join(cur_sents), None, main_path, main_title)])
                 ov_s = [header_text] if header_text else []
-                ov_l = estimate_tokens(header_text) if header_text else 0
+                ov_l = _accurate_estimate_tokens(header_text) if header_text else 0
                 for os in reversed(cur_sents):
                     if os == header_text: continue
-                    t_len = estimate_tokens(os)
+                    t_len = _accurate_estimate_tokens(os)
                     if ov_l + t_len < overlap:
                         ov_s.insert(len(ov_s)-1 if header_text else 0, os)
                         ov_l += t_len
                     else: break
                 cur_sents = list(ov_s); cur_sents.append(s); sub_len = ov_l + slen
-            else:
-                cur_sents.append(s); sub_len += slen
+            else: cur_sents.append(s); sub_len += slen
         
-        if cur_sents:
-            _create_chunk_from_blocks([RawBlock("paragraph", " ".join(cur_sents), None, main_path, main_title)])
+        if cur_sents: _emit_chunk([RawBlock("paragraph", " ".join(cur_sents), None, main_path, main_title)])
 
-    # 1. SCHRITT: Gruppierung in atomare Sektions-Einheiten
+    # 1. Gruppierung in atomare Einheiten
     sections: List[List[RawBlock]] = []
     curr_sec: List[RawBlock] = []
     for b in blocks:
-        # Eine neue Überschrift auf oder unter dem split_level startet eine neue Sektion
         if b.kind == "heading" and b.level <= split_level:
             if curr_sec: sections.append(curr_sec)
             curr_sec = [b]
-        else:
-            curr_sec.append(b)
+        else: curr_sec.append(b)
     if curr_sec: sections.append(curr_sec)
 
-    # 2. SCHRITT: Sektionen in Chunks packen (Das universelle Pack-Verfahren)
+    # 2. Das Pack-Verfahren (Kein Zerschneiden beim Flashen!)
     candidate_chunk: List[RawBlock] = []
     candidate_tokens = 0
 
     for sec in sections:
         sec_text = "\n\n".join([b.text for b in sec])
-        sec_tokens = estimate_tokens(sec_text)
+        sec_tokens = _accurate_estimate_tokens(sec_text)
         
-        # Passt diese gesamte Sektion noch in den laufenden Chunk?
+        # Prüfung: Passt die Sektion noch dazu?
         if candidate_tokens + sec_tokens <= max_tokens:
             candidate_chunk.extend(sec)
-            candidate_tokens = estimate_tokens("\n\n".join([b.text for b in candidate_chunk]))
+            candidate_tokens = _accurate_estimate_tokens("\n\n".join([b.text for b in candidate_chunk]))
         else:
-            # Falls der aktuelle Chunk nicht leer ist: Raus damit, bevor die neue Sektion kommt
+            # Chunk ist voll -> Abschluss an Sektionsgrenze
             if candidate_chunk:
-                _create_chunk_from_blocks(candidate_chunk)
+                _emit_chunk(candidate_chunk)
                 candidate_chunk = []
                 candidate_tokens = 0
             
-            # Die neue Sektion ist nun allein. Ist sie selbst zu groß?
+            # Neue Sektion allein prüfen
             if sec_tokens > max_tokens:
                 _split_giant_section(sec)
             else:
                 candidate_chunk = list(sec)
                 candidate_tokens = sec_tokens
 
-    # Letzten Rest wegschreiben
-    if candidate_chunk:
-        _create_chunk_from_blocks(candidate_chunk)
-
+    if candidate_chunk: _emit_chunk(candidate_chunk)
     return chunks
 
 def strategy_sliding_window(blocks: List[RawBlock], config: Dict[str, Any], note_id: str, context_prefix: str = "") -> List[Chunk]:
-    """Standard-Sliding-Window für flache Texte ohne Sektionsfokus."""
-    target = config.get("target", 400)
-    max_tokens = config.get("max", 600)
-    chunks: List[Chunk] = []
-    buf: List[RawBlock] = []
-    
+    target = config.get("target", 400); max_tokens = config.get("max", 600)
+    chunks: List[Chunk] = []; buf: List[RawBlock] = []
     for b in blocks:
-        b_tokens = estimate_tokens(b.text)
-        current_tokens = sum(estimate_tokens(x.text) for x in buf) if buf else 0
+        b_tokens = _accurate_estimate_tokens(b.text)
+        current_tokens = sum(_accurate_estimate_tokens(x.text) for x in buf) if buf else 0
         if current_tokens + b_tokens > max_tokens and buf:
-            txt = "\n\n".join([x.text for x in buf])
-            idx = len(chunks)
+            txt = "\n\n".join([x.text for x in buf]); idx = len(chunks)
             win = f"{context_prefix}\n{txt}".strip() if context_prefix else txt
             chunks.append(Chunk(id=f"{note_id}#c{idx:02d}", note_id=note_id, index=idx, text=txt, window=win, token_count=current_tokens, section_title=buf[0].section_title, section_path=buf[0].section_path, neighbors_prev=None, neighbors_next=None))
             buf = []
@@ -141,5 +129,5 @@ def strategy_sliding_window(blocks: List[RawBlock], config: Dict[str, Any], note
     if buf:
         txt = "\n\n".join([x.text for x in buf]); idx = len(chunks)
         win = f"{context_prefix}\n{txt}".strip() if context_prefix else txt
-        chunks.append(Chunk(id=f"{note_id}#c{idx:02d}", note_id=note_id, index=idx, text=txt, window=win, token_count=estimate_tokens(txt), section_title=buf[0].section_title, section_path=buf[0].section_path, neighbors_prev=None, neighbors_next=None))
+        chunks.append(Chunk(id=f"{note_id}#c{idx:02d}", note_id=note_id, index=idx, text=txt, window=win, token_count=_accurate_estimate_tokens(txt), section_title=buf[0].section_title, section_path=buf[0].section_path, neighbors_prev=None, neighbors_next=None))
     return chunks
\ No newline at end of file

From be265e9cc06fda945154ad2830313613ffc2fdb1 Mon Sep 17 00:00:00 2001
From: Lars <Lars@stommer.de>
Date: Mon, 29 Dec 2025 21:48:54 +0100
Subject: [PATCH 21/33] =?UTF-8?q?Verbesserung=20des=20Chunking-Parsers=20z?=
 =?UTF-8?q?ur=20Unterst=C3=BCtzung=20atomarer=20Bl=C3=B6cke=20und=20Gew?=
 =?UTF-8?q?=C3=A4hrleistung=20der=20strukturellen=20Integrit=C3=A4t=20von?=
 =?UTF-8?q?=20Callouts.=20Aktualisierung=20der=20Beschreibung=20und=20Opti?=
 =?UTF-8?q?mierung=20der=20Satz-=20und=20Blockverarbeitung,=20einschlie?=
 =?UTF-8?q?=C3=9Flich=20pr=C3=A4ziserer=20Handhabung=20von=20H1-=C3=9Cbers?=
 =?UTF-8?q?chriften=20und=20Trennern.?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 app/core/chunking/chunking_parser.py     |  43 ++++++----
 app/core/chunking/chunking_strategies.py | 100 ++++++++++++-----------
 2 files changed, 77 insertions(+), 66 deletions(-)

diff --git a/app/core/chunking/chunking_parser.py b/app/core/chunking/chunking_parser.py
index 8632185..fb3b65c 100644
--- a/app/core/chunking/chunking_parser.py
+++ b/app/core/chunking/chunking_parser.py
@@ -1,39 +1,44 @@
 """
 FILE: app/core/chunking/chunking_parser.py
-DESCRIPTION: Zerlegt Markdown in logische Blöcke (RawBlocks). 
-             Gewährleistet, dass H1 und Trenner im Stream verbleiben.
+DESCRIPTION: Zerlegt Markdown in atomare Blöcke. Hält H1-Überschriften im Stream
+             und gewährleistet die strukturelle Integrität von Callouts.
 """
 import re
 from typing import List, Tuple, Set
 from .chunking_models import RawBlock
 from .chunking_utils import extract_frontmatter_from_text
 
-_WS = re.compile(r'\s+')
-_SENT_SPLIT = re.compile(r'(?<=[.!?])\s+(?=[A-ZÄÖÜ0-9„(])')
-
 def split_sentences(text: str) -> list[str]:
     """Teilt Text in Sätze auf unter Berücksichtigung deutscher Interpunktion."""
-    text = _WS.sub(' ', text.strip())
+    text = re.sub(r'\s+', ' ', text.strip())
     if not text: return []
-    return [p.strip() for p in _SENT_SPLIT.split(text) if p.strip()]
+    # Splittet bei Satzzeichen, gefolgt von Leerzeichen und Großbuchstaben
+    sentences = re.split(r'(?<=[.!?])\s+(?=[A-ZÄÖÜ0-9„(])', text)
+    return [s.strip() for s in sentences if s.strip()]
 
 def parse_blocks(md_text: str) -> Tuple[List[RawBlock], str]:
     """Zerlegt Text in logische Einheiten (RawBlocks), inklusive H1-H6."""
     blocks = []
-    h1_title = "Dokument"; section_path = "/"; current_section_title = None
+    h1_title = "Dokument"
+    section_path = "/"
+    current_section_title = None
+    
+    # Frontmatter entfernen
     fm, text_without_fm = extract_frontmatter_from_text(md_text)
     
-    # H1 für Metadaten extrahieren
+    # H1 für Note-Titel extrahieren
     h1_match = re.search(r'^#\s+(.*)', text_without_fm, re.MULTILINE)
-    if h1_match: h1_title = h1_match.group(1).strip()
+    if h1_match: 
+        h1_title = h1_match.group(1).strip()
     
     lines = text_without_fm.split('\n')
     buffer = []
     
     for line in lines:
         stripped = line.strip()
-        heading_match = re.match(r'^(#{1,6})\s+(.*)', stripped)
         
+        # Heading-Erkennung (H1 bis H6)
+        heading_match = re.match(r'^(#{1,6})\s+(.*)', stripped)
         if heading_match:
             if buffer:
                 content = "\n".join(buffer).strip()
@@ -52,14 +57,18 @@ def parse_blocks(md_text: str) -> Tuple[List[RawBlock], str]:
             blocks.append(RawBlock("heading", stripped, level, section_path, current_section_title))
             continue
 
-        if (not stripped or stripped == "---") and not line.startswith('>'):
+        # Trenner (---) beenden Blöcke, Leerzeilen nur wenn nicht in Callout
+        if stripped == "---" and not line.startswith('>'):
             if buffer:
                 content = "\n".join(buffer).strip()
-                if content: 
-                    blocks.append(RawBlock("paragraph", content, None, section_path, current_section_title))
+                if content: blocks.append(RawBlock("paragraph", content, None, section_path, current_section_title))
+                buffer = []
+            blocks.append(RawBlock("separator", "---", None, section_path, current_section_title))
+        elif not stripped and not line.startswith('>'):
+            if buffer:
+                content = "\n".join(buffer).strip()
+                if content: blocks.append(RawBlock("paragraph", content, None, section_path, current_section_title))
                 buffer = []
-            if stripped == "---":
-                blocks.append(RawBlock("separator", "---", None, section_path, current_section_title))
         else: 
             buffer.append(line)
             
@@ -70,7 +79,7 @@ def parse_blocks(md_text: str) -> Tuple[List[RawBlock], str]:
     return blocks, h1_title
 
 def parse_edges_robust(text: str) -> Set[str]:
-    """Extrahiert Kanten aus Wikilinks und Callouts."""
+    """Extrahiert Kanten-Kandidaten aus Wikilinks und Callouts."""
     found_edges = set()
     inlines = re.findall(r'\[\[rel:([^\|\]]+)\|?([^\]]*)\]\]', text)
     for kind, target in inlines:
diff --git a/app/core/chunking/chunking_strategies.py b/app/core/chunking/chunking_strategies.py
index 5b5a011..7936e88 100644
--- a/app/core/chunking/chunking_strategies.py
+++ b/app/core/chunking/chunking_strategies.py
@@ -1,39 +1,42 @@
 """
 FILE: app/core/chunking/chunking_strategies.py
-DESCRIPTION: Universelle Strategie für atomares Sektions-Chunking v3.6.0.
-             Garantiert Sektions-Integrität durch präventives Chunk-Management.
+DESCRIPTION: Strategie für atomares Sektions-Chunking v3.7.0.
+             Garantiert Sektions-Integrität durch ein flexibles Toleranz-Limit.
+             Kein Splitting von Sektionen, solange sie 'ungefähr' passen.
 """
 import math
 from typing import List, Dict, Any, Optional
 from .chunking_models import RawBlock, Chunk
 from .chunking_parser import split_sentences
 
-def _accurate_estimate_tokens(text: str) -> int:
-    """Konservative Schätzung für deutschen Text (len/2.5 statt len/4)."""
-    return max(1, math.ceil(len(text.strip()) / 2.5))
+# Toleranz-Faktor: Erlaubt Chunks, bis zu 15% über 'max' zu wachsen, 
+# um eine Sektion vollständig zu erhalten.
+FLEX_FACTOR = 1.15 
+
+def _safe_estimate(text: str) -> int:
+    """Sicherere Token-Schätzung für MD/Deutsch (Faktor 3.0 statt 4.0)."""
+    return max(1, math.ceil(len(text.strip()) / 3.0))
 
 def _create_context_win(doc_title: str, sec_title: Optional[str], text: str) -> str:
     parts = []
     if doc_title: parts.append(doc_title)
     if sec_title and sec_title != doc_title: parts.append(sec_title)
-    prefix = " > ".join(parts)
-    return f"{prefix}\n{text}".strip() if prefix else text
+    prefix = " > ".join(parts); return f"{prefix}\n{text}".strip() if prefix else text
 
 def strategy_by_heading(blocks: List[RawBlock], config: Dict[str, Any], note_id: str, doc_title: str = "") -> List[Chunk]:
-    """
-    Sektions-Chunking: Packt komplette Abschnitte in Chunks.
-    Bei Überlauf wird die Sektion ohne Ausnahme in den nächsten Chunk geschoben.
-    """
     target = config.get("target", 400)
     max_tokens = config.get("max", 600)
     split_level = config.get("split_level", 2)
     overlap_cfg = config.get("overlap", (50, 80))
     overlap = sum(overlap_cfg) // 2 if isinstance(overlap_cfg, (list, tuple)) else overlap_cfg
     
+    # Das flexible Maximum, das Sektionen unzertrennt lässt
+    soft_max = int(max_tokens * FLEX_FACTOR) 
+    
     chunks: List[Chunk] = []
 
     def _emit_chunk(block_list: List[RawBlock]):
-        """Schreibt eine Liste von Blöcken als einen einzigen, ungeteilten Chunk."""
+        """Schreibt eine Liste von Blöcken als einen einzigen Chunk ohne internes Splitting."""
         if not block_list: return
         txt = "\n\n".join([b.text for b in block_list])
         idx = len(chunks)
@@ -42,40 +45,36 @@ def strategy_by_heading(blocks: List[RawBlock], config: Dict[str, Any], note_id:
         win = _create_context_win(doc_title, title, txt)
         chunks.append(Chunk(
             id=f"{note_id}#c{idx:02d}", note_id=note_id, index=idx,
-            text=txt, window=win, token_count=_accurate_estimate_tokens(txt),
+            text=txt, window=win, token_count=_safe_estimate(txt),
             section_title=title, section_path=path,
             neighbors_prev=None, neighbors_next=None
         ))
 
     def _split_giant_section(sec_blocks: List[RawBlock]):
-        """Notfall-Split: Nur wenn eine EINZELNE Sektion bereits > max ist."""
+        """Notfall-Split: Nur wenn eine EINZELNE Sektion bereits > soft_max ist."""
         full_text = "\n\n".join([b.text for b in sec_blocks])
-        main_title = sec_blocks[0].section_title
-        main_path = sec_blocks[0].section_path
+        main_title = sec_blocks[0].section_title; main_path = sec_blocks[0].section_path
         header_text = sec_blocks[0].text if sec_blocks[0].kind == "heading" else ""
         
         sents = split_sentences(full_text)
         cur_sents = []; sub_len = 0
-        
         for s in sents:
-            slen = _accurate_estimate_tokens(s)
+            slen = _safe_estimate(s)
             if sub_len + slen > target and cur_sents:
                 _emit_chunk([RawBlock("paragraph", " ".join(cur_sents), None, main_path, main_title)])
                 ov_s = [header_text] if header_text else []
-                ov_l = _accurate_estimate_tokens(header_text) if header_text else 0
+                ov_l = _safe_estimate(header_text) if header_text else 0
                 for os in reversed(cur_sents):
                     if os == header_text: continue
-                    t_len = _accurate_estimate_tokens(os)
+                    t_len = _safe_estimate(os)
                     if ov_l + t_len < overlap:
-                        ov_s.insert(len(ov_s)-1 if header_text else 0, os)
-                        ov_l += t_len
+                        ov_s.insert(len(ov_s)-1 if header_text else 0, os); ov_l += t_len
                     else: break
                 cur_sents = list(ov_s); cur_sents.append(s); sub_len = ov_l + slen
             else: cur_sents.append(s); sub_len += slen
-        
         if cur_sents: _emit_chunk([RawBlock("paragraph", " ".join(cur_sents), None, main_path, main_title)])
 
-    # 1. Gruppierung in atomare Einheiten
+    # 1. Gruppierung in atomare Sektions-Einheiten
     sections: List[List[RawBlock]] = []
     curr_sec: List[RawBlock] = []
     for b in blocks:
@@ -85,41 +84,44 @@ def strategy_by_heading(blocks: List[RawBlock], config: Dict[str, Any], note_id:
         else: curr_sec.append(b)
     if curr_sec: sections.append(curr_sec)
 
-    # 2. Das Pack-Verfahren (Kein Zerschneiden beim Flashen!)
-    candidate_chunk: List[RawBlock] = []
-    candidate_tokens = 0
+    # 2. Das flexible Pack-Verfahren
+    current_chunk_buf: List[RawBlock] = []
+    current_tokens = 0
 
     for sec in sections:
         sec_text = "\n\n".join([b.text for b in sec])
-        sec_tokens = _accurate_estimate_tokens(sec_text)
+        sec_tokens = _safe_estimate(sec_text)
         
-        # Prüfung: Passt die Sektion noch dazu?
-        if candidate_tokens + sec_tokens <= max_tokens:
-            candidate_chunk.extend(sec)
-            candidate_tokens = _accurate_estimate_tokens("\n\n".join([b.text for b in candidate_chunk]))
-        else:
-            # Chunk ist voll -> Abschluss an Sektionsgrenze
-            if candidate_chunk:
-                _emit_chunk(candidate_chunk)
-                candidate_chunk = []
-                candidate_tokens = 0
-            
-            # Neue Sektion allein prüfen
-            if sec_tokens > max_tokens:
-                _split_giant_section(sec)
-            else:
-                candidate_chunk = list(sec)
-                candidate_tokens = sec_tokens
+        if current_chunk_buf:
+            # PRÜFUNG: Würde die neue Sektion das FLEXIBLE Limit sprengen?
+            if (current_tokens + sec_tokens > soft_max):
+                _emit_chunk(current_chunk_buf)
+                current_chunk_buf = []
+                current_tokens = 0
+            # Haben wir das Ziel-Maß erreicht und es kommt eine neue Sektion?
+            elif (current_tokens >= target):
+                _emit_chunk(current_chunk_buf)
+                current_chunk_buf = []
+                current_tokens = 0
+
+        # Wenn eine EINZELNE Sektion alleine schon das weiche Limit sprengt
+        if not current_chunk_buf and sec_tokens > soft_max:
+            _split_giant_section(sec)
+        else:
+            current_chunk_buf.extend(sec)
+            current_tokens = _safe_estimate("\n\n".join([b.text for b in current_chunk_buf]))
+
+    if current_chunk_buf:
+        _emit_chunk(current_chunk_buf)
 
-    if candidate_chunk: _emit_chunk(candidate_chunk)
     return chunks
 
 def strategy_sliding_window(blocks: List[RawBlock], config: Dict[str, Any], note_id: str, context_prefix: str = "") -> List[Chunk]:
     target = config.get("target", 400); max_tokens = config.get("max", 600)
     chunks: List[Chunk] = []; buf: List[RawBlock] = []
     for b in blocks:
-        b_tokens = _accurate_estimate_tokens(b.text)
-        current_tokens = sum(_accurate_estimate_tokens(x.text) for x in buf) if buf else 0
+        b_tokens = _safe_estimate(b.text)
+        current_tokens = sum(_safe_estimate(x.text) for x in buf) if buf else 0
         if current_tokens + b_tokens > max_tokens and buf:
             txt = "\n\n".join([x.text for x in buf]); idx = len(chunks)
             win = f"{context_prefix}\n{txt}".strip() if context_prefix else txt
@@ -129,5 +131,5 @@ def strategy_sliding_window(blocks: List[RawBlock], config: Dict[str, Any], note
     if buf:
         txt = "\n\n".join([x.text for x in buf]); idx = len(chunks)
         win = f"{context_prefix}\n{txt}".strip() if context_prefix else txt
-        chunks.append(Chunk(id=f"{note_id}#c{idx:02d}", note_id=note_id, index=idx, text=txt, window=win, token_count=_accurate_estimate_tokens(txt), section_title=buf[0].section_title, section_path=buf[0].section_path, neighbors_prev=None, neighbors_next=None))
+        chunks.append(Chunk(id=f"{note_id}#c{idx:02d}", note_id=note_id, index=idx, text=txt, window=win, token_count=_safe_estimate(txt), section_title=buf[0].section_title, section_path=buf[0].section_path, neighbors_prev=None, neighbors_next=None))
     return chunks
\ No newline at end of file

From 6b83879741ec4267bff06dfc3dc80200c05fc8b7 Mon Sep 17 00:00:00 2001
From: Lars <Lars@stommer.de>
Date: Mon, 29 Dec 2025 22:04:23 +0100
Subject: [PATCH 22/33] =?UTF-8?q?Aktualisierung=20des=20Chunking-Parsers?=
 =?UTF-8?q?=20zur=20Verbesserung=20der=20Satzverarbeitung=20und=20Blocktre?=
 =?UTF-8?q?nnung.=20Einf=C3=BChrung=20des=20'Pack-and-Carry-Over'=20Verfah?=
 =?UTF-8?q?rens=20in=20der=20Sektions-Chunking-Strategie=20zur=20Optimieru?=
 =?UTF-8?q?ng=20der=20Handhabung=20von=20gro=C3=9Fen=20Sektionen=20und=20G?=
 =?UTF-8?q?ew=C3=A4hrleistung=20der=20Sektionsintegrit=C3=A4t.=20Anpassung?=
 =?UTF-8?q?en=20an=20der=20Token-Sch=C3=A4tzung=20und=20Verbesserung=20der?=
 =?UTF-8?q?=20Metadatenverarbeitung.?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 app/core/chunking/chunking_parser.py     |  36 ++---
 app/core/chunking/chunking_strategies.py | 178 ++++++++++++-----------
 2 files changed, 108 insertions(+), 106 deletions(-)

diff --git a/app/core/chunking/chunking_parser.py b/app/core/chunking/chunking_parser.py
index fb3b65c..696650a 100644
--- a/app/core/chunking/chunking_parser.py
+++ b/app/core/chunking/chunking_parser.py
@@ -1,6 +1,6 @@
 """
 FILE: app/core/chunking/chunking_parser.py
-DESCRIPTION: Zerlegt Markdown in atomare Blöcke. Hält H1-Überschriften im Stream
+DESCRIPTION: Zerlegt Markdown in logische Blöcke. Hält H1-Überschriften im Stream 
              und gewährleistet die strukturelle Integrität von Callouts.
 """
 import re
@@ -8,28 +8,25 @@ from typing import List, Tuple, Set
 from .chunking_models import RawBlock
 from .chunking_utils import extract_frontmatter_from_text
 
+_WS = re.compile(r'\s+')
+_SENT_SPLIT = re.compile(r'(?<=[.!?])\s+(?=[A-ZÄÖÜ0-9„(])')
+
 def split_sentences(text: str) -> list[str]:
     """Teilt Text in Sätze auf unter Berücksichtigung deutscher Interpunktion."""
-    text = re.sub(r'\s+', ' ', text.strip())
+    text = _WS.sub(' ', text.strip())
     if not text: return []
-    # Splittet bei Satzzeichen, gefolgt von Leerzeichen und Großbuchstaben
-    sentences = re.split(r'(?<=[.!?])\s+(?=[A-ZÄÖÜ0-9„(])', text)
-    return [s.strip() for s in sentences if s.strip()]
+    # Splittet bei Punkt, Ausrufezeichen oder Fragezeichen, gefolgt von Großbuchstabe
+    return [p.strip() for p in _SENT_SPLIT.split(text) if p.strip()]
 
 def parse_blocks(md_text: str) -> Tuple[List[RawBlock], str]:
     """Zerlegt Text in logische Einheiten (RawBlocks), inklusive H1-H6."""
     blocks = []
-    h1_title = "Dokument"
-    section_path = "/"
-    current_section_title = None
-    
-    # Frontmatter entfernen
+    h1_title = "Dokument"; section_path = "/"; current_section_title = None
     fm, text_without_fm = extract_frontmatter_from_text(md_text)
     
     # H1 für Note-Titel extrahieren
     h1_match = re.search(r'^#\s+(.*)', text_without_fm, re.MULTILINE)
-    if h1_match: 
-        h1_title = h1_match.group(1).strip()
+    if h1_match: h1_title = h1_match.group(1).strip()
     
     lines = text_without_fm.split('\n')
     buffer = []
@@ -57,18 +54,15 @@ def parse_blocks(md_text: str) -> Tuple[List[RawBlock], str]:
             blocks.append(RawBlock("heading", stripped, level, section_path, current_section_title))
             continue
 
-        # Trenner (---) beenden Blöcke, Leerzeilen nur wenn nicht in Callout
-        if stripped == "---" and not line.startswith('>'):
+        # Trenner oder Leerzeilen beenden Blöcke, außer innerhalb von Callouts
+        if (not stripped or stripped == "---") and not line.startswith('>'):
             if buffer:
                 content = "\n".join(buffer).strip()
-                if content: blocks.append(RawBlock("paragraph", content, None, section_path, current_section_title))
-                buffer = []
-            blocks.append(RawBlock("separator", "---", None, section_path, current_section_title))
-        elif not stripped and not line.startswith('>'):
-            if buffer:
-                content = "\n".join(buffer).strip()
-                if content: blocks.append(RawBlock("paragraph", content, None, section_path, current_section_title))
+                if content: 
+                    blocks.append(RawBlock("paragraph", content, None, section_path, current_section_title))
                 buffer = []
+            if stripped == "---":
+                blocks.append(RawBlock("separator", "---", None, section_path, current_section_title))
         else: 
             buffer.append(line)
             
diff --git a/app/core/chunking/chunking_strategies.py b/app/core/chunking/chunking_strategies.py
index 7936e88..d3e8cb6 100644
--- a/app/core/chunking/chunking_strategies.py
+++ b/app/core/chunking/chunking_strategies.py
@@ -1,135 +1,143 @@
 """
 FILE: app/core/chunking/chunking_strategies.py
-DESCRIPTION: Strategie für atomares Sektions-Chunking v3.7.0.
-             Garantiert Sektions-Integrität durch ein flexibles Toleranz-Limit.
-             Kein Splitting von Sektionen, solange sie 'ungefähr' passen.
+DESCRIPTION: Strategien für atomares Sektions-Chunking v3.8.5.
+             Implementiert das 'Pack-and-Carry-Over' Verfahren:
+             1. Packt ganze Abschnitte basierend auf Schätzung.
+             2. Kein physischer Overflow-Check während des Packens.
+             3. Smart-Zerlegung von Übergrößen mit Carry-Over in die Queue.
 """
-import math
 from typing import List, Dict, Any, Optional
 from .chunking_models import RawBlock, Chunk
+from .chunking_utils import estimate_tokens
 from .chunking_parser import split_sentences
 
-# Toleranz-Faktor: Erlaubt Chunks, bis zu 15% über 'max' zu wachsen, 
-# um eine Sektion vollständig zu erhalten.
-FLEX_FACTOR = 1.15 
-
-def _safe_estimate(text: str) -> int:
-    """Sicherere Token-Schätzung für MD/Deutsch (Faktor 3.0 statt 4.0)."""
-    return max(1, math.ceil(len(text.strip()) / 3.0))
-
-def _create_context_win(doc_title: str, sec_title: Optional[str], text: str) -> str:
-    parts = []
-    if doc_title: parts.append(doc_title)
+def _create_win(doc_title: str, sec_title: Optional[str], text: str) -> str:
+    parts = [doc_title] if doc_title else []
     if sec_title and sec_title != doc_title: parts.append(sec_title)
-    prefix = " > ".join(parts); return f"{prefix}\n{text}".strip() if prefix else text
+    prefix = " > ".join(parts)
+    return f"{prefix}\n{text}".strip() if prefix else text
 
 def strategy_by_heading(blocks: List[RawBlock], config: Dict[str, Any], note_id: str, doc_title: str = "") -> List[Chunk]:
+    """
+    Universelle Heading-Strategie mit Fallunterscheidung für Smart-Edge-Allocation.
+    """
+    smart_edge = config.get("enable_smart_edge_allocation", True)
     target = config.get("target", 400)
     max_tokens = config.get("max", 600)
     split_level = config.get("split_level", 2)
     overlap_cfg = config.get("overlap", (50, 80))
     overlap = sum(overlap_cfg) // 2 if isinstance(overlap_cfg, (list, tuple)) else overlap_cfg
     
-    # Das flexible Maximum, das Sektionen unzertrennt lässt
-    soft_max = int(max_tokens * FLEX_FACTOR) 
-    
     chunks: List[Chunk] = []
 
-    def _emit_chunk(block_list: List[RawBlock]):
-        """Schreibt eine Liste von Blöcken als einen einzigen Chunk ohne internes Splitting."""
-        if not block_list: return
-        txt = "\n\n".join([b.text for b in block_list])
+    def _emit(txt, title, path):
         idx = len(chunks)
-        title = block_list[0].section_title
-        path = block_list[0].section_path
-        win = _create_context_win(doc_title, title, txt)
+        win = _create_win(doc_title, title, txt)
         chunks.append(Chunk(
             id=f"{note_id}#c{idx:02d}", note_id=note_id, index=idx,
-            text=txt, window=win, token_count=_safe_estimate(txt),
-            section_title=title, section_path=path,
-            neighbors_prev=None, neighbors_next=None
+            text=txt, window=win, token_count=estimate_tokens(txt),
+            section_title=title, section_path=path, neighbors_prev=None, neighbors_next=None
         ))
 
-    def _split_giant_section(sec_blocks: List[RawBlock]):
-        """Notfall-Split: Nur wenn eine EINZELNE Sektion bereits > soft_max ist."""
-        full_text = "\n\n".join([b.text for b in sec_blocks])
-        main_title = sec_blocks[0].section_title; main_path = sec_blocks[0].section_path
-        header_text = sec_blocks[0].text if sec_blocks[0].kind == "heading" else ""
-        
-        sents = split_sentences(full_text)
-        cur_sents = []; sub_len = 0
-        for s in sents:
-            slen = _safe_estimate(s)
-            if sub_len + slen > target and cur_sents:
-                _emit_chunk([RawBlock("paragraph", " ".join(cur_sents), None, main_path, main_title)])
-                ov_s = [header_text] if header_text else []
-                ov_l = _safe_estimate(header_text) if header_text else 0
-                for os in reversed(cur_sents):
-                    if os == header_text: continue
-                    t_len = _safe_estimate(os)
-                    if ov_l + t_len < overlap:
-                        ov_s.insert(len(ov_s)-1 if header_text else 0, os); ov_l += t_len
-                    else: break
-                cur_sents = list(ov_s); cur_sents.append(s); sub_len = ov_l + slen
-            else: cur_sents.append(s); sub_len += slen
-        if cur_sents: _emit_chunk([RawBlock("paragraph", " ".join(cur_sents), None, main_path, main_title)])
+    # --- FALL A: HARD SPLIT (enable_smart_edge_allocation: false) ---
+    if not smart_edge:
+        buf = []
+        for b in blocks:
+            # Trenne bei jeder Überschrift <= split_level
+            if b.kind == "heading" and b.level <= split_level:
+                # Nur flashen, wenn der Puffer nicht nur aus der aktuellen Überschrift besteht
+                if buf and not (len(buf) == 1 and buf[0].kind == "heading"):
+                    _emit("\n\n".join([x.text for x in buf]), buf[0].section_title, buf[0].section_path)
+                    buf = []
+            buf.append(b)
+        if buf: _emit("\n\n".join([x.text for x in buf]), buf[0].section_title, buf[0].section_path)
+        return chunks
 
-    # 1. Gruppierung in atomare Sektions-Einheiten
+    # --- FALL B: SMART EDGE ALLOCATION (Pack-and-Carry-Over) ---
+    # 1. Gruppierung in atomare Sektions-Einheiten (Sektions-Isolation)
     sections: List[List[RawBlock]] = []
-    curr_sec: List[RawBlock] = []
+    curr = []
     for b in blocks:
         if b.kind == "heading" and b.level <= split_level:
-            if curr_sec: sections.append(curr_sec)
-            curr_sec = [b]
-        else: curr_sec.append(b)
-    if curr_sec: sections.append(curr_sec)
+            if curr: sections.append(curr)
+            curr = [b]
+        else: curr.append(b)
+    if curr: sections.append(curr)
 
-    # 2. Das flexible Pack-Verfahren
-    current_chunk_buf: List[RawBlock] = []
-    current_tokens = 0
+    # 2. Queue-Management für Carry-Over
+    processing_queue = [{"blocks": s, "text": "\n\n".join([b.text for b in s])} for s in sections]
+    current_chunk_text = ""
+    current_meta = {"title": None, "path": "/"}
 
-    for sec in sections:
-        sec_text = "\n\n".join([b.text for b in sec])
-        sec_tokens = _safe_estimate(sec_text)
+    while processing_queue:
+        item = processing_queue.pop(0)
+        item_text = item["text"]
+        item_tokens = estimate_tokens(item_text)
         
-        if current_chunk_buf:
-            # PRÜFUNG: Würde die neue Sektion das FLEXIBLE Limit sprengen?
-            if (current_tokens + sec_tokens > soft_max):
-                _emit_chunk(current_chunk_buf)
-                current_chunk_buf = []
-                current_tokens = 0
-            # Haben wir das Ziel-Maß erreicht und es kommt eine neue Sektion?
-            elif (current_tokens >= target):
-                _emit_chunk(current_chunk_buf)
-                current_chunk_buf = []
-                current_tokens = 0
+        # Metadaten-Initialisierung falls Chunk leer
+        if not current_chunk_text and "blocks" in item:
+            current_meta["title"] = item["blocks"][0].section_title
+            current_meta["path"] = item["blocks"][0].section_path
 
-        # Wenn eine EINZELNE Sektion alleine schon das weiche Limit sprengt
-        if not current_chunk_buf and sec_tokens > soft_max:
-            _split_giant_section(sec)
+        combined_est = estimate_tokens(current_chunk_text + "\n\n" + item_text) if current_chunk_text else item_tokens
+
+        # Regel 1: Passt die vollständige Sektion nach Schätzung rein? (Kein harter Overflow-Check)
+        if combined_est <= max_tokens:
+            current_chunk_text = (current_chunk_text + "\n\n" + item_text).strip()
         else:
-            current_chunk_buf.extend(sec)
-            current_tokens = _safe_estimate("\n\n".join([b.text for b in current_chunk_buf]))
+            # Regel 2: Wenn Puffer voll -> Emittieren und Sektion zurücklegen
+            if current_chunk_text:
+                _emit(current_chunk_text, current_meta["title"], current_meta["path"])
+                current_chunk_text = ""
+                processing_queue.insert(0, item)
+            else:
+                # Regel 3: Einzelne Sektion zu groß -> Smart Zerlegung
+                sents = split_sentences(item_text)
+                header_text = ""
+                if "blocks" in item and item["blocks"][0].kind == "heading":
+                    header_text = item["blocks"][0].text
 
-    if current_chunk_buf:
-        _emit_chunk(current_chunk_buf)
+                take_sents = []; take_len = 0
+                while sents:
+                    s = sents.pop(0)
+                    slen = estimate_tokens(s)
+                    if take_len + slen > target and take_sents:
+                        sents.insert(0, s); break
+                    take_sents.append(s); take_len += slen
+                
+                _emit(" ".join(take_sents), current_meta["title"], current_meta["path"])
+                
+                # Carry-Over: Der Rest wird an den Anfang der Queue geschoben
+                if sents:
+                    remainder_text = " ".join(sents)
+                    # Falls wir einen Header haben, fügen wir ihn dem Rest für den Kontext hinzu
+                    if header_text and not remainder_text.startswith(header_text):
+                        remainder_text = header_text + "\n\n" + remainder_text
+                    processing_queue.insert(0, {"text": remainder_text, "is_split": True})
+
+    if current_chunk_text:
+        _emit(current_chunk_text, current_meta["title"], current_meta["path"])
 
     return chunks
 
 def strategy_sliding_window(blocks: List[RawBlock], config: Dict[str, Any], note_id: str, context_prefix: str = "") -> List[Chunk]:
+    """Sliding Window Strategie: Erhalten für alternative Anwendungsfälle."""
     target = config.get("target", 400); max_tokens = config.get("max", 600)
     chunks: List[Chunk] = []; buf: List[RawBlock] = []
+    
     for b in blocks:
-        b_tokens = _safe_estimate(b.text)
-        current_tokens = sum(_safe_estimate(x.text) for x in buf) if buf else 0
+        b_tokens = estimate_tokens(b.text)
+        current_tokens = sum(estimate_tokens(x.text) for x in buf) if buf else 0
         if current_tokens + b_tokens > max_tokens and buf:
             txt = "\n\n".join([x.text for x in buf]); idx = len(chunks)
             win = f"{context_prefix}\n{txt}".strip() if context_prefix else txt
             chunks.append(Chunk(id=f"{note_id}#c{idx:02d}", note_id=note_id, index=idx, text=txt, window=win, token_count=current_tokens, section_title=buf[0].section_title, section_path=buf[0].section_path, neighbors_prev=None, neighbors_next=None))
             buf = []
         buf.append(b)
+        
     if buf:
         txt = "\n\n".join([x.text for x in buf]); idx = len(chunks)
         win = f"{context_prefix}\n{txt}".strip() if context_prefix else txt
-        chunks.append(Chunk(id=f"{note_id}#c{idx:02d}", note_id=note_id, index=idx, text=txt, window=win, token_count=_safe_estimate(txt), section_title=buf[0].section_title, section_path=buf[0].section_path, neighbors_prev=None, neighbors_next=None))
+        chunks.append(Chunk(id=f"{note_id}#c{idx:02d}", note_id=note_id, index=idx, text=txt, window=win, token_count=estimate_tokens(txt), section_title=buf[0].section_title, section_path=buf[0].section_path, neighbors_prev=None, neighbors_next=None))
+        
     return chunks
\ No newline at end of file

From 8f65e550c86a77cb77532b848f89f38b85b50c7b Mon Sep 17 00:00:00 2001
From: Lars <Lars@stommer.de>
Date: Mon, 29 Dec 2025 22:16:12 +0100
Subject: [PATCH 23/33] =?UTF-8?q?Optimierung=20des=20Chunking-Parsers=20zu?=
 =?UTF-8?q?r=20Unterst=C3=BCtzung=20atomarer=20Bl=C3=B6cke=20und=20Verbess?=
 =?UTF-8?q?erung=20der=20Satzverarbeitung.=20Aktualisierung=20der=20Sektio?=
 =?UTF-8?q?ns-Chunking-Strategie=20auf=20Version=203.9.0=20mit=20regelkonf?=
 =?UTF-8?q?ormer=20Implementierung=20und=20Anpassungen=20an=20der=20Wartes?=
 =?UTF-8?q?chlangen-Verarbeitung=20f=C3=BCr=20Carry-Over.=20Verbesserte=20?=
 =?UTF-8?q?Handhabung=20von=20=C3=9Cberschriften=20und=20Metadaten=20zur?=
 =?UTF-8?q?=20Gew=C3=A4hrleistung=20der=20strukturellen=20Integrit=C3=A4t.?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 app/core/chunking/chunking_parser.py     | 21 ++---
 app/core/chunking/chunking_strategies.py | 99 ++++++++++++------------
 2 files changed, 53 insertions(+), 67 deletions(-)

diff --git a/app/core/chunking/chunking_parser.py b/app/core/chunking/chunking_parser.py
index 696650a..e55f032 100644
--- a/app/core/chunking/chunking_parser.py
+++ b/app/core/chunking/chunking_parser.py
@@ -1,6 +1,6 @@
 """
 FILE: app/core/chunking/chunking_parser.py
-DESCRIPTION: Zerlegt Markdown in logische Blöcke. Hält H1-Überschriften im Stream 
+DESCRIPTION: Zerlegt Markdown in atomare Blöcke. Hält H1-Überschriften im Stream
              und gewährleistet die strukturelle Integrität von Callouts.
 """
 import re
@@ -8,15 +8,12 @@ from typing import List, Tuple, Set
 from .chunking_models import RawBlock
 from .chunking_utils import extract_frontmatter_from_text
 
-_WS = re.compile(r'\s+')
-_SENT_SPLIT = re.compile(r'(?<=[.!?])\s+(?=[A-ZÄÖÜ0-9„(])')
-
 def split_sentences(text: str) -> list[str]:
     """Teilt Text in Sätze auf unter Berücksichtigung deutscher Interpunktion."""
-    text = _WS.sub(' ', text.strip())
+    text = re.sub(r'\s+', ' ', text.strip())
     if not text: return []
-    # Splittet bei Punkt, Ausrufezeichen oder Fragezeichen, gefolgt von Großbuchstabe
-    return [p.strip() for p in _SENT_SPLIT.split(text) if p.strip()]
+    # Splittet bei Satzzeichen, gefolgt von Leerzeichen und Großbuchstaben
+    return [s.strip() for s in re.split(r'(?<=[.!?])\s+(?=[A-ZÄÖÜ0-9„(])', text) if s.strip()]
 
 def parse_blocks(md_text: str) -> Tuple[List[RawBlock], str]:
     """Zerlegt Text in logische Einheiten (RawBlocks), inklusive H1-H6."""
@@ -24,7 +21,6 @@ def parse_blocks(md_text: str) -> Tuple[List[RawBlock], str]:
     h1_title = "Dokument"; section_path = "/"; current_section_title = None
     fm, text_without_fm = extract_frontmatter_from_text(md_text)
     
-    # H1 für Note-Titel extrahieren
     h1_match = re.search(r'^#\s+(.*)', text_without_fm, re.MULTILINE)
     if h1_match: h1_title = h1_match.group(1).strip()
     
@@ -33,28 +29,23 @@ def parse_blocks(md_text: str) -> Tuple[List[RawBlock], str]:
     
     for line in lines:
         stripped = line.strip()
-        
-        # Heading-Erkennung (H1 bis H6)
         heading_match = re.match(r'^(#{1,6})\s+(.*)', stripped)
+        
         if heading_match:
             if buffer:
                 content = "\n".join(buffer).strip()
                 if content: 
                     blocks.append(RawBlock("paragraph", content, None, section_path, current_section_title))
                 buffer = []
-            
             level = len(heading_match.group(1))
             title = heading_match.group(2).strip()
-            
             if level == 1:
                 current_section_title = title; section_path = "/"
             elif level == 2:
                 current_section_title = title; section_path = f"/{current_section_title}"
-            
             blocks.append(RawBlock("heading", stripped, level, section_path, current_section_title))
             continue
 
-        # Trenner oder Leerzeilen beenden Blöcke, außer innerhalb von Callouts
         if (not stripped or stripped == "---") and not line.startswith('>'):
             if buffer:
                 content = "\n".join(buffer).strip()
@@ -69,7 +60,6 @@ def parse_blocks(md_text: str) -> Tuple[List[RawBlock], str]:
     if buffer:
         content = "\n".join(buffer).strip()
         if content: blocks.append(RawBlock("paragraph", content, None, section_path, current_section_title))
-            
     return blocks, h1_title
 
 def parse_edges_robust(text: str) -> Set[str]:
@@ -79,7 +69,6 @@ def parse_edges_robust(text: str) -> Set[str]:
     for kind, target in inlines:
         k = kind.strip().lower(); t = target.strip()
         if k and t: found_edges.add(f"{k}:{t}")
-    
     lines = text.split('\n'); current_edge_type = None
     for line in lines:
         stripped = line.strip()
diff --git a/app/core/chunking/chunking_strategies.py b/app/core/chunking/chunking_strategies.py
index d3e8cb6..de995fd 100644
--- a/app/core/chunking/chunking_strategies.py
+++ b/app/core/chunking/chunking_strategies.py
@@ -1,10 +1,7 @@
 """
 FILE: app/core/chunking/chunking_strategies.py
-DESCRIPTION: Strategien für atomares Sektions-Chunking v3.8.5.
-             Implementiert das 'Pack-and-Carry-Over' Verfahren:
-             1. Packt ganze Abschnitte basierend auf Schätzung.
-             2. Kein physischer Overflow-Check während des Packens.
-             3. Smart-Zerlegung von Übergrößen mit Carry-Over in die Queue.
+DESCRIPTION: Universelle Strategie für atomares Sektions-Chunking v3.9.0.
+             Regelkonforme Implementierung: Pack-Sections, Trust Estimation, Carry-Over.
 """
 from typing import List, Dict, Any, Optional
 from .chunking_models import RawBlock, Chunk
@@ -18,9 +15,6 @@ def _create_win(doc_title: str, sec_title: Optional[str], text: str) -> str:
     return f"{prefix}\n{text}".strip() if prefix else text
 
 def strategy_by_heading(blocks: List[RawBlock], config: Dict[str, Any], note_id: str, doc_title: str = "") -> List[Chunk]:
-    """
-    Universelle Heading-Strategie mit Fallunterscheidung für Smart-Edge-Allocation.
-    """
     smart_edge = config.get("enable_smart_edge_allocation", True)
     target = config.get("target", 400)
     max_tokens = config.get("max", 600)
@@ -43,10 +37,11 @@ def strategy_by_heading(blocks: List[RawBlock], config: Dict[str, Any], note_id:
     if not smart_edge:
         buf = []
         for b in blocks:
-            # Trenne bei jeder Überschrift <= split_level
+            # Trenne hart bei Überschrift <= split_level
             if b.kind == "heading" and b.level <= split_level:
-                # Nur flashen, wenn der Puffer nicht nur aus der aktuellen Überschrift besteht
-                if buf and not (len(buf) == 1 and buf[0].kind == "heading"):
+                # Prüfe, ob Puffer mehr als nur Überschriften enthält (keine leeren Chunks)
+                has_content = any(x.kind != "heading" for x in buf)
+                if buf and has_content:
                     _emit("\n\n".join([x.text for x in buf]), buf[0].section_title, buf[0].section_path)
                     buf = []
             buf.append(b)
@@ -54,49 +49,52 @@ def strategy_by_heading(blocks: List[RawBlock], config: Dict[str, Any], note_id:
         return chunks
 
     # --- FALL B: SMART EDGE ALLOCATION (Pack-and-Carry-Over) ---
-    # 1. Gruppierung in atomare Sektions-Einheiten (Sektions-Isolation)
-    sections: List[List[RawBlock]] = []
-    curr = []
+    # 1. Gruppierung in atomare Einheiten (Sektions-Isolation)
+    sections: List[Dict[str, Any]] = []
+    curr_blocks = []
     for b in blocks:
         if b.kind == "heading" and b.level <= split_level:
-            if curr: sections.append(curr)
-            curr = [b]
-        else: curr.append(b)
-    if curr: sections.append(curr)
+            if curr_blocks:
+                sections.append({"text": "\n\n".join([x.text for x in curr_blocks]), "meta": curr_blocks[0]})
+            curr_blocks = [b]
+        else: curr_blocks.append(b)
+    if curr_blocks:
+        sections.append({"text": "\n\n".join([x.text for x in curr_blocks]), "meta": curr_blocks[0]})
 
-    # 2. Queue-Management für Carry-Over
-    processing_queue = [{"blocks": s, "text": "\n\n".join([b.text for b in s])} for s in sections]
+    # 2. Warteschlangen-Verarbeitung (Regel 1-3)
+    # Wir nutzen eine Liste als Queue für Carry-Over-Reste
+    queue = list(sections)
     current_chunk_text = ""
     current_meta = {"title": None, "path": "/"}
 
-    while processing_queue:
-        item = processing_queue.pop(0)
+    while queue:
+        item = queue.pop(0)
         item_text = item["text"]
-        item_tokens = estimate_tokens(item_text)
         
-        # Metadaten-Initialisierung falls Chunk leer
-        if not current_chunk_text and "blocks" in item:
-            current_meta["title"] = item["blocks"][0].section_title
-            current_meta["path"] = item["blocks"][0].section_path
+        # Initialisiere Metadaten für einen neuen Chunk
+        if not current_chunk_text:
+            current_meta["title"] = item["meta"].section_title
+            current_meta["path"] = item["meta"].section_path
 
-        combined_est = estimate_tokens(current_chunk_text + "\n\n" + item_text) if current_chunk_text else item_tokens
+        # Schätzung (Regel 2: Wir verlassen uns darauf)
+        combined_text = (current_chunk_text + "\n\n" + item_text).strip() if current_chunk_text else item_text
+        combined_est = estimate_tokens(combined_text)
 
-        # Regel 1: Passt die vollständige Sektion nach Schätzung rein? (Kein harter Overflow-Check)
         if combined_est <= max_tokens:
-            current_chunk_text = (current_chunk_text + "\n\n" + item_text).strip()
+            # Regel 1: Vollständiger Abschnitt passt -> Aufnehmen
+            current_chunk_text = combined_text
         else:
-            # Regel 2: Wenn Puffer voll -> Emittieren und Sektion zurücklegen
+            # Er passt nicht ganz rein.
             if current_chunk_text:
+                # Puffer ist bereits gefüllt -> Wegschreiben, Item zurück in die Queue
                 _emit(current_chunk_text, current_meta["title"], current_meta["path"])
                 current_chunk_text = ""
-                processing_queue.insert(0, item)
+                queue.insert(0, item)
             else:
-                # Regel 3: Einzelne Sektion zu groß -> Smart Zerlegung
+                # Regel 3: Einzelner Abschnitt allein ist > max -> Smart Zerlegung
                 sents = split_sentences(item_text)
-                header_text = ""
-                if "blocks" in item and item["blocks"][0].kind == "heading":
-                    header_text = item["blocks"][0].text
-
+                header_prefix = item["meta"].text if item["meta"].kind == "heading" else ""
+                
                 take_sents = []; take_len = 0
                 while sents:
                     s = sents.pop(0)
@@ -105,39 +103,38 @@ def strategy_by_heading(blocks: List[RawBlock], config: Dict[str, Any], note_id:
                         sents.insert(0, s); break
                     take_sents.append(s); take_len += slen
                 
+                # Ersten Teil emittieren
                 _emit(" ".join(take_sents), current_meta["title"], current_meta["path"])
                 
-                # Carry-Over: Der Rest wird an den Anfang der Queue geschoben
+                # Rest als Carry-Over zurück in die Queue (Regel 3)
                 if sents:
                     remainder_text = " ".join(sents)
-                    # Falls wir einen Header haben, fügen wir ihn dem Rest für den Kontext hinzu
-                    if header_text and not remainder_text.startswith(header_text):
-                        remainder_text = header_text + "\n\n" + remainder_text
-                    processing_queue.insert(0, {"text": remainder_text, "is_split": True})
+                    # Kontext-Erhalt: Überschrift für den Rest wiederholen
+                    if header_prefix and not remainder_text.startswith(header_prefix):
+                        remainder_text = header_prefix + "\n\n" + remainder_text
+                    queue.insert(0, {"text": remainder_text, "meta": item["meta"]})
 
+    # Letzten Rest wegschreiben
     if current_chunk_text:
         _emit(current_chunk_text, current_meta["title"], current_meta["path"])
 
     return chunks
 
 def strategy_sliding_window(blocks: List[RawBlock], config: Dict[str, Any], note_id: str, context_prefix: str = "") -> List[Chunk]:
-    """Sliding Window Strategie: Erhalten für alternative Anwendungsfälle."""
+    """Sliding Window: Unverändert erhalten für Standard-Typen."""
     target = config.get("target", 400); max_tokens = config.get("max", 600)
     chunks: List[Chunk] = []; buf: List[RawBlock] = []
-    
     for b in blocks:
         b_tokens = estimate_tokens(b.text)
-        current_tokens = sum(estimate_tokens(x.text) for x in buf) if buf else 0
-        if current_tokens + b_tokens > max_tokens and buf:
+        curr_tokens = sum(estimate_tokens(x.text) for x in buf) if buf else 0
+        if curr_tokens + b_tokens > max_tokens and buf:
             txt = "\n\n".join([x.text for x in buf]); idx = len(chunks)
-            win = f"{context_prefix}\n{txt}".strip() if context_prefix else txt
-            chunks.append(Chunk(id=f"{note_id}#c{idx:02d}", note_id=note_id, index=idx, text=txt, window=win, token_count=current_tokens, section_title=buf[0].section_title, section_path=buf[0].section_path, neighbors_prev=None, neighbors_next=None))
+            win = _create_win(doc_title=context_prefix, sec_title=buf[0].section_title, text=txt)
+            chunks.append(Chunk(id=f"{note_id}#c{idx:02d}", note_id=note_id, index=idx, text=txt, window=win, token_count=curr_tokens, section_title=buf[0].section_title, section_path=buf[0].section_path, neighbors_prev=None, neighbors_next=None))
             buf = []
         buf.append(b)
-        
     if buf:
         txt = "\n\n".join([x.text for x in buf]); idx = len(chunks)
-        win = f"{context_prefix}\n{txt}".strip() if context_prefix else txt
+        win = _create_win(doc_title=context_prefix, sec_title=buf[0].section_title, text=txt)
         chunks.append(Chunk(id=f"{note_id}#c{idx:02d}", note_id=note_id, index=idx, text=txt, window=win, token_count=estimate_tokens(txt), section_title=buf[0].section_title, section_path=buf[0].section_path, neighbors_prev=None, neighbors_next=None))
-        
     return chunks
\ No newline at end of file

From 3c5c567077daad8427abbc80ed72ff717a3d8011 Mon Sep 17 00:00:00 2001
From: Lars <Lars@stommer.de>
Date: Tue, 30 Dec 2025 07:41:30 +0100
Subject: [PATCH 24/33] =?UTF-8?q?Aktualisierung=20der=20atomaren=20Sektion?=
 =?UTF-8?q?s-Chunking-Strategie=20auf=20Version=203.9.5=20mit=20Implementi?=
 =?UTF-8?q?erung=20des=20'Pack-and-Carry-Over'=20Verfahrens.=20Einf=C3=BCh?=
 =?UTF-8?q?rung=20neuer=20Konfigurationsoptionen=20f=C3=BCr=20Smart-Edge?=
 =?UTF-8?q?=20und=20strikte=20=C3=9Cberschriftenteilung.=20Verbesserte=20H?=
 =?UTF-8?q?andhabung=20von=20leeren=20=C3=9Cberschriften=20und=20Anpassung?=
 =?UTF-8?q?en=20an=20der=20Warteschlangen-Verarbeitung=20zur=20Optimierung?=
 =?UTF-8?q?=20der=20Chunk-Erstellung.?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 app/core/chunking/chunking_parser.py     | 44 ++++-------
 app/core/chunking/chunking_strategies.py | 95 +++++++++++++-----------
 2 files changed, 67 insertions(+), 72 deletions(-)

diff --git a/app/core/chunking/chunking_parser.py b/app/core/chunking/chunking_parser.py
index e55f032..1448932 100644
--- a/app/core/chunking/chunking_parser.py
+++ b/app/core/chunking/chunking_parser.py
@@ -8,19 +8,13 @@ from typing import List, Tuple, Set
 from .chunking_models import RawBlock
 from .chunking_utils import extract_frontmatter_from_text
 
-def split_sentences(text: str) -> list[str]:
-    """Teilt Text in Sätze auf unter Berücksichtigung deutscher Interpunktion."""
-    text = re.sub(r'\s+', ' ', text.strip())
-    if not text: return []
-    # Splittet bei Satzzeichen, gefolgt von Leerzeichen und Großbuchstaben
-    return [s.strip() for s in re.split(r'(?<=[.!?])\s+(?=[A-ZÄÖÜ0-9„(])', text) if s.strip()]
-
 def parse_blocks(md_text: str) -> Tuple[List[RawBlock], str]:
     """Zerlegt Text in logische Einheiten (RawBlocks), inklusive H1-H6."""
     blocks = []
     h1_title = "Dokument"; section_path = "/"; current_section_title = None
     fm, text_without_fm = extract_frontmatter_from_text(md_text)
     
+    # H1 für Metadaten extrahieren
     h1_match = re.search(r'^#\s+(.*)', text_without_fm, re.MULTILINE)
     if h1_match: h1_title = h1_match.group(1).strip()
     
@@ -32,20 +26,26 @@ def parse_blocks(md_text: str) -> Tuple[List[RawBlock], str]:
         heading_match = re.match(r'^(#{1,6})\s+(.*)', stripped)
         
         if heading_match:
+            # Vorherigen Text-Block abschließen
             if buffer:
                 content = "\n".join(buffer).strip()
                 if content: 
                     blocks.append(RawBlock("paragraph", content, None, section_path, current_section_title))
                 buffer = []
+            
             level = len(heading_match.group(1))
             title = heading_match.group(2).strip()
+            
+            # Pfad- und Titel-Update
             if level == 1:
                 current_section_title = title; section_path = "/"
             elif level == 2:
                 current_section_title = title; section_path = f"/{current_section_title}"
+            
             blocks.append(RawBlock("heading", stripped, level, section_path, current_section_title))
             continue
 
+        # Trenner oder Leerzeilen beenden Blöcke, außer innerhalb von Callouts
         if (not stripped or stripped == "---") and not line.startswith('>'):
             if buffer:
                 content = "\n".join(buffer).strip()
@@ -60,28 +60,12 @@ def parse_blocks(md_text: str) -> Tuple[List[RawBlock], str]:
     if buffer:
         content = "\n".join(buffer).strip()
         if content: blocks.append(RawBlock("paragraph", content, None, section_path, current_section_title))
+            
     return blocks, h1_title
 
-def parse_edges_robust(text: str) -> Set[str]:
-    """Extrahiert Kanten-Kandidaten aus Wikilinks und Callouts."""
-    found_edges = set()
-    inlines = re.findall(r'\[\[rel:([^\|\]]+)\|?([^\]]*)\]\]', text)
-    for kind, target in inlines:
-        k = kind.strip().lower(); t = target.strip()
-        if k and t: found_edges.add(f"{k}:{t}")
-    lines = text.split('\n'); current_edge_type = None
-    for line in lines:
-        stripped = line.strip()
-        callout_match = re.match(r'>\s*\[!edge\]\s*([^:\s]+)', stripped)
-        if callout_match:
-            current_edge_type = callout_match.group(1).strip().lower()
-            links = re.findall(r'\[\[([^\]]+)\]\]', stripped)
-            for l in links: 
-                if "rel:" not in l: found_edges.add(f"{current_edge_type}:{l}")
-            continue
-        if current_edge_type and stripped.startswith('>'):
-            links = re.findall(r'\[\[([^\]]+)\]\]', stripped)
-            for l in links: 
-                if "rel:" not in l: found_edges.add(f"{current_edge_type}:{l}")
-        elif not stripped.startswith('>'): current_edge_type = None
-    return found_edges
\ No newline at end of file
+def split_sentences(text: str) -> list[str]:
+    """Teilt Text in Sätze auf unter Berücksichtigung deutscher Interpunktion."""
+    text = re.sub(r'\s+', ' ', text.strip())
+    if not text: return []
+    # Splittet bei Satzzeichen, gefolgt von Leerzeichen und Großbuchstaben
+    return [s.strip() for s in re.split(r'(?<=[.!?])\s+(?=[A-ZÄÖÜ0-9„(])', text) if s.strip()]
\ No newline at end of file
diff --git a/app/core/chunking/chunking_strategies.py b/app/core/chunking/chunking_strategies.py
index de995fd..562808b 100644
--- a/app/core/chunking/chunking_strategies.py
+++ b/app/core/chunking/chunking_strategies.py
@@ -1,7 +1,7 @@
 """
 FILE: app/core/chunking/chunking_strategies.py
-DESCRIPTION: Universelle Strategie für atomares Sektions-Chunking v3.9.0.
-             Regelkonforme Implementierung: Pack-Sections, Trust Estimation, Carry-Over.
+DESCRIPTION: Strategie für atomares Sektions-Chunking v3.9.5.
+             Implementiert das 'Pack-and-Carry-Over' Verfahren nach Nutzerwunsch.
 """
 from typing import List, Dict, Any, Optional
 from .chunking_models import RawBlock, Chunk
@@ -15,7 +15,14 @@ def _create_win(doc_title: str, sec_title: Optional[str], text: str) -> str:
     return f"{prefix}\n{text}".strip() if prefix else text
 
 def strategy_by_heading(blocks: List[RawBlock], config: Dict[str, Any], note_id: str, doc_title: str = "") -> List[Chunk]:
+    """
+    Universelle Sektions-Strategie:
+    - Smart-Edge=True: Packt Sektionen basierend auf Schätzung (Regel 1-3).
+    - Smart-Edge=False: Hard Split an Überschriften (außer leere Header).
+    - Strict=True erzwingt Hard Split Verhalten innerhalb der Smart-Logik.
+    """
     smart_edge = config.get("enable_smart_edge_allocation", True)
+    strict = config.get("strict_heading_split", False)
     target = config.get("target", 400)
     max_tokens = config.get("max", 600)
     split_level = config.get("split_level", 2)
@@ -33,65 +40,72 @@ def strategy_by_heading(blocks: List[RawBlock], config: Dict[str, Any], note_id:
             section_title=title, section_path=path, neighbors_prev=None, neighbors_next=None
         ))
 
-    # --- FALL A: HARD SPLIT (enable_smart_edge_allocation: false) ---
-    if not smart_edge:
-        buf = []
-        for b in blocks:
-            # Trenne hart bei Überschrift <= split_level
-            if b.kind == "heading" and b.level <= split_level:
-                # Prüfe, ob Puffer mehr als nur Überschriften enthält (keine leeren Chunks)
-                has_content = any(x.kind != "heading" for x in buf)
-                if buf and has_content:
-                    _emit("\n\n".join([x.text for x in buf]), buf[0].section_title, buf[0].section_path)
-                    buf = []
-            buf.append(b)
-        if buf: _emit("\n\n".join([x.text for x in buf]), buf[0].section_title, buf[0].section_path)
-        return chunks
-
-    # --- FALL B: SMART EDGE ALLOCATION (Pack-and-Carry-Over) ---
-    # 1. Gruppierung in atomare Einheiten (Sektions-Isolation)
+    # --- SCHRITT 1: Gruppierung in atomare Sektions-Einheiten ---
     sections: List[Dict[str, Any]] = []
     curr_blocks = []
     for b in blocks:
         if b.kind == "heading" and b.level <= split_level:
             if curr_blocks:
-                sections.append({"text": "\n\n".join([x.text for x in curr_blocks]), "meta": curr_blocks[0]})
+                sections.append({"text": "\n\n".join([x.text for x in curr_blocks]), 
+                                 "meta": curr_blocks[0], "is_empty": len(curr_blocks) == 1})
             curr_blocks = [b]
         else: curr_blocks.append(b)
     if curr_blocks:
-        sections.append({"text": "\n\n".join([x.text for x in curr_blocks]), "meta": curr_blocks[0]})
+        sections.append({"text": "\n\n".join([x.text for x in curr_blocks]), 
+                         "meta": curr_blocks[0], "is_empty": len(curr_blocks) == 1})
 
-    # 2. Warteschlangen-Verarbeitung (Regel 1-3)
-    # Wir nutzen eine Liste als Queue für Carry-Over-Reste
+    # --- SCHRITT 2: Verarbeitung der Queue ---
     queue = list(sections)
     current_chunk_text = ""
     current_meta = {"title": None, "path": "/"}
 
+    # Hard-Split-Bedingung: Entweder Smart-Edge aus ODER Profil ist Strict
+    is_hard_split_mode = (not smart_edge) or (strict)
+
     while queue:
         item = queue.pop(0)
         item_text = item["text"]
         
-        # Initialisiere Metadaten für einen neuen Chunk
+        # Initialisierung für neuen Chunk
         if not current_chunk_text:
             current_meta["title"] = item["meta"].section_title
             current_meta["path"] = item["meta"].section_path
 
-        # Schätzung (Regel 2: Wir verlassen uns darauf)
+        # FALL A: Hard Split Modus (Regel: Trenne bei jeder Sektion <= Level)
+        if is_hard_split_mode:
+            # Regel: Leere Überschriften verbleiben am nächsten Chunk
+            if item.get("is_empty", False) and queue:
+                current_chunk_text = (current_chunk_text + "\n\n" + item_text).strip()
+                continue # Nimm das nächste Item dazu
+            
+            combined = (current_chunk_text + "\n\n" + item_text).strip()
+            if estimate_tokens(combined) > max_tokens and current_chunk_text:
+                # Falls es trotz Hard-Split zu groß wird, flashen wir erst den alten Teil
+                _emit(current_chunk_text, current_meta["title"], current_meta["path"])
+                current_chunk_text = item_text
+            else:
+                current_chunk_text = combined
+            
+            # Im Hard Split flashen wir nach jeder Sektion, die nicht leer ist
+            _emit(current_chunk_text, current_meta["title"], current_meta["path"])
+            current_chunk_text = ""
+            continue
+
+        # FALL B: Smart Mode (Regel 1-3)
         combined_text = (current_chunk_text + "\n\n" + item_text).strip() if current_chunk_text else item_text
         combined_est = estimate_tokens(combined_text)
 
         if combined_est <= max_tokens:
-            # Regel 1: Vollständiger Abschnitt passt -> Aufnehmen
+            # Regel 1 & 2: Passt nach Schätzung -> Aufnehmen
             current_chunk_text = combined_text
         else:
-            # Er passt nicht ganz rein.
+            # Regel 3: Passt nicht -> Entweder Puffer flashen oder Item zerlegen
             if current_chunk_text:
-                # Puffer ist bereits gefüllt -> Wegschreiben, Item zurück in die Queue
                 _emit(current_chunk_text, current_meta["title"], current_meta["path"])
                 current_chunk_text = ""
-                queue.insert(0, item)
+                queue.insert(0, item) # Item für neuen Chunk zurücklegen
             else:
-                # Regel 3: Einzelner Abschnitt allein ist > max -> Smart Zerlegung
+                # Einzelne Sektion zu groß -> Smart Zerlegung
                 sents = split_sentences(item_text)
                 header_prefix = item["meta"].text if item["meta"].kind == "heading" else ""
                 
@@ -103,25 +117,22 @@ def strategy_by_heading(blocks: List[RawBlock], config: Dict[str, Any], note_id:
                         sents.insert(0, s); break
                     take_sents.append(s); take_len += slen
                 
-                # Ersten Teil emittieren
                 _emit(" ".join(take_sents), current_meta["title"], current_meta["path"])
                 
-                # Rest als Carry-Over zurück in die Queue (Regel 3)
+                # Carry-Over: Rest wird vorne in die Queue geschoben
                 if sents:
-                    remainder_text = " ".join(sents)
-                    # Kontext-Erhalt: Überschrift für den Rest wiederholen
-                    if header_prefix and not remainder_text.startswith(header_prefix):
-                        remainder_text = header_prefix + "\n\n" + remainder_text
-                    queue.insert(0, {"text": remainder_text, "meta": item["meta"]})
+                    remainder = " ".join(sents)
+                    if header_prefix and not remainder.startswith(header_prefix):
+                        remainder = header_prefix + "\n\n" + remainder
+                    queue.insert(0, {"text": remainder, "meta": item["meta"], "is_split": True})
 
-    # Letzten Rest wegschreiben
     if current_chunk_text:
         _emit(current_chunk_text, current_meta["title"], current_meta["path"])
 
     return chunks
 
-def strategy_sliding_window(blocks: List[RawBlock], config: Dict[str, Any], note_id: str, context_prefix: str = "") -> List[Chunk]:
-    """Sliding Window: Unverändert erhalten für Standard-Typen."""
+def strategy_sliding_window(blocks: List[RawBlock], config: Dict[str, Any], note_id: str, doc_title: str = "") -> List[Chunk]:
+    """Standard Sliding Window Strategie."""
     target = config.get("target", 400); max_tokens = config.get("max", 600)
     chunks: List[Chunk] = []; buf: List[RawBlock] = []
     for b in blocks:
@@ -129,12 +140,12 @@ def strategy_sliding_window(blocks: List[RawBlock], config: Dict[str, Any], note
         curr_tokens = sum(estimate_tokens(x.text) for x in buf) if buf else 0
         if curr_tokens + b_tokens > max_tokens and buf:
             txt = "\n\n".join([x.text for x in buf]); idx = len(chunks)
-            win = _create_win(doc_title=context_prefix, sec_title=buf[0].section_title, text=txt)
+            win = _create_win(doc_title, buf[0].section_title, txt)
             chunks.append(Chunk(id=f"{note_id}#c{idx:02d}", note_id=note_id, index=idx, text=txt, window=win, token_count=curr_tokens, section_title=buf[0].section_title, section_path=buf[0].section_path, neighbors_prev=None, neighbors_next=None))
             buf = []
         buf.append(b)
     if buf:
         txt = "\n\n".join([x.text for x in buf]); idx = len(chunks)
-        win = _create_win(doc_title=context_prefix, sec_title=buf[0].section_title, text=txt)
+        win = _create_win(doc_title, buf[0].section_title, txt)
         chunks.append(Chunk(id=f"{note_id}#c{idx:02d}", note_id=note_id, index=idx, text=txt, window=win, token_count=estimate_tokens(txt), section_title=buf[0].section_title, section_path=buf[0].section_path, neighbors_prev=None, neighbors_next=None))
     return chunks
\ No newline at end of file

From 06fc42ed37d418423ca5f7396ea47681011a0ccd Mon Sep 17 00:00:00 2001
From: Lars <Lars@stommer.de>
Date: Tue, 30 Dec 2025 07:44:30 +0100
Subject: [PATCH 25/33] =?UTF-8?q?Aktualisierung=20des=20Chunking-Parsers?=
 =?UTF-8?q?=20zur=20Einf=C3=BChrung=20der=20Funktion=20`parse=5Fedges=5Fro?=
 =?UTF-8?q?bust`=20zur=20Extraktion=20von=20Kanten-Kandidaten=20aus=20Wiki?=
 =?UTF-8?q?links=20und=20Callouts.=20Verbesserung=20der=20Satzverarbeitung?=
 =?UTF-8?q?=20durch=20die=20Implementierung=20der=20Funktion=20`split=5Fse?=
 =?UTF-8?q?ntences`.=20Aktualisierung=20der=20Sektions-Chunking-Strategie?=
 =?UTF-8?q?=20auf=20Version=203.9.6=20mit=20optimierter=20Handhabung=20von?=
 =?UTF-8?q?=20leeren=20=C3=9Cberschriften=20und=20Carry-Over=20Logik=20zur?=
 =?UTF-8?q?=20besseren=20Chunk-Erstellung.?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 app/core/chunking/chunking_parser.py     | 57 +++++++++++++++++-----
 app/core/chunking/chunking_strategies.py | 60 ++++++++++++++----------
 2 files changed, 80 insertions(+), 37 deletions(-)

diff --git a/app/core/chunking/chunking_parser.py b/app/core/chunking/chunking_parser.py
index 1448932..efb1a65 100644
--- a/app/core/chunking/chunking_parser.py
+++ b/app/core/chunking/chunking_parser.py
@@ -1,13 +1,23 @@
 """
 FILE: app/core/chunking/chunking_parser.py
 DESCRIPTION: Zerlegt Markdown in atomare Blöcke. Hält H1-Überschriften im Stream
-             und gewährleistet die strukturelle Integrität von Callouts.
+             und extrahiert Kanten-Kandidaten (parse_edges_robust).
 """
 import re
 from typing import List, Tuple, Set
 from .chunking_models import RawBlock
 from .chunking_utils import extract_frontmatter_from_text
 
+_WS = re.compile(r'\s+')
+_SENT_SPLIT = re.compile(r'(?<=[.!?])\s+(?=[A-ZÄÖÜ0-9„(])')
+
+def split_sentences(text: str) -> list[str]:
+    """Teilt Text in Sätze auf unter Berücksichtigung deutscher Interpunktion."""
+    text = _WS.sub(' ', text.strip())
+    if not text: return []
+    # Splittet bei Punkt, Ausrufezeichen oder Fragezeichen, gefolgt von Leerzeichen und Großbuchstabe
+    return [p.strip() for p in _SENT_SPLIT.split(text) if p.strip()]
+
 def parse_blocks(md_text: str) -> Tuple[List[RawBlock], str]:
     """Zerlegt Text in logische Einheiten (RawBlocks), inklusive H1-H6."""
     blocks = []
@@ -16,17 +26,18 @@ def parse_blocks(md_text: str) -> Tuple[List[RawBlock], str]:
     
     # H1 für Metadaten extrahieren
     h1_match = re.search(r'^#\s+(.*)', text_without_fm, re.MULTILINE)
-    if h1_match: h1_title = h1_match.group(1).strip()
+    if h1_match: 
+        h1_title = h1_match.group(1).strip()
     
     lines = text_without_fm.split('\n')
     buffer = []
     
     for line in lines:
         stripped = line.strip()
-        heading_match = re.match(r'^(#{1,6})\s+(.*)', stripped)
         
+        # Heading-Erkennung (H1 bis H6)
+        heading_match = re.match(r'^(#{1,6})\s+(.*)', stripped)
         if heading_match:
-            # Vorherigen Text-Block abschließen
             if buffer:
                 content = "\n".join(buffer).strip()
                 if content: 
@@ -59,13 +70,37 @@ def parse_blocks(md_text: str) -> Tuple[List[RawBlock], str]:
             
     if buffer:
         content = "\n".join(buffer).strip()
-        if content: blocks.append(RawBlock("paragraph", content, None, section_path, current_section_title))
+        if content: 
+            blocks.append(RawBlock("paragraph", content, None, section_path, current_section_title))
             
     return blocks, h1_title
 
-def split_sentences(text: str) -> list[str]:
-    """Teilt Text in Sätze auf unter Berücksichtigung deutscher Interpunktion."""
-    text = re.sub(r'\s+', ' ', text.strip())
-    if not text: return []
-    # Splittet bei Satzzeichen, gefolgt von Leerzeichen und Großbuchstaben
-    return [s.strip() for s in re.split(r'(?<=[.!?])\s+(?=[A-ZÄÖÜ0-9„(])', text) if s.strip()]
\ No newline at end of file
+def parse_edges_robust(text: str) -> Set[str]:
+    """Extrahiert Kanten-Kandidaten aus Wikilinks und Callouts."""
+    found_edges = set()
+    # 1. Wikilinks [[rel:kind|target]]
+    inlines = re.findall(r'\[\[rel:([^\|\]]+)\|?([^\]]*)\]\]', text)
+    for kind, target in inlines:
+        k = kind.strip().lower()
+        t = target.strip()
+        if k and t: found_edges.add(f"{k}:{t}")
+    
+    # 2. Callout Edges > [!edge] kind
+    lines = text.split('\n')
+    current_edge_type = None
+    for line in lines:
+        stripped = line.strip()
+        callout_match = re.match(r'>\s*\[!edge\]\s*([^:\s]+)', stripped)
+        if callout_match:
+            current_edge_type = callout_match.group(1).strip().lower()
+            links = re.findall(r'\[\[([^\]]+)\]\]', stripped)
+            for l in links: 
+                if "rel:" not in l: found_edges.add(f"{current_edge_type}:{l}")
+            continue
+        if current_edge_type and stripped.startswith('>'):
+            links = re.findall(r'\[\[([^\]]+)\]\]', stripped)
+            for l in links: 
+                if "rel:" not in l: found_edges.add(f"{current_edge_type}:{l}")
+        elif not stripped.startswith('>'): 
+            current_edge_type = None
+    return found_edges
\ No newline at end of file
diff --git a/app/core/chunking/chunking_strategies.py b/app/core/chunking/chunking_strategies.py
index 562808b..ba04b68 100644
--- a/app/core/chunking/chunking_strategies.py
+++ b/app/core/chunking/chunking_strategies.py
@@ -1,7 +1,11 @@
 """
 FILE: app/core/chunking/chunking_strategies.py
-DESCRIPTION: Strategie für atomares Sektions-Chunking v3.9.5.
-             Implementiert das 'Pack-and-Carry-Over' Verfahren nach Nutzerwunsch.
+DESCRIPTION: Strategien für atomares Sektions-Chunking v3.9.6.
+             Implementiert das 'Pack-and-Carry-Over' Verfahren:
+             1. Packt ganze Abschnitte basierend auf Schätzung.
+             2. Kein physischer Overflow-Check während des Packens.
+             3. Smart-Zerlegung von Übergrößen mit Carry-Over in die Queue.
+             - Hard-Split-Logik für strict_heading_split integriert.
 """
 from typing import List, Dict, Any, Optional
 from .chunking_models import RawBlock, Chunk
@@ -16,10 +20,7 @@ def _create_win(doc_title: str, sec_title: Optional[str], text: str) -> str:
 
 def strategy_by_heading(blocks: List[RawBlock], config: Dict[str, Any], note_id: str, doc_title: str = "") -> List[Chunk]:
     """
-    Universelle Sektions-Strategie:
-    - Smart-Edge=True: Packt Sektionen basierend auf Schätzung (Regel 1-3).
-    - Smart-Edge=False: Hard Split an Überschriften (außer leere Header).
-    - Strict=True erzwingt Hard Split Verhalten innerhalb der Smart-Logik.
+    Universelle Heading-Strategie mit Carry-Over Logik.
     """
     smart_edge = config.get("enable_smart_edge_allocation", True)
     strict = config.get("strict_heading_split", False)
@@ -46,66 +47,73 @@ def strategy_by_heading(blocks: List[RawBlock], config: Dict[str, Any], note_id:
     for b in blocks:
         if b.kind == "heading" and b.level <= split_level:
             if curr_blocks:
-                sections.append({"text": "\n\n".join([x.text for x in curr_blocks]), 
-                                 "meta": curr_blocks[0], "is_empty": len(curr_blocks) == 1})
+                sections.append({
+                    "text": "\n\n".join([x.text for x in curr_blocks]), 
+                    "meta": curr_blocks[0],
+                    "is_empty": len(curr_blocks) == 1 and curr_blocks[0].kind == "heading"
+                })
             curr_blocks = [b]
-        else: curr_blocks.append(b)
+        else:
+            curr_sec_has_content = True
+            curr_blocks.append(b)
     if curr_blocks:
-        sections.append({"text": "\n\n".join([x.text for x in curr_blocks]), 
-                         "meta": curr_blocks[0], "is_empty": len(curr_blocks) == 1})
+        sections.append({
+            "text": "\n\n".join([x.text for x in curr_blocks]), 
+            "meta": curr_blocks[0],
+            "is_empty": len(curr_blocks) == 1 and curr_blocks[0].kind == "heading"
+        })
 
     # --- SCHRITT 2: Verarbeitung der Queue ---
     queue = list(sections)
     current_chunk_text = ""
     current_meta = {"title": None, "path": "/"}
-
-    # Hard-Split-Bedingung: Entweder Smart-Edge aus ODER Profil ist Strict
+    
+    # Bestimmung des Modus: Hard-Split wenn smart_edge=False ODER strict=True
     is_hard_split_mode = (not smart_edge) or (strict)
 
     while queue:
         item = queue.pop(0)
         item_text = item["text"]
         
-        # Initialisierung für neuen Chunk
         if not current_chunk_text:
             current_meta["title"] = item["meta"].section_title
             current_meta["path"] = item["meta"].section_path
 
-        # FALL A: Hard Split Modus (Regel: Trenne bei jeder Sektion <= Level)
+        # FALL A: HARD SPLIT MODUS
         if is_hard_split_mode:
-            # Regel: Leere Überschriften verbleiben am nächsten Chunk
+            # Leere Überschriften (H1 vor H2) werden mit dem nächsten Item verschmolzen
             if item.get("is_empty", False) and queue:
                 current_chunk_text = (current_chunk_text + "\n\n" + item_text).strip()
-                continue # Nimm das nächste Item dazu
+                continue 
             
             combined = (current_chunk_text + "\n\n" + item_text).strip()
+            # Wenn durch das Verschmelzen das Limit gesprengt würde, flashen wir vorher
             if estimate_tokens(combined) > max_tokens and current_chunk_text:
-                # Falls es trotz Hard-Split zu groß wird, flashen wir erst den alten Teil
                 _emit(current_chunk_text, current_meta["title"], current_meta["path"])
                 current_chunk_text = item_text
             else:
                 current_chunk_text = combined
             
-            # Im Hard Split flashen wir nach jeder Sektion, die nicht leer ist
+            # Im Hard-Split wird nach jeder nicht-leeren Sektion geflasht
             _emit(current_chunk_text, current_meta["title"], current_meta["path"])
             current_chunk_text = ""
             continue
 
-        # FALL B: Smart Mode (Regel 1-3)
+        # FALL B: SMART MODE (Regel 1-3)
         combined_text = (current_chunk_text + "\n\n" + item_text).strip() if current_chunk_text else item_text
         combined_est = estimate_tokens(combined_text)
 
         if combined_est <= max_tokens:
-            # Regel 1 & 2: Passt nach Schätzung -> Aufnehmen
+            # Regel 1 & 2: Passt rein -> Aufnehmen
             current_chunk_text = combined_text
         else:
-            # Regel 3: Passt nicht -> Entweder Puffer flashen oder Item zerlegen
             if current_chunk_text:
+                # Regel 2: Flashen an Sektionsgrenze, Item zurücklegen
                 _emit(current_chunk_text, current_meta["title"], current_meta["path"])
                 current_chunk_text = ""
-                queue.insert(0, item) # Item für neuen Chunk zurücklegen
+                queue.insert(0, item)
             else:
-                # Einzelne Sektion zu groß -> Smart Zerlegung
+                # Regel 3: Einzelne Sektion zu groß -> Smart Zerlegung
                 sents = split_sentences(item_text)
                 header_prefix = item["meta"].text if item["meta"].kind == "heading" else ""
                 
@@ -119,11 +127,11 @@ def strategy_by_heading(blocks: List[RawBlock], config: Dict[str, Any], note_id:
                 
                 _emit(" ".join(take_sents), current_meta["title"], current_meta["path"])
                 
-                # Carry-Over: Rest wird vorne in die Queue geschoben
                 if sents:
                     remainder = " ".join(sents)
                     if header_prefix and not remainder.startswith(header_prefix):
                         remainder = header_prefix + "\n\n" + remainder
+                    # Carry-Over: Rest wird vorne in die Queue geschoben
                     queue.insert(0, {"text": remainder, "meta": item["meta"], "is_split": True})
 
     if current_chunk_text:
@@ -132,7 +140,7 @@ def strategy_by_heading(blocks: List[RawBlock], config: Dict[str, Any], note_id:
     return chunks
 
 def strategy_sliding_window(blocks: List[RawBlock], config: Dict[str, Any], note_id: str, doc_title: str = "") -> List[Chunk]:
-    """Standard Sliding Window Strategie."""
+    """Basis-Sliding-Window für flache Texte."""
     target = config.get("target", 400); max_tokens = config.get("max", 600)
     chunks: List[Chunk] = []; buf: List[RawBlock] = []
     for b in blocks:

From 65d697b7be3fe7033fc4c0fc20fc16f20274dbb0 Mon Sep 17 00:00:00 2001
From: Lars <Lars@stommer.de>
Date: Tue, 30 Dec 2025 07:54:54 +0100
Subject: [PATCH 26/33] =?UTF-8?q?Aktualisierung=20der=20atomaren=20Sektion?=
 =?UTF-8?q?s-Chunking-Strategie=20auf=20Version=203.9.8=20mit=20verbessert?=
 =?UTF-8?q?en=20Implementierungen=20des=20'Pack-and-Carry-Over'=20Verfahre?=
 =?UTF-8?q?ns.=20Einf=C3=BChrung=20von=20Look-Ahead=20zur=20strikten=20Ein?=
 =?UTF-8?q?haltung=20von=20Sektionsgrenzen=20und=20Vermeidung=20redundante?=
 =?UTF-8?q?r=20Kanten-Injektionen.=20Anpassungen=20an=20der=20Chunk-Erstel?=
 =?UTF-8?q?lung=20und=20Optimierung=20der=20Handhabung=20von=20leeren=20?=
 =?UTF-8?q?=C3=9Cberschriften.?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 app/core/chunking/chunking_parser.py     | 21 +++++++++++-----
 app/core/chunking/chunking_strategies.py | 31 +++++++++++++-----------
 2 files changed, 32 insertions(+), 20 deletions(-)

diff --git a/app/core/chunking/chunking_parser.py b/app/core/chunking/chunking_parser.py
index efb1a65..e36ff0e 100644
--- a/app/core/chunking/chunking_parser.py
+++ b/app/core/chunking/chunking_parser.py
@@ -1,7 +1,8 @@
 """
 FILE: app/core/chunking/chunking_parser.py
-DESCRIPTION: Zerlegt Markdown in atomare Blöcke. Hält H1-Überschriften im Stream
-             und extrahiert Kanten-Kandidaten (parse_edges_robust).
+DESCRIPTION: Zerlegt Markdown in logische Einheiten (RawBlocks). 
+             Hält alle Überschriftenebenen (H1-H6) im Stream.
+             Stellt die Funktion parse_edges_robust zur Verfügung.
 """
 import re
 from typing import List, Tuple, Set
@@ -21,10 +22,14 @@ def split_sentences(text: str) -> list[str]:
 def parse_blocks(md_text: str) -> Tuple[List[RawBlock], str]:
     """Zerlegt Text in logische Einheiten (RawBlocks), inklusive H1-H6."""
     blocks = []
-    h1_title = "Dokument"; section_path = "/"; current_section_title = None
+    h1_title = "Dokument"
+    section_path = "/"
+    current_section_title = None
+    
+    # Frontmatter entfernen
     fm, text_without_fm = extract_frontmatter_from_text(md_text)
     
-    # H1 für Metadaten extrahieren
+    # H1 für Note-Titel extrahieren (Metadaten-Zweck)
     h1_match = re.search(r'^#\s+(.*)', text_without_fm, re.MULTILINE)
     if h1_match: 
         h1_title = h1_match.group(1).strip()
@@ -38,6 +43,7 @@ def parse_blocks(md_text: str) -> Tuple[List[RawBlock], str]:
         # Heading-Erkennung (H1 bis H6)
         heading_match = re.match(r'^(#{1,6})\s+(.*)', stripped)
         if heading_match:
+            # Vorherigen Text-Block abschließen
             if buffer:
                 content = "\n".join(buffer).strip()
                 if content: 
@@ -47,16 +53,17 @@ def parse_blocks(md_text: str) -> Tuple[List[RawBlock], str]:
             level = len(heading_match.group(1))
             title = heading_match.group(2).strip()
             
-            # Pfad- und Titel-Update
+            # Pfad- und Titel-Update für die Metadaten der folgenden Blöcke
             if level == 1:
                 current_section_title = title; section_path = "/"
             elif level == 2:
                 current_section_title = title; section_path = f"/{current_section_title}"
             
+            # Die Überschrift selbst als regulären Block hinzufügen
             blocks.append(RawBlock("heading", stripped, level, section_path, current_section_title))
             continue
 
-        # Trenner oder Leerzeilen beenden Blöcke, außer innerhalb von Callouts
+        # Trenner (---) oder Leerzeilen beenden Blöcke, außer innerhalb von Callouts
         if (not stripped or stripped == "---") and not line.startswith('>'):
             if buffer:
                 content = "\n".join(buffer).strip()
@@ -93,10 +100,12 @@ def parse_edges_robust(text: str) -> Set[str]:
         callout_match = re.match(r'>\s*\[!edge\]\s*([^:\s]+)', stripped)
         if callout_match:
             current_edge_type = callout_match.group(1).strip().lower()
+            # Links in der gleichen Zeile des Callouts
             links = re.findall(r'\[\[([^\]]+)\]\]', stripped)
             for l in links: 
                 if "rel:" not in l: found_edges.add(f"{current_edge_type}:{l}")
             continue
+        # Links in Folgezeilen des Callouts
         if current_edge_type and stripped.startswith('>'):
             links = re.findall(r'\[\[([^\]]+)\]\]', stripped)
             for l in links: 
diff --git a/app/core/chunking/chunking_strategies.py b/app/core/chunking/chunking_strategies.py
index ba04b68..e16121a 100644
--- a/app/core/chunking/chunking_strategies.py
+++ b/app/core/chunking/chunking_strategies.py
@@ -1,11 +1,9 @@
 """
 FILE: app/core/chunking/chunking_strategies.py
-DESCRIPTION: Strategien für atomares Sektions-Chunking v3.9.6.
-             Implementiert das 'Pack-and-Carry-Over' Verfahren:
-             1. Packt ganze Abschnitte basierend auf Schätzung.
-             2. Kein physischer Overflow-Check während des Packens.
-             3. Smart-Zerlegung von Übergrößen mit Carry-Over in die Queue.
-             - Hard-Split-Logik für strict_heading_split integriert.
+DESCRIPTION: Strategien für atomares Sektions-Chunking v3.9.8.
+             Implementiert das 'Pack-and-Carry-Over' Verfahren nach Regel 1-3.
+             - Keine redundante Kanten-Injektion.
+             - Strikte Einhaltung von Sektionsgrenzen via Look-Ahead.
 """
 from typing import List, Dict, Any, Optional
 from .chunking_models import RawBlock, Chunk
@@ -13,6 +11,7 @@ from .chunking_utils import estimate_tokens
 from .chunking_parser import split_sentences
 
 def _create_win(doc_title: str, sec_title: Optional[str], text: str) -> str:
+    """Baut den Breadcrumb-Kontext für das Embedding-Fenster."""
     parts = [doc_title] if doc_title else []
     if sec_title and sec_title != doc_title: parts.append(sec_title)
     prefix = " > ".join(parts)
@@ -33,6 +32,7 @@ def strategy_by_heading(blocks: List[RawBlock], config: Dict[str, Any], note_id:
     chunks: List[Chunk] = []
 
     def _emit(txt, title, path):
+        """Schreibt den finalen Chunk ohne Text-Modifikationen."""
         idx = len(chunks)
         win = _create_win(doc_title, title, txt)
         chunks.append(Chunk(
@@ -54,7 +54,6 @@ def strategy_by_heading(blocks: List[RawBlock], config: Dict[str, Any], note_id:
                 })
             curr_blocks = [b]
         else:
-            curr_sec_has_content = True
             curr_blocks.append(b)
     if curr_blocks:
         sections.append({
@@ -75,26 +74,27 @@ def strategy_by_heading(blocks: List[RawBlock], config: Dict[str, Any], note_id:
         item = queue.pop(0)
         item_text = item["text"]
         
+        # Initialisierung für neuen Chunk
         if not current_chunk_text:
             current_meta["title"] = item["meta"].section_title
             current_meta["path"] = item["meta"].section_path
 
         # FALL A: HARD SPLIT MODUS
         if is_hard_split_mode:
-            # Leere Überschriften (H1 vor H2) werden mit dem nächsten Item verschmolzen
+            # Leere Überschriften (z.B. H1 direkt vor H2) verbleiben am nächsten Chunk
             if item.get("is_empty", False) and queue:
                 current_chunk_text = (current_chunk_text + "\n\n" + item_text).strip()
                 continue 
             
             combined = (current_chunk_text + "\n\n" + item_text).strip()
-            # Wenn durch das Verschmelzen das Limit gesprengt würde, flashen wir vorher
+            # Wenn durch Verschmelzung das Limit gesprengt würde, vorher flashen
             if estimate_tokens(combined) > max_tokens and current_chunk_text:
                 _emit(current_chunk_text, current_meta["title"], current_meta["path"])
                 current_chunk_text = item_text
             else:
                 current_chunk_text = combined
             
-            # Im Hard-Split wird nach jeder nicht-leeren Sektion geflasht
+            # Im Hard-Split wird nach jeder Sektion geflasht
             _emit(current_chunk_text, current_meta["title"], current_meta["path"])
             current_chunk_text = ""
             continue
@@ -104,7 +104,7 @@ def strategy_by_heading(blocks: List[RawBlock], config: Dict[str, Any], note_id:
         combined_est = estimate_tokens(combined_text)
 
         if combined_est <= max_tokens:
-            # Regel 1 & 2: Passt rein -> Aufnehmen
+            # Regel 1 & 2: Passt rein laut Schätzung -> Aufnehmen
             current_chunk_text = combined_text
         else:
             if current_chunk_text:
@@ -119,8 +119,7 @@ def strategy_by_heading(blocks: List[RawBlock], config: Dict[str, Any], note_id:
                 
                 take_sents = []; take_len = 0
                 while sents:
-                    s = sents.pop(0)
-                    slen = estimate_tokens(s)
+                    s = sents.pop(0); slen = estimate_tokens(s)
                     if take_len + slen > target and take_sents:
                         sents.insert(0, s); break
                     take_sents.append(s); take_len += slen
@@ -129,6 +128,7 @@ def strategy_by_heading(blocks: List[RawBlock], config: Dict[str, Any], note_id:
                 
                 if sents:
                     remainder = " ".join(sents)
+                    # Kontext-Erhalt: Überschrift für den Rest wiederholen
                     if header_prefix and not remainder.startswith(header_prefix):
                         remainder = header_prefix + "\n\n" + remainder
                     # Carry-Over: Rest wird vorne in die Queue geschoben
@@ -140,9 +140,10 @@ def strategy_by_heading(blocks: List[RawBlock], config: Dict[str, Any], note_id:
     return chunks
 
 def strategy_sliding_window(blocks: List[RawBlock], config: Dict[str, Any], note_id: str, doc_title: str = "") -> List[Chunk]:
-    """Basis-Sliding-Window für flache Texte."""
+    """Standard-Sliding-Window für flache Texte ohne Sektionsfokus."""
     target = config.get("target", 400); max_tokens = config.get("max", 600)
     chunks: List[Chunk] = []; buf: List[RawBlock] = []
+    
     for b in blocks:
         b_tokens = estimate_tokens(b.text)
         curr_tokens = sum(estimate_tokens(x.text) for x in buf) if buf else 0
@@ -152,8 +153,10 @@ def strategy_sliding_window(blocks: List[RawBlock], config: Dict[str, Any], note
             chunks.append(Chunk(id=f"{note_id}#c{idx:02d}", note_id=note_id, index=idx, text=txt, window=win, token_count=curr_tokens, section_title=buf[0].section_title, section_path=buf[0].section_path, neighbors_prev=None, neighbors_next=None))
             buf = []
         buf.append(b)
+        
     if buf:
         txt = "\n\n".join([x.text for x in buf]); idx = len(chunks)
         win = _create_win(doc_title, buf[0].section_title, txt)
         chunks.append(Chunk(id=f"{note_id}#c{idx:02d}", note_id=note_id, index=idx, text=txt, window=win, token_count=estimate_tokens(txt), section_title=buf[0].section_title, section_path=buf[0].section_path, neighbors_prev=None, neighbors_next=None))
+        
     return chunks
\ No newline at end of file

From 33dff04d4707c3e8d6f00b44e6f57783206c54fc Mon Sep 17 00:00:00 2001
From: Lars <Lars@stommer.de>
Date: Tue, 30 Dec 2025 08:22:17 +0100
Subject: [PATCH 27/33] Fix v3.3.5: Prevent duplicate Wikilink targets in text
 by checking for existing references before injecting section edges. Update
 comments for clarity and maintain consistency in the code structure.

---
 app/core/chunking/chunking_propagation.py    | 21 ++++++-----
 app/core/ingestion/ingestion_note_payload.py | 39 ++++++++++++--------
 2 files changed, 34 insertions(+), 26 deletions(-)

diff --git a/app/core/chunking/chunking_propagation.py b/app/core/chunking/chunking_propagation.py
index 099d075..af68442 100644
--- a/app/core/chunking/chunking_propagation.py
+++ b/app/core/chunking/chunking_propagation.py
@@ -1,9 +1,7 @@
 """
 FILE: app/core/chunking/chunking_propagation.py
 DESCRIPTION: Injiziert Sektions-Kanten physisch in den Text (Embedding-Enrichment).
-             Stellt die "Gold-Standard"-Qualität von v3.1.0 wieder her.
-VERSION: 3.3.1
-STATUS: Active
+             Fix v3.3.5: Erkennt Wikilink-Targets, um Dopplungen zu verhindern.
 """
 from typing import List, Dict, Set
 from .chunking_models import Chunk
@@ -12,7 +10,7 @@ from .chunking_parser import parse_edges_robust
 def propagate_section_edges(chunks: List[Chunk]) -> List[Chunk]:
     """
     Sammelt Kanten pro Sektion und schreibt sie hart in den Text und das Window.
-    Dies ist essenziell für die Vektorisierung der Beziehungen.
+    Verhindert Dopplungen, wenn Kanten bereits via [!edge] Callout vorhanden sind.
     """
     # 1. Sammeln: Alle expliziten Kanten pro Sektions-Pfad aggregieren
     section_map: Dict[str, Set[str]] = {} # path -> set(kind:target)
@@ -39,18 +37,21 @@ def propagate_section_edges(chunks: List[Chunk]) -> List[Chunk]:
             injections = []
             for e_str in edges_to_add:
                 kind, target = e_str.split(':', 1)
-                # Nur injizieren, wenn die Kante nicht bereits im Text steht
-                token = f"[[rel:{kind}|{target}]]"
-                if token not in ch.text:
-                    injections.append(token)
+                
+                # DER FIX: Wir prüfen, ob das Ziel (target) bereits im Text vorkommt.
+                # Wir suchen nach [[target]] (Callout-Stil) oder |target]] (Rel-Stil).
+                if f"[[{target}]]" in ch.text or f"|{target}]]" in ch.text:
+                    continue
+                
+                injections.append(f"[[rel:{kind}|{target}]]")
             
             if injections:
-                # Physische Anreicherung (Der v3.1.0 Qualitäts-Fix)
+                # Physische Anreicherung
                 # Triple-Newline für saubere Trennung im Embedding-Fenster
                 block = "\n\n\n" + " ".join(injections)
                 ch.text += block
                 
-                # ENTSCHEIDEND: Auch ins Window schreiben, da Qdrant hier sucht!
+                # Auch ins Window schreiben, da Qdrant hier sucht!
                 if ch.window:
                     ch.window += block
                 else:
diff --git a/app/core/ingestion/ingestion_note_payload.py b/app/core/ingestion/ingestion_note_payload.py
index d41410b..3df4d4a 100644
--- a/app/core/ingestion/ingestion_note_payload.py
+++ b/app/core/ingestion/ingestion_note_payload.py
@@ -3,9 +3,8 @@ FILE: app/core/ingestion/ingestion_note_payload.py
 DESCRIPTION: Baut das JSON-Objekt für mindnet_notes. 
 FEATURES: 
   - Multi-Hash (body/full) für flexible Change Detection.
-  - Fix v2.4.4: Integration der zentralen Registry (WP-14) für konsistente Defaults.
-VERSION: 2.4.4
-STATUS: Active
+  - Fix v2.4.5: Präzise Hash-Logik für Profil-Änderungen.
+  - Integration der zentralen Registry (WP-14).
 """
 from __future__ import annotations
 from typing import Any, Dict, Tuple, Optional
@@ -45,14 +44,22 @@ def _compute_hash(content: str) -> str:
     return hashlib.sha256(content.encode("utf-8")).hexdigest()
 
 def _get_hash_source_content(n: Dict[str, Any], mode: str) -> str:
-    """Generiert den Hash-Input-String basierend auf Body oder Metadaten."""
-    body = str(n.get("body") or "")
+    """
+    Generiert den Hash-Input-String basierend auf Body oder Metadaten.
+    Fix: Inkludiert nun alle entscheidungsrelevanten Profil-Parameter.
+    """
+    body = str(n.get("body") or "").strip()
     if mode == "body": return body
     if mode == "full":
         fm = n.get("frontmatter") or {}
         meta_parts = []
-        # Sortierte Liste für deterministische Hashes
-        for k in sorted(["title", "type", "status", "tags", "chunking_profile", "chunk_profile", "retriever_weight"]):
+        # Wir inkludieren alle Felder, die das Chunking oder Retrieval beeinflussen
+        keys = [
+            "title", "type", "status", "tags", 
+            "chunking_profile", "chunk_profile", 
+            "retriever_weight", "split_level", "strict_heading_split"
+        ]
+        for k in sorted(keys):
             val = fm.get(k)
             if val is not None: meta_parts.append(f"{k}:{val}")
         return f"{'|'.join(meta_parts)}||{body}"
@@ -79,11 +86,11 @@ def _cfg_defaults(reg: dict) -> dict:
 def make_note_payload(note: Any, *args, **kwargs) -> Dict[str, Any]:
     """
     Baut das Note-Payload inklusive Multi-Hash und Audit-Validierung.
-    WP-14: Nutzt nun die zentrale Registry für alle Fallbacks.
+    WP-14: Nutzt die zentrale Registry für alle Fallbacks.
     """
     n = _as_dict(note)
     
-    # Nutzt übergebene Registry oder lädt sie global
+    # Registry & Context Settings
     reg = kwargs.get("types_cfg") or load_type_registry()
     hash_source = kwargs.get("hash_source", "parsed")
     hash_normalize = kwargs.get("hash_normalize", "canonical")
@@ -96,7 +103,6 @@ def make_note_payload(note: Any, *args, **kwargs) -> Dict[str, Any]:
     ingest_cfg = reg.get("ingestion_settings", {})
 
     # --- retriever_weight Audit ---
-    # Priorität: Frontmatter -> Typ-Config -> globale Config -> Env-Var
     default_rw = float(os.environ.get("MINDNET_DEFAULT_RETRIEVER_WEIGHT", 1.0))
     retriever_weight = fm.get("retriever_weight")
     if retriever_weight is None:
@@ -107,14 +113,13 @@ def make_note_payload(note: Any, *args, **kwargs) -> Dict[str, Any]:
         retriever_weight = default_rw
 
     # --- chunk_profile Audit ---
-    # Nutzt nun primär die ingestion_settings aus der Registry
     chunk_profile = fm.get("chunking_profile") or fm.get("chunk_profile")
     if chunk_profile is None:
         chunk_profile = cfg_type.get("chunking_profile") or cfg_type.get("chunk_profile")
     if chunk_profile is None:
         chunk_profile = ingest_cfg.get("default_chunk_profile", cfg_def.get("chunking_profile", "sliding_standard"))
 
-    # --- edge_defaults ---
+    # --- edge_defaults Audit ---
     edge_defaults = fm.get("edge_defaults")
     if edge_defaults is None:
         edge_defaults = cfg_type.get("edge_defaults", cfg_def.get("edge_defaults", []))
@@ -138,21 +143,23 @@ def make_note_payload(note: Any, *args, **kwargs) -> Dict[str, Any]:
     }
     
     # --- MULTI-HASH ---
-    # Generiert Hashes für Change Detection
     for mode in ["body", "full"]:
         content = _get_hash_source_content(n, mode)
         payload["hashes"][f"{mode}:{hash_source}:{hash_normalize}"] = _compute_hash(content)
 
-    # Metadaten Anreicherung
+    # Metadaten Anreicherung (Tags, Aliases, Zeitstempel)
     tags = fm.get("tags") or fm.get("keywords") or n.get("tags")
     if tags: payload["tags"] = _ensure_list(tags)
-    if fm.get("aliases"): payload["aliases"] = _ensure_list(fm.get("aliases"))
+    
+    aliases = fm.get("aliases")
+    if aliases: payload["aliases"] = _ensure_list(aliases)
     
     for k in ("created", "modified", "date"):
         v = fm.get(k) or n.get(k)
         if v: payload[k] = str(v)
     
-    if n.get("body"): payload["fulltext"] = str(n["body"])
+    if n.get("body"): 
+        payload["fulltext"] = str(n["body"])
 
     # Final JSON Validation Audit
     json.loads(json.dumps(payload, ensure_ascii=False))

From 6aa6b32a6cb9c874b5db000dfedf7a11ddc24d64 Mon Sep 17 00:00:00 2001
From: Lars <Lars@stommer.de>
Date: Tue, 30 Dec 2025 08:40:19 +0100
Subject: [PATCH 28/33] Update chunking system to version 3.9.9, synchronizing
 parameters with the orchestrator and enhancing edge detection. Implement
 robust parsing to prevent duplicate edges in section propagation. Adjust
 comments for clarity and consistency across the codebase.

---
 app/core/chunking/chunking_processor.py      | 20 ++++++++----
 app/core/chunking/chunking_propagation.py    | 19 +++++++----
 app/core/chunking/chunking_strategies.py     | 22 +++++++------
 app/core/ingestion/ingestion_note_payload.py |  2 ++
 app/core/ingestion/ingestion_processor.py    | 34 ++++++++++++++++----
 5 files changed, 68 insertions(+), 29 deletions(-)

diff --git a/app/core/chunking/chunking_processor.py b/app/core/chunking/chunking_processor.py
index 1a17acb..26c2b68 100644
--- a/app/core/chunking/chunking_processor.py
+++ b/app/core/chunking/chunking_processor.py
@@ -1,7 +1,8 @@
 """
 FILE: app/core/chunking/chunking_processor.py
 DESCRIPTION: Der zentrale Orchestrator für das Chunking-System.
-             AUDIT v3.3.3: Wiederherstellung der "Gold-Standard" Qualität.
+             AUDIT v3.3.4: Wiederherstellung der "Gold-Standard" Qualität.
+             - Fix: Synchronisierung der Parameter (context_prefix) für alle Strategien.
              - Integriert physikalische Kanten-Injektion (Propagierung).
              - Stellt H1-Kontext-Fenster sicher.
              - Baut den Candidate-Pool für die WP-15b Ingestion auf.
@@ -30,16 +31,19 @@ async def assemble_chunks(note_id: str, md_text: str, note_type: str, config: Op
     fm, body_text = extract_frontmatter_from_text(md_text)
     blocks, doc_title = parse_blocks(md_text)
     
-    # Vorbereitung des H1-Präfix für die Embedding-Fenster
+    # Vorbereitung des H1-Präfix für die Embedding-Fenster (Breadcrumbs)
     h1_prefix = f"# {doc_title}" if doc_title else ""
     
     # 2. Anwendung der Splitting-Strategie
-    # Wir übergeben den Dokument-Titel/Präfix für die Window-Bildung.
+    # Alle Strategien nutzen nun einheitlich context_prefix für die Window-Bildung.
     if config.get("strategy") == "by_heading":
-        chunks = await asyncio.to_thread(strategy_by_heading, blocks, config, note_id, doc_title)
+        chunks = await asyncio.to_thread(
+            strategy_by_heading, blocks, config, note_id, context_prefix=h1_prefix
+        )
     else:
-        # sliding_window nutzt nun den context_prefix für das Window-Feld.
-        chunks = await asyncio.to_thread(strategy_sliding_window, blocks, config, note_id, context_prefix=h1_prefix)
+        chunks = await asyncio.to_thread(
+            strategy_sliding_window, blocks, config, note_id, context_prefix=h1_prefix
+        )
 
     if not chunks: 
         return []
@@ -52,6 +56,7 @@ async def assemble_chunks(note_id: str, md_text: str, note_type: str, config: Op
     # Zuerst die explizit im Text vorhandenen Kanten sammeln.
     for ch in chunks:
         # Wir extrahieren aus dem bereits (durch Propagation) angereicherten Text.
+        # ch.candidate_pool wird im Modell-Konstruktor als leere Liste initialisiert.
         for e_str in parse_edges_robust(ch.text):
             parts = e_str.split(':', 1)
             if len(parts) == 2:
@@ -71,7 +76,7 @@ async def assemble_chunks(note_id: str, md_text: str, note_type: str, config: Op
             parts = e_str.split(':', 1)
             if len(parts) == 2:
                 k, t = parts
-                # Diese Kanten werden als "Global Pool" markiert für die spätere KI-Prüfung.
+                # Diese Kanten werden als "global_pool" markiert für die spätere KI-Prüfung.
                 for ch in chunks: 
                     ch.candidate_pool.append({"kind": k, "to": t, "provenance": "global_pool"})
 
@@ -80,6 +85,7 @@ async def assemble_chunks(note_id: str, md_text: str, note_type: str, config: Op
         seen = set()
         unique = []
         for c in ch.candidate_pool:
+            # Eindeutigkeit über Typ, Ziel und Herkunft (Provenance)
             key = (c["kind"], c["to"], c["provenance"])
             if key not in seen:
                 seen.add(key)
diff --git a/app/core/chunking/chunking_propagation.py b/app/core/chunking/chunking_propagation.py
index af68442..890b89e 100644
--- a/app/core/chunking/chunking_propagation.py
+++ b/app/core/chunking/chunking_propagation.py
@@ -1,7 +1,8 @@
 """
 FILE: app/core/chunking/chunking_propagation.py
 DESCRIPTION: Injiziert Sektions-Kanten physisch in den Text (Embedding-Enrichment).
-             Fix v3.3.5: Erkennt Wikilink-Targets, um Dopplungen zu verhindern.
+             Fix v3.3.6: Nutzt robustes Parsing zur Erkennung vorhandener Kanten,
+             um Dopplungen direkt hinter [!edge] Callouts format-agnostisch zu verhindern.
 """
 from typing import List, Dict, Set
 from .chunking_models import Chunk
@@ -34,15 +35,19 @@ def propagate_section_edges(chunks: List[Chunk]) -> List[Chunk]:
             if not edges_to_add: 
                 continue
             
+            # Vorhandene Kanten (Typ:Ziel) in DIESEM Chunk ermitteln, 
+            # um Dopplungen (z.B. durch Callouts) zu vermeiden.
+            existing_edges = parse_edges_robust(ch.text)
+            
             injections = []
-            for e_str in edges_to_add:
-                kind, target = e_str.split(':', 1)
-                
-                # DER FIX: Wir prüfen, ob das Ziel (target) bereits im Text vorkommt.
-                # Wir suchen nach [[target]] (Callout-Stil) oder |target]] (Rel-Stil).
-                if f"[[{target}]]" in ch.text or f"|{target}]]" in ch.text:
+            # Sortierung für deterministische Ergebnisse
+            for e_str in sorted(list(edges_to_add)):
+                # Wenn die Kante (Typ + Ziel) bereits vorhanden ist (egal welches Format), 
+                # überspringen wir die Injektion für diesen Chunk.
+                if e_str in existing_edges:
                     continue
                 
+                kind, target = e_str.split(':', 1)
                 injections.append(f"[[rel:{kind}|{target}]]")
             
             if injections:
diff --git a/app/core/chunking/chunking_strategies.py b/app/core/chunking/chunking_strategies.py
index e16121a..5ca68fe 100644
--- a/app/core/chunking/chunking_strategies.py
+++ b/app/core/chunking/chunking_strategies.py
@@ -1,25 +1,29 @@
 """
 FILE: app/core/chunking/chunking_strategies.py
-DESCRIPTION: Strategien für atomares Sektions-Chunking v3.9.8.
+DESCRIPTION: Strategien für atomares Sektions-Chunking v3.9.9.
              Implementiert das 'Pack-and-Carry-Over' Verfahren nach Regel 1-3.
              - Keine redundante Kanten-Injektion.
              - Strikte Einhaltung von Sektionsgrenzen via Look-Ahead.
+             - Fix: Synchronisierung der Parameter mit dem Orchestrator (context_prefix).
 """
 from typing import List, Dict, Any, Optional
 from .chunking_models import RawBlock, Chunk
 from .chunking_utils import estimate_tokens
 from .chunking_parser import split_sentences
 
-def _create_win(doc_title: str, sec_title: Optional[str], text: str) -> str:
+def _create_win(context_prefix: str, sec_title: Optional[str], text: str) -> str:
     """Baut den Breadcrumb-Kontext für das Embedding-Fenster."""
-    parts = [doc_title] if doc_title else []
-    if sec_title and sec_title != doc_title: parts.append(sec_title)
+    parts = [context_prefix] if context_prefix else []
+    # Verhindert Dopplung, falls der Context-Prefix (H1) bereits den Sektionsnamen enthält
+    if sec_title and f"# {sec_title}" != context_prefix and sec_title not in (context_prefix or ""):
+        parts.append(sec_title)
     prefix = " > ".join(parts)
     return f"{prefix}\n{text}".strip() if prefix else text
 
-def strategy_by_heading(blocks: List[RawBlock], config: Dict[str, Any], note_id: str, doc_title: str = "") -> List[Chunk]:
+def strategy_by_heading(blocks: List[RawBlock], config: Dict[str, Any], note_id: str, context_prefix: str = "") -> List[Chunk]:
     """
     Universelle Heading-Strategie mit Carry-Over Logik.
+    Synchronisiert auf context_prefix für Kompatibilität mit dem Orchestrator.
     """
     smart_edge = config.get("enable_smart_edge_allocation", True)
     strict = config.get("strict_heading_split", False)
@@ -34,7 +38,7 @@ def strategy_by_heading(blocks: List[RawBlock], config: Dict[str, Any], note_id:
     def _emit(txt, title, path):
         """Schreibt den finalen Chunk ohne Text-Modifikationen."""
         idx = len(chunks)
-        win = _create_win(doc_title, title, txt)
+        win = _create_win(context_prefix, title, txt)
         chunks.append(Chunk(
             id=f"{note_id}#c{idx:02d}", note_id=note_id, index=idx,
             text=txt, window=win, token_count=estimate_tokens(txt),
@@ -139,7 +143,7 @@ def strategy_by_heading(blocks: List[RawBlock], config: Dict[str, Any], note_id:
 
     return chunks
 
-def strategy_sliding_window(blocks: List[RawBlock], config: Dict[str, Any], note_id: str, doc_title: str = "") -> List[Chunk]:
+def strategy_sliding_window(blocks: List[RawBlock], config: Dict[str, Any], note_id: str, context_prefix: str = "") -> List[Chunk]:
     """Standard-Sliding-Window für flache Texte ohne Sektionsfokus."""
     target = config.get("target", 400); max_tokens = config.get("max", 600)
     chunks: List[Chunk] = []; buf: List[RawBlock] = []
@@ -149,14 +153,14 @@ def strategy_sliding_window(blocks: List[RawBlock], config: Dict[str, Any], note
         curr_tokens = sum(estimate_tokens(x.text) for x in buf) if buf else 0
         if curr_tokens + b_tokens > max_tokens and buf:
             txt = "\n\n".join([x.text for x in buf]); idx = len(chunks)
-            win = _create_win(doc_title, buf[0].section_title, txt)
+            win = _create_win(context_prefix, buf[0].section_title, txt)
             chunks.append(Chunk(id=f"{note_id}#c{idx:02d}", note_id=note_id, index=idx, text=txt, window=win, token_count=curr_tokens, section_title=buf[0].section_title, section_path=buf[0].section_path, neighbors_prev=None, neighbors_next=None))
             buf = []
         buf.append(b)
         
     if buf:
         txt = "\n\n".join([x.text for x in buf]); idx = len(chunks)
-        win = _create_win(doc_title, buf[0].section_title, txt)
+        win = _create_win(context_prefix, buf[0].section_title, txt)
         chunks.append(Chunk(id=f"{note_id}#c{idx:02d}", note_id=note_id, index=idx, text=txt, window=win, token_count=estimate_tokens(txt), section_title=buf[0].section_title, section_path=buf[0].section_path, neighbors_prev=None, neighbors_next=None))
         
     return chunks
\ No newline at end of file
diff --git a/app/core/ingestion/ingestion_note_payload.py b/app/core/ingestion/ingestion_note_payload.py
index 3df4d4a..5d30707 100644
--- a/app/core/ingestion/ingestion_note_payload.py
+++ b/app/core/ingestion/ingestion_note_payload.py
@@ -54,6 +54,7 @@ def _get_hash_source_content(n: Dict[str, Any], mode: str) -> str:
         fm = n.get("frontmatter") or {}
         meta_parts = []
         # Wir inkludieren alle Felder, die das Chunking oder Retrieval beeinflussen
+        # Jede Änderung hier führt nun zwingend zu einem neuen Full-Hash
         keys = [
             "title", "type", "status", "tags", 
             "chunking_profile", "chunk_profile", 
@@ -143,6 +144,7 @@ def make_note_payload(note: Any, *args, **kwargs) -> Dict[str, Any]:
     }
     
     # --- MULTI-HASH ---
+    # Generiert Hashes für Change Detection (WP-15b)
     for mode in ["body", "full"]:
         content = _get_hash_source_content(n, mode)
         payload["hashes"][f"{mode}:{hash_source}:{hash_normalize}"] = _compute_hash(content)
diff --git a/app/core/ingestion/ingestion_processor.py b/app/core/ingestion/ingestion_processor.py
index 8d6114d..e868401 100644
--- a/app/core/ingestion/ingestion_processor.py
+++ b/app/core/ingestion/ingestion_processor.py
@@ -4,8 +4,8 @@ DESCRIPTION: Der zentrale IngestionService (Orchestrator).
              WP-14: Modularisierung der Datenbank-Ebene (app.core.database).
              WP-15b: Two-Pass Workflow mit globalem Kontext-Cache.
              WP-20/22: Cloud-Resilienz und Content-Lifecycle integriert.
-             AUDIT v2.13.10: Umstellung auf app.core.database Infrastruktur.
-VERSION: 2.13.10
+             AUDIT v2.13.11: Synchronisierung mit Atomic-Chunking v3.9.9.
+VERSION: 2.13.11
 STATUS: Active
 """
 import logging
@@ -60,6 +60,7 @@ class IngestionService:
         self.embedder = EmbeddingsClient()
         self.llm = LLMService() 
         
+        # Festlegen, welcher Hash für die Change-Detection maßgeblich ist
         self.active_hash_mode = self.settings.CHANGE_DETECTION_MODE
         self.batch_cache: Dict[str, NoteContext] = {} # WP-15b LocalBatchCache
 
@@ -130,12 +131,18 @@ class IngestionService:
         )
         note_id = note_pl["note_id"]
 
+        # Abgleich mit der Datenbank (Qdrant)
         old_payload = None if force_replace else fetch_note_payload(self.client, self.prefix, note_id)
+        
+        # Prüfung gegen den konfigurierten Hash-Modus (body vs. full)
         check_key = f"{self.active_hash_mode}:{hash_source}:{hash_normalize}"
         old_hash = (old_payload or {}).get("hashes", {}).get(check_key)
         new_hash = note_pl.get("hashes", {}).get(check_key)
         
+        # Check ob Chunks oder Kanten in der DB fehlen (Reparatur-Modus)
         c_miss, e_miss = artifacts_missing(self.client, self.prefix, note_id)
+        
+        # Wenn Hash identisch und Artefakte vorhanden -> Skip
         if not (force_replace or not old_payload or old_hash != new_hash or c_miss or e_miss):
             return {**result, "status": "unchanged", "note_id": note_id}
         
@@ -146,36 +153,46 @@ class IngestionService:
         try:
             body_text = getattr(parsed, "body", "") or ""
             edge_registry.ensure_latest()
+            
+            # Profil-Auflösung via Registry
             profile = fm.get("chunk_profile") or fm.get("chunking_profile") or "sliding_standard"
             chunk_cfg = get_chunk_config_by_profile(self.registry, profile, note_type)
             enable_smart = chunk_cfg.get("enable_smart_edge_allocation", False)
             
-            # WP-15b: Chunker-Aufruf bereitet Candidate-Pool vor
+            # WP-15b: Chunker-Aufruf bereitet den Candidate-Pool pro Chunk vor.
+            # assemble_chunks (v3.3.4) führt intern auch die Propagierung durch.
             chunks = await assemble_chunks(note_id, body_text, note_type, config=chunk_cfg)
+            
+            # Semantische Kanten-Validierung (Smart Edge Allocation)
             for ch in chunks:
                 filtered = []
                 for cand in getattr(ch, "candidate_pool", []):
-                    # WP-15b: Nur global_pool Kandidaten erfordern binäre Validierung
+                    # Nur global_pool Kandidaten (aus dem Pool am Ende) erfordern KI-Validierung
                     if cand.get("provenance") == "global_pool" and enable_smart:
                         if await validate_edge_candidate(ch.text, cand, self.batch_cache, self.llm, self.settings.MINDNET_LLM_PROVIDER):
                             filtered.append(cand)
                     else: 
+                        # Explizite Kanten (Wikilinks/Callouts) werden ungeprüft übernommen
                         filtered.append(cand)
                 ch.candidate_pool = filtered
 
-            # Payload-Erstellung via interne Module
+            # Payload-Erstellung für die Chunks
             chunk_pls = make_chunk_payloads(
                 fm, note_pl["path"], chunks, file_path=file_path, 
                 types_cfg=self.registry
             )
+            
+            # Vektorisierung der Fenster-Texte
             vecs = await self.embedder.embed_documents([c.get("window") or "" for c in chunk_pls]) if chunk_pls else []
             
-            # Kanten-Aggregation
+            # Aggregation aller finalen Kanten (Edges)
             edges = build_edges_for_note(
                 note_id, chunk_pls, 
                 note_level_references=note_pl.get("references", []),
                 include_note_scope_refs=note_scope_refs
             )
+            
+            # Kanten-Typen via Registry validieren/auflösen
             for e in edges:
                 e["kind"] = edge_registry.resolve(
                     e.get("kind", "related_to"), 
@@ -184,16 +201,20 @@ class IngestionService:
                 )
 
             # 4. DB Upsert via modularisierter Points-Logik
+            # WICHTIG: Wenn sich der Inhalt geändert hat, löschen wir erst alle alten Fragmente.
             if purge_before and old_payload: 
                 purge_artifacts(self.client, self.prefix, note_id)
             
+            # Speichern der Haupt-Note
             n_name, n_pts = points_for_note(self.prefix, note_pl, None, self.dim)
             upsert_batch(self.client, n_name, n_pts)
             
+            # Speichern der Chunks
             if chunk_pls and vecs: 
                 c_pts = points_for_chunks(self.prefix, chunk_pls, vecs)[1]
                 upsert_batch(self.client, f"{self.prefix}_chunks", c_pts)
             
+            # Speichern der Kanten
             if edges: 
                 e_pts = points_for_edges(self.prefix, edges)[1]
                 upsert_batch(self.client, f"{self.prefix}_edges", e_pts)
@@ -217,4 +238,5 @@ class IngestionService:
         with open(target_path, "w", encoding="utf-8") as f: 
             f.write(markdown_content)
         await asyncio.sleep(0.1) 
+        # Triggert sofortigen Import mit force_replace/purge_before
         return await self.process_file(file_path=target_path, vault_root=vault_root, apply=True, force_replace=True, purge_before=True)
\ No newline at end of file

From ef8cf719f2c6497cd26694642b2b2e5ee6b2286e Mon Sep 17 00:00:00 2001
From: Lars <Lars@stommer.de>
Date: Tue, 30 Dec 2025 09:11:55 +0100
Subject: [PATCH 29/33] Update ingestion processor to version 2.13.12,
 synchronizing profile resolution with registry defaults. Refactor profile
 retrieval logic to utilize the profile determined in make_note_payload,
 ensuring consistency in chunk configuration.

---
 app/core/ingestion/ingestion_processor.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/app/core/ingestion/ingestion_processor.py b/app/core/ingestion/ingestion_processor.py
index e868401..22ae909 100644
--- a/app/core/ingestion/ingestion_processor.py
+++ b/app/core/ingestion/ingestion_processor.py
@@ -4,8 +4,8 @@ DESCRIPTION: Der zentrale IngestionService (Orchestrator).
              WP-14: Modularisierung der Datenbank-Ebene (app.core.database).
              WP-15b: Two-Pass Workflow mit globalem Kontext-Cache.
              WP-20/22: Cloud-Resilienz und Content-Lifecycle integriert.
-             AUDIT v2.13.11: Synchronisierung mit Atomic-Chunking v3.9.9.
-VERSION: 2.13.11
+             AUDIT v2.13.12: Synchronisierung der Profil-Auflösung mit Registry-Defaults.
+VERSION: 2.13.12
 STATUS: Active
 """
 import logging
@@ -155,12 +155,15 @@ class IngestionService:
             edge_registry.ensure_latest()
             
             # Profil-Auflösung via Registry
-            profile = fm.get("chunk_profile") or fm.get("chunking_profile") or "sliding_standard"
+            # FIX: Wir nutzen das Profil, das bereits in make_note_payload unter 
+            # Berücksichtigung der types.yaml (Registry) ermittelt wurde.
+            profile = note_pl.get("chunk_profile", "sliding_standard")
+            
             chunk_cfg = get_chunk_config_by_profile(self.registry, profile, note_type)
             enable_smart = chunk_cfg.get("enable_smart_edge_allocation", False)
             
             # WP-15b: Chunker-Aufruf bereitet den Candidate-Pool pro Chunk vor.
-            # assemble_chunks (v3.3.4) führt intern auch die Propagierung durch.
+            # assemble_chunks führt intern auch die Propagierung durch.
             chunks = await assemble_chunks(note_id, body_text, note_type, config=chunk_cfg)
             
             # Semantische Kanten-Validierung (Smart Edge Allocation)

From ef1046c6f51067a112df09c69a086dcfdbf86866 Mon Sep 17 00:00:00 2001
From: Lars <Lars@stommer.de>
Date: Tue, 30 Dec 2025 09:26:38 +0100
Subject: [PATCH 30/33] Enhance callout relation extraction by ensuring correct
 termination on new headers. Update regex for simple kinds to support hyphens.
 Refactor block processing logic for improved clarity and functionality.

---
 app/core/graph/graph_extractors.py | 144 +++++++++++++++--------------
 1 file changed, 76 insertions(+), 68 deletions(-)

diff --git a/app/core/graph/graph_extractors.py b/app/core/graph/graph_extractors.py
index 690e561..b8785e4 100644
--- a/app/core/graph/graph_extractors.py
+++ b/app/core/graph/graph_extractors.py
@@ -2,8 +2,8 @@
 FILE: app/core/graph/graph_extractors.py
 DESCRIPTION: Regex-basierte Extraktion von Relationen aus Text.
              AUDIT: 
+             - FIX: extract_callout_relations stoppt nun korrekt bei neuem Header.
              - Regex für Wikilinks liberalisiert (Umlaute, Sonderzeichen).
-             - Callout-Parser erweitert für Multi-Line-Listen und Header-Typen.
 """
 import re
 from typing import List, Tuple
@@ -16,10 +16,8 @@ _REL_SPACE = re.compile(r"\[\[\s*rel:(?P<kind>[a-z_]+)\s+(?P<target>[^\]]+?)\s*\
 _REL_TEXT  = re.compile(r"rel\s*:\s*(?P<kind>[a-z_]+)\s*\[\[\s*(?P<target>[^\]]+?)\s*\]\]", re.IGNORECASE)
 
 _CALLOUT_START = re.compile(r"^\s*>\s*\[!edge\]\s*(.*)$", re.IGNORECASE)
-# Erkennt "kind: targets..."
 _REL_LINE      = re.compile(r"^(?P<kind>[a-z_]+)\s*:\s*(?P<targets>.+?)\s*$", re.IGNORECASE)
-# Erkennt reine Typen (z.B. "depends_on" im Header)
-_SIMPLE_KIND   = re.compile(r"^[a-z_]+$", re.IGNORECASE)
+_SIMPLE_KIND   = re.compile(r"^[a-z_\-]+$", re.IGNORECASE)
 
 def extract_typed_relations(text: str) -> Tuple[List[Tuple[str, str]], str]:
     """
@@ -40,9 +38,7 @@ def extract_typed_relations(text: str) -> Tuple[List[Tuple[str, str]], str]:
 def extract_callout_relations(text: str) -> Tuple[List[Tuple[str,str]], str]:
     """
     Verarbeitet Obsidian [!edge]-Callouts.
-    Unterstützt zwei Formate:
-    1. Explizit: "kind: [[Target]]"
-    2. Implizit (Header): "> [!edge] kind" gefolgt von "[[Target]]" Zeilen
+    Stoppt korrekt, wenn ein neuer Header innerhalb eines Blocks gefunden wird.
     """
     if not text: return [], text
     lines = text.splitlines()
@@ -52,76 +48,88 @@ def extract_callout_relations(text: str) -> Tuple[List[Tuple[str,str]], str]:
     
     while i < len(lines):
         line = lines[i]
+        
+        # 1. Start eines Blocks erkannt
         m = _CALLOUT_START.match(line)
-        if not m:
-            keep_lines.append(line)
-            i += 1
-            continue
-        
-        # Callout-Block gefunden. Wir sammeln alle relevanten Zeilen.
-        block_lines = []
-        
-        # Header Content prüfen (z.B. "type" aus "> [!edge] type")
-        header_raw = m.group(1).strip()
-        if header_raw:
-            block_lines.append(header_raw)
+        if m:
+            block_lines = []
+            header_raw = m.group(1).strip()
+            if header_raw:
+                block_lines.append(header_raw)
             
-        i += 1
-        while i < len(lines) and lines[i].lstrip().startswith('>'):
-            # Entferne '>' und führende Leerzeichen
-            content = lines[i].lstrip()[1:].lstrip()
-            if content:
-                block_lines.append(content)
             i += 1
+            # Sammle Folgezeilen, solange sie mit '>' beginnen UND KEIN neuer Header sind
+            while i < len(lines) and lines[i].lstrip().startswith('>'):
+                # STOP-CHECK: Ist das ein neuer Header?
+                if _CALLOUT_START.match(lines[i]):
+                    break # Breche inneren Loop ab -> Outer Loop behandelt den neuen Header
+                
+                content = lines[i].lstrip()[1:].lstrip()
+                if content:
+                    block_lines.append(content)
+                i += 1
             
-        # Verarbeitung des Blocks
-        current_kind = None
-        
-        # Heuristik: Ist die allererste Zeile (meist aus dem Header) ein reiner Typ?
-        # Dann setzen wir diesen als Default für den Block.
-        if block_lines:
-            first = block_lines[0]
-            # Wenn es NICHT wie "Key: Value" aussieht, aber wie ein Wort:
-            if not _REL_LINE.match(first) and _SIMPLE_KIND.match(first):
-                current_kind = first.lower()
+            _process_block(block_lines, out_pairs)
+            continue # Weiter im Outer Loop (i steht jetzt auf dem nächsten Header oder Text)
+
+        # 2. "Headless" Block / Zerschnittener Chunk
+        # Wenn Zeile mit '>' beginnt, Links hat, aber wir nicht in einem Header-Block sind
+        if line.lstrip().startswith('>'):
+            if _WIKILINK_RE.search(line):
+                block_lines = []
+                # Sammeln bis Ende oder neuer Header
+                while i < len(lines) and lines[i].lstrip().startswith('>'):
+                    if _CALLOUT_START.match(lines[i]):
+                        break
+                    
+                    content = lines[i].lstrip()[1:].lstrip()
+                    if content:
+                        block_lines.append(content)
+                    i += 1
                 
-        for bl in block_lines:
-            # 1. Prüfen auf explizites "Kind: Targets" (überschreibt Header-Typ für diese Zeile)
-            mrel = _REL_LINE.match(bl)
-            if mrel:
-                line_kind = mrel.group("kind").strip().lower()
-                targets = mrel.group("targets")
-                
-                # Links extrahieren
-                found = _WIKILINK_RE.findall(targets)
-                if found:
-                    for t in found: out_pairs.append((line_kind, t.strip()))
-                else:
-                    # Fallback für kommagetrennten Plaintext
-                    for raw in re.split(r"[,;]", targets):
-                        if raw.strip(): out_pairs.append((line_kind, raw.strip()))
-                
-                # Wenn wir eine explizite Zeile gefunden haben, aktualisieren wir NICHT 
-                # den current_kind für nachfolgende Zeilen (Design-Entscheidung: lokal scope),
-                # oder wir machen es doch? 
-                # Üblicher ist: Header setzt Default, Zeile überschreibt lokal. 
-                # Wir lassen current_kind also unangetastet.
+                # Als 'related_to' retten, falls Typ fehlt
+                _process_block(block_lines, out_pairs, default_kind="related_to")
                 continue
-            
-            # 2. Kein Key:Value Muster -> Prüfen auf Links, die den current_kind nutzen
-            found = _WIKILINK_RE.findall(bl)
-            if found:
-                if current_kind:
-                    for t in found: out_pairs.append((current_kind, t.strip()))
-                else:
-                    # Link ohne Typ und ohne Header-Typ.
-                    # Wird ignoriert oder könnte als 'related_to' fallback dienen.
-                    # Aktuell: Ignorieren, um False Positives zu vermeiden.
-                    pass
+        
+        keep_lines.append(line)
+        i += 1
 
     return out_pairs, "\n".join(keep_lines)
 
+def _process_block(lines: List[str], out_pairs: List[Tuple[str, str]], default_kind: str = None):
+    """Parsen eines isolierten Blocks."""
+    current_kind = default_kind
+    
+    if lines:
+        first = lines[0]
+        # Ist die erste Zeile ein Typ? (z.B. "based_on")
+        if not _REL_LINE.match(first) and _SIMPLE_KIND.match(first):
+            current_kind = first.lower()
+            
+    for bl in lines:
+        # Format "kind: [[Target]]"
+        mrel = _REL_LINE.match(bl)
+        if mrel:
+            k = mrel.group("kind").strip().lower()
+            targets = mrel.group("targets")
+            found = _WIKILINK_RE.findall(targets)
+            if found:
+                for t in found: out_pairs.append((k, t.strip()))
+            else:
+                for raw in re.split(r"[,;]", targets):
+                    if raw.strip(): out_pairs.append((k, raw.strip()))
+            continue
+            
+        # Format "[[Target]]" (nutzt current_kind)
+        found = _WIKILINK_RE.findall(bl)
+        if found:
+            if current_kind:
+                for t in found: out_pairs.append((current_kind, t.strip()))
+            else:
+                # Fallback ohne Typ
+                for t in found: out_pairs.append(("related_to", t.strip()))
+
 def extract_wikilinks(text: str) -> List[str]:
-    """Findet Standard-Wikilinks [[Target]] oder [[Alias|Target]]."""
+    """Findet Standard-Wikilinks."""
     if not text: return []
     return [m.strip() for m in _WIKILINK_RE.findall(text) if m.strip()]
\ No newline at end of file

From 4327fc939cdc5218f8926ac95a1518b4471bde5b Mon Sep 17 00:00:00 2001
From: Lars <Lars@stommer.de>
Date: Tue, 30 Dec 2025 09:40:30 +0100
Subject: [PATCH 31/33] =?UTF-8?q?zur=C3=BCck=20zur=20Vorversion=20zum=20Tes?=
 =?UTF-8?q?t=20der=20LLM=20checks?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 app/core/graph/graph_extractors.py | 148 ++++++++++++++---------------
 1 file changed, 70 insertions(+), 78 deletions(-)

diff --git a/app/core/graph/graph_extractors.py b/app/core/graph/graph_extractors.py
index b8785e4..690e561 100644
--- a/app/core/graph/graph_extractors.py
+++ b/app/core/graph/graph_extractors.py
@@ -2,8 +2,8 @@
 FILE: app/core/graph/graph_extractors.py
 DESCRIPTION: Regex-basierte Extraktion von Relationen aus Text.
              AUDIT: 
-             - FIX: extract_callout_relations stoppt nun korrekt bei neuem Header.
              - Regex für Wikilinks liberalisiert (Umlaute, Sonderzeichen).
+             - Callout-Parser erweitert für Multi-Line-Listen und Header-Typen.
 """
 import re
 from typing import List, Tuple
@@ -16,8 +16,10 @@ _REL_SPACE = re.compile(r"\[\[\s*rel:(?P<kind>[a-z_]+)\s+(?P<target>[^\]]+?)\s*\
 _REL_TEXT  = re.compile(r"rel\s*:\s*(?P<kind>[a-z_]+)\s*\[\[\s*(?P<target>[^\]]+?)\s*\]\]", re.IGNORECASE)
 
 _CALLOUT_START = re.compile(r"^\s*>\s*\[!edge\]\s*(.*)$", re.IGNORECASE)
+# Erkennt "kind: targets..."
 _REL_LINE      = re.compile(r"^(?P<kind>[a-z_]+)\s*:\s*(?P<targets>.+?)\s*$", re.IGNORECASE)
-_SIMPLE_KIND   = re.compile(r"^[a-z_\-]+$", re.IGNORECASE)
+# Erkennt reine Typen (z.B. "depends_on" im Header)
+_SIMPLE_KIND   = re.compile(r"^[a-z_]+$", re.IGNORECASE)
 
 def extract_typed_relations(text: str) -> Tuple[List[Tuple[str, str]], str]:
     """
@@ -38,7 +40,9 @@ def extract_typed_relations(text: str) -> Tuple[List[Tuple[str, str]], str]:
 def extract_callout_relations(text: str) -> Tuple[List[Tuple[str,str]], str]:
     """
     Verarbeitet Obsidian [!edge]-Callouts.
-    Stoppt korrekt, wenn ein neuer Header innerhalb eines Blocks gefunden wird.
+    Unterstützt zwei Formate:
+    1. Explizit: "kind: [[Target]]"
+    2. Implizit (Header): "> [!edge] kind" gefolgt von "[[Target]]" Zeilen
     """
     if not text: return [], text
     lines = text.splitlines()
@@ -48,88 +52,76 @@ def extract_callout_relations(text: str) -> Tuple[List[Tuple[str,str]], str]:
     
     while i < len(lines):
         line = lines[i]
-        
-        # 1. Start eines Blocks erkannt
         m = _CALLOUT_START.match(line)
-        if m:
-            block_lines = []
-            header_raw = m.group(1).strip()
-            if header_raw:
-                block_lines.append(header_raw)
-            
+        if not m:
+            keep_lines.append(line)
             i += 1
-            # Sammle Folgezeilen, solange sie mit '>' beginnen UND KEIN neuer Header sind
-            while i < len(lines) and lines[i].lstrip().startswith('>'):
-                # STOP-CHECK: Ist das ein neuer Header?
-                if _CALLOUT_START.match(lines[i]):
-                    break # Breche inneren Loop ab -> Outer Loop behandelt den neuen Header
-                
-                content = lines[i].lstrip()[1:].lstrip()
-                if content:
-                    block_lines.append(content)
-                i += 1
-            
-            _process_block(block_lines, out_pairs)
-            continue # Weiter im Outer Loop (i steht jetzt auf dem nächsten Header oder Text)
-
-        # 2. "Headless" Block / Zerschnittener Chunk
-        # Wenn Zeile mit '>' beginnt, Links hat, aber wir nicht in einem Header-Block sind
-        if line.lstrip().startswith('>'):
-            if _WIKILINK_RE.search(line):
-                block_lines = []
-                # Sammeln bis Ende oder neuer Header
-                while i < len(lines) and lines[i].lstrip().startswith('>'):
-                    if _CALLOUT_START.match(lines[i]):
-                        break
-                    
-                    content = lines[i].lstrip()[1:].lstrip()
-                    if content:
-                        block_lines.append(content)
-                    i += 1
-                
-                # Als 'related_to' retten, falls Typ fehlt
-                _process_block(block_lines, out_pairs, default_kind="related_to")
-                continue
+            continue
         
-        keep_lines.append(line)
+        # Callout-Block gefunden. Wir sammeln alle relevanten Zeilen.
+        block_lines = []
+        
+        # Header Content prüfen (z.B. "type" aus "> [!edge] type")
+        header_raw = m.group(1).strip()
+        if header_raw:
+            block_lines.append(header_raw)
+            
         i += 1
+        while i < len(lines) and lines[i].lstrip().startswith('>'):
+            # Entferne '>' und führende Leerzeichen
+            content = lines[i].lstrip()[1:].lstrip()
+            if content:
+                block_lines.append(content)
+            i += 1
+            
+        # Verarbeitung des Blocks
+        current_kind = None
+        
+        # Heuristik: Ist die allererste Zeile (meist aus dem Header) ein reiner Typ?
+        # Dann setzen wir diesen als Default für den Block.
+        if block_lines:
+            first = block_lines[0]
+            # Wenn es NICHT wie "Key: Value" aussieht, aber wie ein Wort:
+            if not _REL_LINE.match(first) and _SIMPLE_KIND.match(first):
+                current_kind = first.lower()
+                
+        for bl in block_lines:
+            # 1. Prüfen auf explizites "Kind: Targets" (überschreibt Header-Typ für diese Zeile)
+            mrel = _REL_LINE.match(bl)
+            if mrel:
+                line_kind = mrel.group("kind").strip().lower()
+                targets = mrel.group("targets")
+                
+                # Links extrahieren
+                found = _WIKILINK_RE.findall(targets)
+                if found:
+                    for t in found: out_pairs.append((line_kind, t.strip()))
+                else:
+                    # Fallback für kommagetrennten Plaintext
+                    for raw in re.split(r"[,;]", targets):
+                        if raw.strip(): out_pairs.append((line_kind, raw.strip()))
+                
+                # Wenn wir eine explizite Zeile gefunden haben, aktualisieren wir NICHT 
+                # den current_kind für nachfolgende Zeilen (Design-Entscheidung: lokal scope),
+                # oder wir machen es doch? 
+                # Üblicher ist: Header setzt Default, Zeile überschreibt lokal. 
+                # Wir lassen current_kind also unangetastet.
+                continue
+            
+            # 2. Kein Key:Value Muster -> Prüfen auf Links, die den current_kind nutzen
+            found = _WIKILINK_RE.findall(bl)
+            if found:
+                if current_kind:
+                    for t in found: out_pairs.append((current_kind, t.strip()))
+                else:
+                    # Link ohne Typ und ohne Header-Typ.
+                    # Wird ignoriert oder könnte als 'related_to' fallback dienen.
+                    # Aktuell: Ignorieren, um False Positives zu vermeiden.
+                    pass
 
     return out_pairs, "\n".join(keep_lines)
 
-def _process_block(lines: List[str], out_pairs: List[Tuple[str, str]], default_kind: str = None):
-    """Parsen eines isolierten Blocks."""
-    current_kind = default_kind
-    
-    if lines:
-        first = lines[0]
-        # Ist die erste Zeile ein Typ? (z.B. "based_on")
-        if not _REL_LINE.match(first) and _SIMPLE_KIND.match(first):
-            current_kind = first.lower()
-            
-    for bl in lines:
-        # Format "kind: [[Target]]"
-        mrel = _REL_LINE.match(bl)
-        if mrel:
-            k = mrel.group("kind").strip().lower()
-            targets = mrel.group("targets")
-            found = _WIKILINK_RE.findall(targets)
-            if found:
-                for t in found: out_pairs.append((k, t.strip()))
-            else:
-                for raw in re.split(r"[,;]", targets):
-                    if raw.strip(): out_pairs.append((k, raw.strip()))
-            continue
-            
-        # Format "[[Target]]" (nutzt current_kind)
-        found = _WIKILINK_RE.findall(bl)
-        if found:
-            if current_kind:
-                for t in found: out_pairs.append((current_kind, t.strip()))
-            else:
-                # Fallback ohne Typ
-                for t in found: out_pairs.append(("related_to", t.strip()))
-
 def extract_wikilinks(text: str) -> List[str]:
-    """Findet Standard-Wikilinks."""
+    """Findet Standard-Wikilinks [[Target]] oder [[Alias|Target]]."""
     if not text: return []
     return [m.strip() for m in _WIKILINK_RE.findall(text) if m.strip()]
\ No newline at end of file

From beb87a8c43e5397e882da74c2dd228456bfbd278 Mon Sep 17 00:00:00 2001
From: Lars <Lars@stommer.de>
Date: Tue, 30 Dec 2025 12:16:58 +0100
Subject: [PATCH 32/33] Update documentation to version 2.9.1, introducing
 support for section-based links and multigraph functionality. Enhance
 glossary, user manual, and technical references to reflect new deep-linking
 capabilities and edge structure adjustments. Ensure proper migration
 instructions for users transitioning to the new version.

---
 docs/00_General/00_glossary.md                |  9 ++--
 docs/01_User_Manual/01_knowledge_design.md    | 14 ++++-
 docs/02_concepts/02_concept_graph_logic.md    | 28 ++++++++--
 .../03_tech_api_reference.md                  |  5 +-
 .../03_tech_data_model.md                     | 16 ++++--
 .../03_tech_frontend.md                       |  2 +-
 .../03_tech_ingestion_pipeline.md             | 52 ++++++++++++++-----
 docs/04_Operations/04_admin_operations.md     |  9 +++-
 8 files changed, 109 insertions(+), 26 deletions(-)

diff --git a/docs/00_General/00_glossary.md b/docs/00_General/00_glossary.md
index e14ead9..3ff270b 100644
--- a/docs/00_General/00_glossary.md
+++ b/docs/00_General/00_glossary.md
@@ -2,7 +2,7 @@
 doc_type: glossary
 audience: all
 status: active
-version: 2.8.1
+version: 2.9.1
 context: "Zentrales Glossar für Mindnet v2.8. Enthält Definitionen zu Hybrid-Cloud Resilienz, WP-14 Modularisierung, WP-15b Two-Pass Ingestion und Mistral-safe Parsing."
 ---
 
@@ -14,7 +14,7 @@ context: "Zentrales Glossar für Mindnet v2.8. Enthält Definitionen zu Hybrid-C
 
 * **Note:** Repräsentiert eine Markdown-Datei. Die fachliche Haupteinheit. Verfügt über einen **Status** (stable, draft, system), der das Scoring beeinflusst.
 * **Chunk:** Ein Textabschnitt einer Note. Die technische Sucheinheit (Vektor).
-* **Edge:** Eine gerichtete Verbindung zwischen zwei Knoten. Wird in WP-22 durch die Registry validiert.
+* **Edge:** Eine gerichtete Verbindung zwischen zwei Knoten. Wird in WP-22 durch die Registry validiert. Seit v2.9.1 unterstützen Edges **Section-basierte Links** (`target_section`), sodass mehrere Kanten zwischen denselben Knoten existieren können, wenn sie auf verschiedene Abschnitte zeigen.
 * **Vault:** Der lokale Ordner mit den Markdown-Dateien (Source of Truth).
 * **Frontmatter:** Der YAML-Header am Anfang einer Notiz (enthält `id`, `type`, `title`, `status`).
 
@@ -47,4 +47,7 @@ context: "Zentrales Glossar für Mindnet v2.8. Enthält Definitionen zu Hybrid-C
 * **Two-Pass Workflow (WP-15b):** Optimiertes Ingestion-Verfahren:
     * **Pass 1 (Pre-Scan):** Schnelles Scannen aller Dateien zur Befüllung des LocalBatchCache.
     * **Pass 2 (Semantic Processing):** Tiefenverarbeitung (Chunking, Embedding, Validierung) nur für geänderte Dateien.
-* **Circular Import Registry (WP-14):** Entkopplung von Kern-Logik (wie Textbereinigung) in eine neutrale `registry.py`, um Abhängigkeitsschleifen zwischen Diensten und Ingestion-Utilities zu verhindern.
\ No newline at end of file
+* **Circular Import Registry (WP-14):** Entkopplung von Kern-Logik (wie Textbereinigung) in eine neutrale `registry.py`, um Abhängigkeitsschleifen zwischen Diensten und Ingestion-Utilities zu verhindern.
+* **Deep-Link / Section-basierter Link:** Ein Link wie `[[Note#Section]]`, der auf einen spezifischen Abschnitt innerhalb einer Note verweist. Seit v2.9.1 wird dieser in `target_id="Note"` und `target_section="Section"` aufgeteilt, um "Phantom-Knoten" zu vermeiden und Multigraph-Support zu ermöglichen.
+* **Atomic Section Logic (v3.9.9):** Chunking-Verfahren, das Sektions-Überschriften und deren Inhalte atomar in Chunks hält (Pack-and-Carry-Over). Verhindert, dass Überschriften über Chunk-Grenzen hinweg getrennt werden.
+* **Registry-First Profiling (v2.13.12):** Hierarchische Auflösung des Chunking-Profils: Frontmatter > types.yaml Typ-Config > Global Defaults. Stellt sicher, dass Note-Typen automatisch das korrekte Profil erhalten.
\ No newline at end of file
diff --git a/docs/01_User_Manual/01_knowledge_design.md b/docs/01_User_Manual/01_knowledge_design.md
index ed2f3b3..885664e 100644
--- a/docs/01_User_Manual/01_knowledge_design.md
+++ b/docs/01_User_Manual/01_knowledge_design.md
@@ -3,7 +3,7 @@ doc_type: user_manual
 audience: user, author
 scope: vault, markdown, schema
 status: active
-version: 2.8.0
+version: 2.9.1
 context: "Regelwerk für das Erstellen von Notizen im Vault. Die 'Source of Truth' für Autoren."
 ---
 
@@ -208,6 +208,12 @@ Dies ist die **mächtigste** Methode. Du sagst dem System explizit, **wie** Ding
 > "Daher [[rel:depends_on Qdrant]]."
 > "Dieses Konzept ist [[rel:similar_to Pinecone]]."
 
+**Deep-Links zu Abschnitten (v2.9.1):**
+Du kannst auch auf spezifische Abschnitte innerhalb einer Note verlinken:
+> "Siehe [[rel:based_on Mein Leitbild#P3 – Disziplin]]."
+
+Das System trennt automatisch den Note-Namen (`Mein Leitbild`) vom Abschnitts-Namen (`P3 – Disziplin`), sodass mehrere Links zur gleichen Note möglich sind, wenn sie auf verschiedene Abschnitte zeigen.
+
 **Gültige Relationen:**
 * `depends_on`: Hängt ab von / Benötigt.
 * `blocks`: Blockiert oder gefährdet (z.B. Risiko -> Projekt).
@@ -226,6 +232,12 @@ Für Zusammenfassungen am Ende einer Notiz, oder eines Absatzes:
 >  [[AI Agents]]
 ```
 
+**Multi-Line Support (v2.9.1):**
+Callout-Blocks mit mehreren Zeilen werden korrekt verarbeitet. Das System erkennt automatisch, wenn mehrere Links im gleichen Callout-Block stehen, und erstellt für jeden Link eine separate Kante (auch bei Deep-Links zu verschiedenen Sections).
+
+**Format-agnostische De-Duplizierung:**
+Wenn Kanten bereits via `[!edge]` Callout vorhanden sind, werden sie nicht mehrfach injiziert. Das System erkennt vorhandene Kanten unabhängig vom Format (Inline, Callout, Wikilink).
+
 ### 4.3 Implizite Bidirektionalität (Edger-Logik) [NEU] [PRÜFEN!]
 In Mindnet musst du Kanten **nicht** manuell in beide Richtungen pflegen. Der **Edger** übernimmt die Paarbildung automatisch im Hintergrund.
 
diff --git a/docs/02_concepts/02_concept_graph_logic.md b/docs/02_concepts/02_concept_graph_logic.md
index 6c08e4c..b7f89a7 100644
--- a/docs/02_concepts/02_concept_graph_logic.md
+++ b/docs/02_concepts/02_concept_graph_logic.md
@@ -3,7 +3,7 @@ doc_type: concept
 audience: architect, product_owner
 scope: graph, logic, provenance
 status: active
-version: 2.7.0
+version: 2.9.1
 context: "Fachliche Beschreibung des Wissensgraphen: Knoten, Kanten, Provenance, Matrix-Logik und WP-22 Scoring-Prinzipien."
 ---
 
@@ -118,8 +118,30 @@ Der Intent-Router injiziert spezifische Multiplikatoren für kanonische Typen:
 
 ---
 
-## 6. Idempotenz & Konsistenz
+## 6. Section-basierte Links & Multigraph-Support
+
+Seit v2.9.1 unterstützt Mindnet **Deep-Links** zu spezifischen Abschnitten innerhalb einer Note.
+
+### 6.1 Link-Parsing
+Links wie `[[Note#Section]]` werden in zwei Komponenten aufgeteilt:
+* **`target_id`:** Enthält nur den Note-Namen (z.B. "Mein Leitbild")
+* **`target_section`:** Enthält den Abschnitts-Namen (z.B. "P3 – Disziplin")
+
+**Vorteil:** Verhindert "Phantom-Knoten", die durch das Einbeziehen des Anchors in die `target_id` entstanden wären.
+
+### 6.2 Multigraph-Support
+Die Edge-ID enthält nun einen `variant`-Parameter (die Section), sodass mehrere Kanten zwischen denselben Knoten existieren können, wenn sie auf verschiedene Sections zeigen:
+* `[[Note#Section1]]` → Edge-ID: `src->tgt:kind@Section1`
+* `[[Note#Section2]]` → Edge-ID: `src->tgt:kind@Section2`
+
+### 6.3 Semantische Deduplizierung
+Die Deduplizierung basiert auf dem `src->tgt:kind@sec` Key, um sicherzustellen, dass identische Links (gleiche Quelle, Ziel, Typ und Section) nicht mehrfach erstellt werden.
+
+---
+
+## 7. Idempotenz & Konsistenz
 
 Das System garantiert fachliche Konsistenz auch bei mehrfachen Importen.
 * **Stabile IDs:** Deterministische IDs verhindern Duplikate bei Re-Imports.
-* **Deduplizierung:** Kanten werden anhand ihrer Identität erkannt. Die "stärkere" Provenance gewinnt.
\ No newline at end of file
+* **Deduplizierung:** Kanten werden anhand ihrer Identität (inkl. Section) erkannt. Die "stärkere" Provenance gewinnt.
+* **Format-agnostische Erkennung:** Kanten werden unabhängig vom Format (Inline, Callout, Wikilink) erkannt, um Dopplungen zu vermeiden.
\ No newline at end of file
diff --git a/docs/03_Technical_References/03_tech_api_reference.md b/docs/03_Technical_References/03_tech_api_reference.md
index 198f542..6e1d856 100644
--- a/docs/03_Technical_References/03_tech_api_reference.md
+++ b/docs/03_Technical_References/03_tech_api_reference.md
@@ -144,8 +144,11 @@ Lädt den Subgraphen um eine Note herum.
       "kind": "depends_on",
       "source": "uuid",
       "target": "uuid",
+      "target_section": "P3 – Disziplin",  // Optional: Abschnitts-Name bei Deep-Links
       "weight": 1.4,
-      "direction": "out"
+      "direction": "out",
+      "provenance": "explicit",
+      "confidence": 1.0
     }
   ],
   "stats": {
diff --git a/docs/03_Technical_References/03_tech_data_model.md b/docs/03_Technical_References/03_tech_data_model.md
index 6492522..320705a 100644
--- a/docs/03_Technical_References/03_tech_data_model.md
+++ b/docs/03_Technical_References/03_tech_data_model.md
@@ -3,7 +3,7 @@ doc_type: technical_reference
 audience: developer, architect
 scope: database, qdrant, schema
 status: active
-version: 2.8.0
+version: 2.9.1
 context: "Exakte Definition der Datenmodelle (Payloads) in Qdrant und Index-Anforderungen. Berücksichtigt WP-14 Modularisierung und WP-15b Multi-Hashes."
 ---
 
@@ -96,15 +96,19 @@ Es müssen Payload-Indizes für folgende Felder existieren:
 
 ## 4. Edge Payload (`mindnet_edges`)
 
-Gerichtete Kanten zwischen Knoten. Stark erweitert in v2.6 für Provenienz-Tracking.
+Gerichtete Kanten zwischen Knoten. Stark erweitert in v2.6 für Provenienz-Tracking. Seit v2.9.1 unterstützt das System **Section-basierte Links** (`[[Note#Section]]`), die in `target_id` und `target_section` aufgeteilt werden.
 
 **JSON-Schema:**
 
 ```json
 {
-  "edge_id": "string (keyword)",       // Deterministischer Hash aus (src, dst, kind)
+  "edge_id": "string (keyword)",       // Deterministischer Hash aus (src, dst, kind, variant)
+                                       // variant = target_section (erlaubt Multigraph für Sections)
   "source_id": "string (keyword)",     // Chunk-ID (Start)
   "target_id": "string (keyword)",     // Chunk-ID oder Note-Titel (bei Unresolved)
+                                       // WICHTIG: Enthält NUR den Note-Namen, KEINE Section-Info
+  "target_section": "string (keyword)", // Optional: Abschnitts-Name (z.B. "P3 – Disziplin")
+                                         // Wird aus [[Note#Section]] extrahiert
   "kind": "string (keyword)",          // Beziehungsart (z.B. 'depends_on')
   "scope": "string (keyword)",         // Immer 'chunk' (Legacy-Support: 'note')
   "note_id": "string (keyword)",       // Owner Note ID (Ursprung der Kante)
@@ -116,10 +120,16 @@ Gerichtete Kanten zwischen Knoten. Stark erweitert in v2.6 für Provenienz-Track
 }
 ```
 
+**Section-Support:**
+* Links wie `[[Note#Section]]` werden in `target_id="Note"` und `target_section="Section"` aufgeteilt.
+* Die Edge-ID enthält die Section als `variant`, sodass mehrere Kanten zwischen denselben Knoten existieren können, wenn sie auf verschiedene Sections zeigen.
+* Semantische Deduplizierung basiert auf `src->tgt:kind@sec` Key, um "Phantom-Knoten" zu vermeiden.
+
 **Erforderliche Indizes:**
 Es müssen Payload-Indizes für folgende Felder existieren:
 * `source_id`
 * `target_id`
+* `target_section` (neu: Keyword-Index für Section-basierte Filterung)
 * `kind`
 * `scope`
 * `note_id`
diff --git a/docs/03_Technical_References/03_tech_frontend.md b/docs/03_Technical_References/03_tech_frontend.md
index 5d61203..0ea3c64 100644
--- a/docs/03_Technical_References/03_tech_frontend.md
+++ b/docs/03_Technical_References/03_tech_frontend.md
@@ -3,7 +3,7 @@ doc_type: technical_reference
 audience: developer, frontend_architect
 scope: architecture, graph_viz, state_management
 status: active
-version: 2.7.0
+version: 2.9.1
 context: "Technische Dokumentation des modularen Streamlit-Frontends, der Graph-Engines und des Editors."
 ---
 
diff --git a/docs/03_Technical_References/03_tech_ingestion_pipeline.md b/docs/03_Technical_References/03_tech_ingestion_pipeline.md
index ce199dd..4d29518 100644
--- a/docs/03_Technical_References/03_tech_ingestion_pipeline.md
+++ b/docs/03_Technical_References/03_tech_ingestion_pipeline.md
@@ -3,7 +3,7 @@ doc_type: technical_reference
 audience: developer, devops
 scope: backend, ingestion, smart_edges, edge_registry, modularization
 status: active
-version: 2.9.0
+version: 2.13.12
 context: "Detaillierte technische Beschreibung der Import-Pipeline, Two-Pass-Workflow (WP-15b) und modularer Datenbank-Architektur (WP-14). Integriert Mistral-safe Parsing und Deep Fallback."
 ---
 
@@ -31,9 +31,10 @@ Der Prozess ist **asynchron**, **idempotent** und wird nun in zwei logische Durc
 4.  **Edge Registry Initialisierung (WP-22):**
     * Laden der Singleton-Instanz der `EdgeRegistry`.
     * Validierung der Vokabular-Datei unter `MINDNET_VOCAB_PATH`.
-5.  **Config Resolution (WP-14):**
+5.  **Config Resolution (WP-14 / v2.13.12):**
     * Bestimmung von `chunking_profile` und `retriever_weight` via zentraler `TypeRegistry`.
     * **Priorität:** 1. Frontmatter (Override) -> 2. `types.yaml` (Type) -> 3. Global Default.
+    * **Registry-First Profiling:** Automatische Anwendung der korrekten Profile basierend auf dem Note-Typ (z.B. `value` nutzt automatisch `structured_smart_edges_strict`).
 6.  **LocalBatchCache & Summary Generation (WP-15b):**
     * Erstellung von Kurz-Zusammenfassungen für jede Note.
     * Speicherung im `batch_cache` als Referenzrahmen für die spätere Kantenvalidierung.
@@ -126,19 +127,44 @@ Das Chunking ist profilbasiert und bezieht seine Konfiguration dynamisch aus der
 | `sliding_smart_edges`| `sliding_window` | Max: 600, Target: 400 | Fließtexte (Projekte). |
 | `structured_smart_edges` | `by_heading` | `strict: false` | Strukturierte Texte. |
 
-### 3.2 Die `by_heading` Logik (v2.9 Hybrid)
+### 3.2 Die `by_heading` Logik (v3.9.9 Atomic Section Logic)
 
-Die Strategie `by_heading` zerlegt Texte anhand ihrer Struktur (Überschriften). Sie unterstützt ein "Safety Net" gegen zu große Chunks.
+Die Strategie `by_heading` implementiert seit v3.9.9 das **"Pack-and-Carry-Over"** Verfahren (Regel 1-3), um Sektions-Überschriften und deren Inhalte atomar in Chunks zu halten.
 
-* **Split Level:** Definiert die Tiefe (z.B. `2` = H1 & H2 triggern Split).
-* **Modus "Strict" (`strict_heading_split: true`):**
-    * Jede Überschrift (`<= split_level`) erzwingt einen neuen Chunk.
-    * *Merge-Check:* Wenn der vorherige Chunk leer war (nur Überschriften), wird gemergt.
-    * *Safety Net:* Wird ein Abschnitt zu lang (> `max` Token), wird auch ohne Überschrift getrennt.
-* **Modus "Soft" (`strict_heading_split: false`):**
-    * **Hierarchie-Check:** Überschriften *oberhalb* des Split-Levels erzwingen **immer** einen Split.
-    * **Füll-Logik:** Überschriften *auf* dem Split-Level lösen nur dann einen neuen Chunk aus, wenn der aktuelle Chunk die `target`-Größe erreicht hat.
-    * *Safety Net:* Auch hier greift das `max` Token Limit.
+**Kernprinzipien:**
+* **Atomic Section Logic:** Überschriften und deren Inhalte werden als atomare Einheiten behandelt und nicht über Chunk-Grenzen hinweg getrennt.
+* **H1-Context Preservation:** Der Dokumenttitel (H1) wird zuverlässig als Breadcrumb in das Embedding-Fenster (`window`) aller Chunks injiziert.
+* **Signature Alignment:** Parameter-Synchronisierung zwischen Orchestrator und Strategien (`context_prefix` statt `doc_title`).
+
+**Split Level:** Definiert die Tiefe (z.B. `2` = H1 & H2 triggern Split).
+
+**Modus "Strict" (`strict_heading_split: true`):**
+* Jede Überschrift (`<= split_level`) erzwingt einen neuen Chunk.
+* *Merge-Check:* Wenn der vorherige Chunk leer war (nur Überschriften), wird gemergt.
+* *Safety Net:* Wird ein Abschnitt zu lang (> `max` Token), wird auch ohne Überschrift getrennt.
+
+**Modus "Soft" (`strict_heading_split: false`):**
+* **Hierarchie-Check:** Überschriften *oberhalb* des Split-Levels erzwingen **immer** einen Split.
+* **Füll-Logik:** Überschriften *auf* dem Split-Level lösen nur dann einen neuen Chunk aus, wenn der aktuelle Chunk die `target`-Größe erreicht hat.
+* **Pack-and-Carry-Over:** Wenn ein Abschnitt zu groß ist, wird er intelligent zerlegt, wobei der Rest (mit Überschrift) zurück in die Queue gelegt wird.
+* *Safety Net:* Auch hier greift das `max` Token Limit.
+
+### 3.3 Registry-First Profiling (v2.13.12)
+
+Seit v2.13.12 nutzt der `IngestionService` die korrekte Hierarchie zur Ermittlung des Chunking-Profils:
+
+**Priorität:**
+1. **Frontmatter** (Override) - Explizite `chunking_profile` Angabe
+2. **`types.yaml` Typ-Config** - Profil basierend auf `type`
+3. **Global Defaults** - Fallback auf `sliding_standard`
+
+**Wichtig:** Ein Hard-Fallback auf `sliding_standard` erfolgt nur noch, wenn keine Konfiguration existiert. Dies stellt sicher, dass Note-Typen wie `value` automatisch das korrekte Profil (z.B. `structured_smart_edges_strict`) erhalten.
+
+### 3.4 Deterministic Hashing (v2.13.12)
+
+Der `full`-Hash inkludiert nun alle strategischen Parameter (z.B. `split_level`, `strict_heading_split`), sodass Konfigurationsänderungen im Frontmatter zwingend einen Re-Import auslösen.
+
+**Impact:** Änderungen an Chunking-Parametern werden zuverlässig erkannt, auch wenn der Text unverändert bleibt.
 
 ---
 
diff --git a/docs/04_Operations/04_admin_operations.md b/docs/04_Operations/04_admin_operations.md
index 3797ebc..96f7b1b 100644
--- a/docs/04_Operations/04_admin_operations.md
+++ b/docs/04_Operations/04_admin_operations.md
@@ -279,4 +279,11 @@ python3 -m scripts.reset_qdrant --mode wipe --prefix "mindnet" --yes
 
 # 2. Neu importieren (Force Hash recalculation)
 python3 -m scripts.import_markdown --vault ./vault --prefix "mindnet" --apply --force
-```
\ No newline at end of file
+```
+
+**Wichtig (v2.9.1 Migration):**
+Nach dem Update auf v2.9.1 (Section-basierte Links, Multigraph-Support) ist ein vollständiger Re-Import erforderlich, um "Phantom-Knoten" zu beheben und die neue Edge-Struktur zu konsolidieren:
+```bash
+python3 -m scripts.import_markdown --vault ./vault --prefix "mindnet" --apply --force
+```
+Dies stellt sicher, dass alle bestehenden Links korrekt in `target_id` und `target_section` aufgeteilt werden.
\ No newline at end of file

From 39fb821481a07cb3cc5c0aa12d3e7e7d21045cdd Mon Sep 17 00:00:00 2001
From: Lars <Lars@stommer.de>
Date: Tue, 30 Dec 2025 12:24:20 +0100
Subject: [PATCH 33/33] WP4d - branch merger Comment & Release Note

---
 docs/99_Archive/WP4d_merge_commit.md  |  99 +++++++++++
 docs/99_Archive/WP4d_release_notes.md | 236 ++++++++++++++++++++++++++
 2 files changed, 335 insertions(+)
 create mode 100644 docs/99_Archive/WP4d_merge_commit.md
 create mode 100644 docs/99_Archive/WP4d_release_notes.md

diff --git a/docs/99_Archive/WP4d_merge_commit.md b/docs/99_Archive/WP4d_merge_commit.md
new file mode 100644
index 0000000..298c055
--- /dev/null
+++ b/docs/99_Archive/WP4d_merge_commit.md
@@ -0,0 +1,99 @@
+# Branch Merge Commit Message: WP4d
+
+```
+feat: Section-basierte Links, Atomic Section Chunking & Registry-First Profiling (v2.9.1)
+
+## Graph Topology & Edge Management
+
+### Section-basierte Links (Multigraph-Support)
+- Split `[[Note#Section]]` Links in `target_id="Note"` und `target_section="Section"`
+- Edge-ID enthält nun `variant` (Section), ermöglicht mehrere Kanten zwischen denselben Knoten
+- Semantische Deduplizierung basiert auf `src->tgt:kind@sec` Key
+- Behebt "Phantom-Knoten" durch korrekte Trennung von Note-Name und Abschnitt
+
+**Geänderte Dateien:**
+- `app/core/graph/graph_utils.py`: `parse_link_target()` für Section-Extraktion
+- `app/core/graph/graph_derive_edges.py`: `target_section` in Edge-Payload
+- `app/core/database/qdrant.py`: Keyword-Index für `target_section`
+- `app/core/database/qdrant_points.py`: Explizites Durchreichen von `target_section`
+- `app/models/dto.py`: `EdgeDTO` mit `target_section` Feld
+
+### Extraction & Parsing Verbesserungen
+- Multi-line Callout-Blocks korrekt verarbeitet (stop-check logic)
+- Robuster Fallback für "headless" Blocks (split chunks)
+- Liberalisierte Regex für Umlaute und Sonderzeichen in Targets
+
+**Geänderte Dateien:**
+- `app/core/graph/graph_extractors.py`: Multi-line Callout-Parser, erweiterte Regex
+
+## Chunking & Ingestion (v3.9.9 / v2.13.12)
+
+### Atomic Section Logic (v3.9.9)
+- Vollständige Implementierung des "Pack-and-Carry-Over" Verfahrens (Regel 1-3)
+- Sektions-Überschriften und Inhalte bleiben atomar in Chunks
+- H1-Context Preservation: Dokumenttitel als Breadcrumb in Embedding-Fenster
+- Signature Alignment: Parameter-Synchronisierung (`context_prefix` statt `doc_title`)
+
+**Geänderte Dateien:**
+- `app/core/chunking/chunking_strategies.py`: Atomic Section Logic implementiert
+
+### Format-agnostische De-Duplizierung
+- Prüfung auf vorhandene Kanten basiert auf Ziel (`target`), nicht String-Match
+- Verhindert Dopplung von Kanten, die bereits via `[!edge]` Callout vorhanden sind
+- Global Pool Integration für unzugeordnete Kanten
+
+**Geänderte Dateien:**
+- `app/core/chunking/chunking_propagation.py`: Ziel-basierte Prüfung
+
+### Registry-First Profiling (v2.13.12)
+- Korrekte Hierarchie: Frontmatter > types.yaml Typ-Config > Global Defaults
+- Hard-Fallback auf `sliding_standard` nur wenn keine Konfiguration existiert
+- Automatische Anwendung korrekter Profile basierend auf Note-Typ
+
+### Deterministic Hashing
+- `full`-Hash inkludiert strategische Parameter (`split_level`, `strict_heading_split`)
+- Konfigurationsänderungen im Frontmatter lösen zwingend Re-Import aus
+
+**Geänderte Dateien:**
+- `app/core/ingestion/ingestion_processor.py`: Registry-First Profiling, Deterministic Hashing
+
+## Impact & Breaking Changes
+
+### Migration erforderlich
+**WICHTIG:** Vollständiger Re-Import erforderlich für bestehende Vaults:
+~~~bash
+python3 -m scripts.import_markdown --vault ./vault --prefix "mindnet" --apply --force
+~~~
+
+**Grund:**
+- Behebt "Phantom-Knoten" durch korrekte Aufteilung von `[[Note#Section]]` Links
+- Konsolidiert Edge-Struktur mit `target_section` Feld
+- Aktualisiert Chunking basierend auf neuen Strategien
+
+### Fixes
+- ✅ Resolves: Mehrere Links zur gleichen Note in einem Callout-Block wurden zu einer Kante gemergt
+- ✅ Resolves: "Phantom-Knoten" durch Einbeziehung des Anchors in `target_id`
+- ✅ Resolves: Redundante `[[rel:...]]` Links in Chunks
+- ✅ Resolves: Inkonsistente Metadaten in Qdrant durch fehlerhafte Profil-Auflösung (behoben durch Registry-First Profiling)
+
+## Dokumentation
+
+Alle relevanten Dokumente aktualisiert:
+- `03_tech_data_model.md`: Edge Payload Schema mit `target_section`
+- `02_concept_graph_logic.md`: Section-basierte Links & Multigraph-Support
+- `03_tech_ingestion_pipeline.md`: Chunking-Strategien, Registry-First Profiling
+- `03_tech_api_reference.md`: EdgeDTO mit `target_section`
+- `01_knowledge_design.md`: Deep-Links dokumentiert
+- `00_glossary.md`: Neue Begriffe ergänzt
+- `04_admin_operations.md`: Migration-Hinweis
+
+## Versionen
+
+- Graph Topology: v2.9.1
+- Chunking Strategies: v3.9.9
+- Ingestion Processor: v2.13.12
+- API DTO: v0.6.7
+
+Closes #[issue-number]
+```
+
diff --git a/docs/99_Archive/WP4d_release_notes.md b/docs/99_Archive/WP4d_release_notes.md
new file mode 100644
index 0000000..4653ceb
--- /dev/null
+++ b/docs/99_Archive/WP4d_release_notes.md
@@ -0,0 +1,236 @@
+# Release Notes: Mindnet v2.9.1 (WP4d)
+
+**Release Date:** 2025-12-30  
+**Type:** Feature Release mit Breaking Changes  
+**Branch:** WP4d
+
+---
+
+## 🎯 Übersicht
+
+Diese Version führt **Section-basierte Links** ein, verbessert das Chunking durch **Atomic Section Logic** und implementiert **Registry-First Profiling** für konsistentere Konfigurationsauflösung. Die Änderungen erfordern einen **vollständigen Re-Import** bestehender Vaults.
+
+---
+
+## ✨ Neue Features
+
+### Section-basierte Links (Deep-Links)
+
+Mindnet unterstützt nun **Deep-Links** zu spezifischen Abschnitten innerhalb einer Note:
+
+```markdown
+[[rel:based_on Mein Leitbild#P3 – Disziplin]]
+```
+
+**Vorteile:**
+- Mehrere Links zur gleichen Note möglich (verschiedene Sections)
+- Präzise Kontext-Ladung (nur relevanter Abschnitt)
+- Keine "Phantom-Knoten" mehr durch korrekte Trennung von Note-Name und Abschnitt
+
+**Technische Details:**
+- Links werden in `target_id="Note"` und `target_section="Section"` aufgeteilt
+- Edge-ID enthält `variant` (Section) für Multigraph-Support
+- Semantische Deduplizierung basiert auf `src->tgt:kind@sec` Key
+
+### Atomic Section Logic (Chunking v3.9.9)
+
+Das Chunking hält nun Sektions-Überschriften und deren Inhalte **atomar** zusammen:
+
+**"Pack-and-Carry-Over" Verfahren:**
+- Regel 1 & 2: Sektionen werden zusammengepackt, wenn sie in das Token-Limit passen
+- Regel 3: Zu große Sektionen werden intelligent zerlegt, Rest wird zurück in Queue gelegt
+- H1-Context Preservation: Dokumenttitel wird als Breadcrumb in alle Chunks injiziert
+
+**Vorteile:**
+- Keine getrennten Überschriften mehr
+- Bessere semantische Kohärenz in Chunks
+- Verbesserte Retrieval-Qualität durch vollständigen Kontext
+
+### Registry-First Profiling (v2.13.12)
+
+Die Konfigurationsauflösung folgt nun einer klaren Hierarchie:
+
+1. **Frontmatter** (höchste Priorität)
+2. **types.yaml Typ-Config**
+3. **Global Defaults**
+
+**Impact:**
+- Note-Typen wie `value` erhalten automatisch das korrekte Profil (`structured_smart_edges_strict`)
+- Keine manuellen Overrides mehr nötig für Standard-Typen
+- Konsistente Metadaten in Qdrant
+
+---
+
+## 🔧 Verbesserungen
+
+### Extraction & Parsing
+
+- **Multi-line Callout-Blocks:** Korrekte Verarbeitung von mehrzeiligen `[!edge]` Callouts
+- **Robuste Fallbacks:** "Headless" Blocks werden korrekt behandelt
+- **Liberalisierte Regex:** Unterstützung für Umlaute und Sonderzeichen in Link-Targets
+
+### Format-agnostische De-Duplizierung
+
+- Kanten werden unabhängig vom Format (Inline, Callout, Wikilink) erkannt
+- Verhindert Dopplungen, wenn Kanten bereits via `[!edge]` Callout vorhanden sind
+- Ziel-basierte Prüfung statt String-Match
+
+### Deterministic Hashing
+
+- `full`-Hash inkludiert strategische Parameter (`split_level`, `strict_heading_split`)
+- Konfigurationsänderungen im Frontmatter lösen zwingend Re-Import aus
+- Zuverlässigere Change Detection
+
+---
+
+## 🐛 Bugfixes
+
+- ✅ **Behoben:** Mehrere Links zur gleichen Note in einem Callout-Block wurden zu einer Kante gemergt
+- ✅ **Behoben:** "Phantom-Knoten" durch Einbeziehung des Anchors in `target_id`
+- ✅ **Behoben:** Redundante `[[rel:...]]` Links in Chunks
+- ✅ **Behoben:** Inkonsistente Metadaten in Qdrant durch fehlerhafte Profil-Auflösung
+- ✅ **Behoben:** `TypeError` durch Parameter-Mismatch zwischen Orchestrator und Strategien
+
+---
+
+## ⚠️ Breaking Changes & Migration
+
+### Migration erforderlich
+
+**WICHTIG:** Nach dem Update auf v2.9.1 ist ein **vollständiger Re-Import** erforderlich:
+
+```bash
+python3 -m scripts.import_markdown --vault ./vault --prefix "mindnet" --apply --force
+```
+
+**Warum?**
+- Behebt "Phantom-Knoten" durch korrekte Aufteilung von `[[Note#Section]]` Links
+- Konsolidiert Edge-Struktur mit neuem `target_section` Feld
+- Aktualisiert Chunking basierend auf Atomic Section Logic
+
+**Was passiert beim Re-Import?**
+- Alle bestehenden Links werden neu geparst und in `target_id` + `target_section` aufgeteilt
+- Chunks werden mit neuer Atomic Section Logic neu generiert
+- Edge-Struktur wird konsolidiert (Multigraph-Support)
+
+**Dauer:** Abhängig von Vault-Größe (typischerweise 5-30 Minuten)
+
+---
+
+## 📚 API-Änderungen
+
+### EdgeDTO erweitert
+
+```python
+class EdgeDTO(BaseModel):
+    # ... bestehende Felder ...
+    target_section: Optional[str] = None  # Neu: Abschnitts-Name
+```
+
+**Impact für API-Consumer:**
+- Graph-Endpunkte (`/graph/{note_id}`) enthalten nun `target_section` in Edge-Objekten
+- Frontend kann Section-Informationen für präzisere Visualisierung nutzen
+
+---
+
+## 📖 Dokumentation
+
+Alle relevanten Dokumente wurden aktualisiert:
+
+- ✅ `03_tech_data_model.md`: Edge Payload Schema mit `target_section`
+- ✅ `02_concept_graph_logic.md`: Section-basierte Links & Multigraph-Support
+- ✅ `03_tech_ingestion_pipeline.md`: Chunking-Strategien, Registry-First Profiling
+- ✅ `03_tech_api_reference.md`: EdgeDTO mit `target_section`
+- ✅ `01_knowledge_design.md`: Deep-Links dokumentiert
+- ✅ `00_glossary.md`: Neue Begriffe ergänzt
+- ✅ `04_admin_operations.md`: Migration-Hinweis
+
+---
+
+## 🔄 Technische Details
+
+### Geänderte Module
+
+**Graph Topology:**
+- `app/core/graph/graph_utils.py`: `parse_link_target()` für Section-Extraktion
+- `app/core/graph/graph_derive_edges.py`: `target_section` in Edge-Payload
+- `app/core/graph/graph_extractors.py`: Multi-line Callout-Parser
+
+**Chunking:**
+- `app/core/chunking/chunking_strategies.py`: Atomic Section Logic (v3.9.9)
+- `app/core/chunking/chunking_propagation.py`: Format-agnostische De-Duplizierung
+
+**Ingestion:**
+- `app/core/ingestion/ingestion_processor.py`: Registry-First Profiling (v2.13.12), Deterministic Hashing
+
+**Database:**
+- `app/core/database/qdrant.py`: Keyword-Index für `target_section`
+- `app/core/database/qdrant_points.py`: Explizites Durchreichen von `target_section`
+
+**API:**
+- `app/models/dto.py`: `EdgeDTO` mit `target_section` Feld (v0.6.7)
+
+### Versionsnummern
+
+- Graph Topology: **v2.9.1**
+- Chunking Strategies: **v3.9.9**
+- Ingestion Processor: **v2.13.12**
+- API DTO: **v0.6.7**
+
+---
+
+## 🚀 Upgrade-Pfad
+
+### Für Administratoren
+
+1. **Backup erstellen:**
+   ```bash
+   docker stop qdrant
+   tar -czf qdrant_backup_$(date +%F).tar.gz ./qdrant_data
+   docker start qdrant
+   ```
+
+2. **Code aktualisieren:**
+   ```bash
+   git pull origin main
+   source .venv/bin/activate
+   pip install -r requirements.txt
+   ```
+
+3. **Re-Import durchführen:**
+   ```bash
+   python3 -m scripts.import_markdown --vault ./vault --prefix "mindnet" --apply --force
+   ```
+
+4. **Services neu starten:**
+   ```bash
+   sudo systemctl restart mindnet-prod
+   sudo systemctl restart mindnet-ui-prod
+   ```
+
+### Für Entwickler
+
+- Keine Code-Änderungen erforderlich, wenn nur API genutzt wird
+- Frontend kann `target_section` Feld in Edge-Objekten nutzen (optional)
+
+---
+
+## 📝 Bekannte Einschränkungen
+
+- **Migration-Dauer:** Große Vaults (>10.000 Notizen) können 30+ Minuten benötigen
+- **Temporärer Speicher:** Während des Re-Imports kann Qdrant-Speicher temporär ansteigen
+
+---
+
+## 🙏 Danksagungen
+
+Diese Version wurde durch umfangreiche Code-Analyse und Dokumentationsprüfung ermöglicht. Besonderer Fokus lag auf:
+- Konsistenz zwischen Code und Dokumentation
+- Vollständige Abdeckung aller Rollen (Entwickler, Administratoren, Anwender, Tester, Deployment)
+- Klare Migration-Pfade
+
+---
+
+**Vollständiges Changelog:** Siehe Git-Commits für detaillierte Änderungen  
+**Support:** Bei Fragen zur Migration siehe [Admin Operations Guide](../04_Operations/04_admin_operations.md)
+