From c61b66b49df5b1235996bf0b80bbfe31a5810bae Mon Sep 17 00:00:00 2001
From: Lars <Lars@stommer.de>
Date: Tue, 16 Dec 2025 12:07:28 +0100
Subject: [PATCH] neue Chunker, Fehler- und Strategiekorrektur

---
 app/core/chunker.py               | 129 +++++++++++++++++------------
 app/services/semantic_analyzer.py |  63 ++++++++++----
 config/types.yaml                 | 132 +++++++++++++++++-------------
 3 files changed, 197 insertions(+), 127 deletions(-)

diff --git a/app/core/chunker.py b/app/core/chunker.py
index 0943010..1d6f625 100644
--- a/app/core/chunker.py
+++ b/app/core/chunker.py
@@ -1,7 +1,7 @@
 """
 FILE: app/core/chunker.py
 DESCRIPTION: Zerlegt Texte in Chunks (Sliding Window oder nach Headings). Orchestriert die Smart-Edge-Allocation via SemanticAnalyzer.
-VERSION: 2.6.0 (Fix: Strict Heading Split & Header Retention)
+VERSION: 2.9.0 (Feat: Hybrid Strict Splitting with Size Safety)
 STATUS: Active
 DEPENDENCIES: app.services.semantic_analyzer, app.core.derive_edges, markdown_it, yaml, asyncio
 EXTERNAL_CONFIG: config/types.yaml
@@ -25,7 +25,7 @@ from app.services.semantic_analyzer import get_semantic_analyzer
 try:
     from app.core.derive_edges import build_edges_for_note
 except ImportError:
-    # Mock für Tests, falls Module fehlen
+    # Mock für Tests
     def build_edges_for_note(note_id, chunks, note_level_references=None, include_note_scope_refs=False): return []
 
 logger = logging.getLogger(__name__)
@@ -122,10 +122,14 @@ def parse_blocks(md_text: str) -> Tuple[List[RawBlock], str]:
     
     for line in lines:
         stripped = line.strip()
-        if stripped.startswith('# '): 
-            # H1 wird für den Titel genutzt, aber nicht als Block für sliding window
-            # (Außer es ist H1 im Body, aber wir ignorieren H1 hier meist als Title)
-            continue 
+        if stripped.startswith('# '):
+            if buffer:
+                content = "\n".join(buffer).strip()
+                if content:
+                    blocks.append(RawBlock("paragraph", content, None, section_path, current_h2))
+                buffer = []
+            blocks.append(RawBlock("heading", stripped, 1, section_path, current_h2))
+            
         elif stripped.startswith('## '):
             if buffer:
                 content = "\n".join(buffer).strip()
@@ -134,8 +138,16 @@ def parse_blocks(md_text: str) -> Tuple[List[RawBlock], str]:
                 buffer = []
             current_h2 = stripped[3:].strip()
             section_path = f"/{current_h2}"
-            # WICHTIG: Die Überschrift selbst als Block speichern!
             blocks.append(RawBlock("heading", stripped, 2, section_path, current_h2))
+            
+        elif stripped.startswith('### '):
+             if buffer:
+                content = "\n".join(buffer).strip()
+                if content:
+                    blocks.append(RawBlock("paragraph", content, None, section_path, current_h2))
+                buffer = []
+             blocks.append(RawBlock("heading", stripped, 3, section_path, current_h2))
+
         elif not stripped:
             if buffer:
                 content = "\n".join(buffer).strip()
@@ -175,19 +187,18 @@ def _strategy_sliding_window(blocks: List[RawBlock], config: Dict[str, Any], not
         text_body = "\n\n".join([b.text for b in buf])
         win_body = f"{context_prefix}\n{text_body}".strip() if context_prefix else text_body
         
+        # Basis-Info vom ersten Block im Buffer
+        sec = buf[0].section_title if buf else None
+        path = buf[0].section_path if buf else "/"
+
         if estimate_tokens(text_body) <= max_tokens:
-            sec = buf[0].section_title if buf else None
-            path = buf[0].section_path if buf else "/"
             _create_chunk_obj(chunks, note_id, text_body, win_body, sec, path)
         else:
+            # Fallback: Wenn Block zu groß, intern splitten (Sentence-Level)
             sentences = split_sentences(text_body)
             current_chunk_sents = []
             current_len = 0
             
-            # Basis-Info vom ersten Block im Buffer
-            sec = buf[0].section_title if buf else None
-            path = buf[0].section_path if buf else "/"
-
             for sent in sentences:
                 sent_len = estimate_tokens(sent)
                 if current_len + sent_len > target and current_chunk_sents:
@@ -219,11 +230,7 @@ def _strategy_sliding_window(blocks: List[RawBlock], config: Dict[str, Any], not
         buf = []
 
     for b in blocks:
-        # Bei Sliding Window ignorieren wir Heading-Blocks als Split-Trigger NICHT zwingend, 
-        # aber wir wollen Headings oft nicht "allein" stehen haben. 
-        # Hier einfache Logik:
         if b.kind == "heading":
-            # Optional: Buffer flushen bei neuem Header, um Kontextwechsel sauberer zu machen
             flush_buffer()
         
         current_buf_text = "\n\n".join([x.text for x in buf])
@@ -237,30 +244,34 @@ def _strategy_sliding_window(blocks: List[RawBlock], config: Dict[str, Any], not
 
 def _strategy_by_heading(blocks: List[RawBlock], config: Dict[str, Any], note_id: str, doc_title: str = "") -> List[Chunk]:
     """
-    STRICT HEADING SPLIT (Fix v2.6.0): 
-    Trennt den Text konsequent an jeder Überschrift der definierten Ebene.
-    Behält Überschriften als Teil (erste Zeile) des Chunks bei.
-    Kein Merging kleiner Abschnitte über Header-Grenzen hinweg.
+    MODUS: Structured / Heading Split
+    - split_level: Ebene für logische Trennung (z.B. H2).
+    - strict_heading_split: 
+        True: Trennt an jedem Header <= split_level. 
+              NEU v2.9: Wenn der Inhalt das Limit 'max' überschreiten würde, wird auch ohne Header gesplittet (Safety Split).
+        False: Fasst zusammen bis 'target' erreicht ist.
     """
     split_level = config.get("split_level", 2)
-    chunks = []
+    target = config.get("target", 400)
+    max_limit = config.get("max", 600)
+    strict_mode = config.get("strict_heading_split", False)
     
-    # Temporärer Speicher für den aktuellen Chunk
+    chunks = []
     current_chunk_blocks = []
     
     context_prefix = f"# {doc_title}"
 
+    def has_content(blk_list):
+        return any(b.kind != "heading" for b in blk_list)
+
     def flush_current_chunk():
         nonlocal current_chunk_blocks
         if not current_chunk_blocks:
             return
         
-        # Text zusammenbauen
         text_body = "\n\n".join([b.text for b in current_chunk_blocks])
-        # Window bauen (hier einfach Text, da Kontext via Header implizit ist)
         win_body = f"{context_prefix}\n{text_body}".strip()
         
-        # Metadaten vom ersten Block (üblicherweise der Header) nehmen
         first_b = current_chunk_blocks[0]
         sec = first_b.section_title
         path = first_b.section_path
@@ -268,19 +279,49 @@ def _strategy_by_heading(blocks: List[RawBlock], config: Dict[str, Any], note_id
         _create_chunk_obj(chunks, note_id, text_body, win_body, sec, path)
         current_chunk_blocks = []
 
+    def get_current_size():
+        txt = "\n\n".join([b.text for b in current_chunk_blocks])
+        return estimate_tokens(txt)
+
     for b in blocks:
-        # Prüfen, ob dieser Block ein Trenner (Header auf Split-Level) ist
-        is_splitter = (b.kind == "heading" and b.level == split_level)
+        # 1. Header Logic (Struktur-Trigger)
+        is_splitter = (b.kind == "heading" and b.level is not None and b.level <= split_level)
         
         if is_splitter:
-            # 1. Den bisherigen Chunk abschließen (falls vorhanden)
-            flush_current_chunk()
+            is_higher_hierarchy = (b.level < split_level)
             
-            # 2. Den neuen Chunk mit diesem Header beginnen
-            current_chunk_blocks.append(b)
+            if strict_mode:
+                # STRICT:
+                # Wir splitten immer, außer der Vor-Chunk ist leer.
+                if current_chunk_blocks and has_content(current_chunk_blocks):
+                    flush_current_chunk()
+                current_chunk_blocks.append(b)
+            else:
+                # SOFT:
+                # Split bei Hierarchie-Wechsel ODER wenn voll.
+                if is_higher_hierarchy:
+                    flush_current_chunk()
+                    current_chunk_blocks.append(b)
+                elif current_chunk_blocks and get_current_size() >= target:
+                    flush_current_chunk()
+                    current_chunk_blocks.append(b)
+                else:
+                    current_chunk_blocks.append(b)
         else:
-            # Einfach anhängen
-            current_chunk_blocks.append(b)
+            # 2. Content Logic (Safety Trigger für Monster-Abschnitte)
+            # Bevor wir den Block anhängen: Würde er das Fass zum Überlaufen bringen?
+            # Wir nutzen hier 'max' als harte Grenze für den Safety-Split.
+            current_size = get_current_size()
+            block_size = estimate_tokens(b.text)
+            
+            if current_chunk_blocks and (current_size + block_size > max_limit):
+                # NOTBREMSE: Chunk wird zu groß.
+                # Wir splitten hier, auch wenn kein Header da ist.
+                # Der Kontext (Section Title) bleibt erhalten, da er aus `current_h2` kommt (siehe parse_blocks).
+                flush_current_chunk()
+                current_chunk_blocks.append(b)
+            else:
+                current_chunk_blocks.append(b)
 
     # Letzten Rest flushen
     flush_current_chunk()
@@ -301,14 +342,12 @@ async def assemble_chunks(note_id: str, md_text: str, note_type: str, config: Op
     primary_strategy = config.get("strategy", "sliding_window")
     enable_smart_edges = config.get("enable_smart_edge_allocation", False)
 
-    # Performance/Cost-Guard: Bei Entwürfen keine Smart Edges
     if enable_smart_edges and note_status in ["draft", "initial_gen"]:
         logger.info(f"Chunker: Skipping Smart Edges for draft '{note_id}'.")
         enable_smart_edges = False
 
     blocks, doc_title = parse_blocks(md_text)
     
-    # Strategie-Auswahl
     if primary_strategy == "by_heading":
         chunks = await asyncio.to_thread(_strategy_by_heading, blocks, config, note_id, doc_title)
     else:
@@ -317,11 +356,9 @@ async def assemble_chunks(note_id: str, md_text: str, note_type: str, config: Op
     if not chunks:
         return []
 
-    # Smart Edge Allocation (WP-15)
     if enable_smart_edges:
         chunks = await _run_smart_edge_allocation(chunks, md_text, note_id, note_type)
 
-    # Verkettung der Chunks (next/prev)
     for i, ch in enumerate(chunks):
         ch.neighbors_prev = chunks[i-1].id if i > 0 else None
         ch.neighbors_next = chunks[i+1].id if i < len(chunks)-1 else None
@@ -329,10 +366,6 @@ async def assemble_chunks(note_id: str, md_text: str, note_type: str, config: Op
     return chunks
 
 def _extract_all_edges_from_md(md_text: str, note_id: str, note_type: str) -> List[str]:
-    """
-    Hilfsfunktion: Erstellt einen Dummy-Chunk für den gesamten Text und ruft
-    den Edge-Parser auf, um ALLE Kanten der Notiz zu finden.
-    """
     dummy_chunk = {
         "chunk_id": f"{note_id}#full",
         "text": md_text, 
@@ -340,7 +373,6 @@ def _extract_all_edges_from_md(md_text: str, note_id: str, note_type: str) -> Li
         "window": md_text,
         "type": note_type
     }
-    # Parsing aller Kanten (Inline, Wikilinks, Callouts)
     raw_edges = build_edges_for_note(
         note_id, 
         [dummy_chunk], 
@@ -351,29 +383,23 @@ def _extract_all_edges_from_md(md_text: str, note_id: str, note_type: str) -> Li
     for e in raw_edges:
         kind = e.get("kind")
         target = e.get("target_id")
-        # Struktur-Kanten ignorieren wir für die Verteilung
         if target and kind not in ["belongs_to", "next", "prev", "backlink"]:
             all_candidates.add(f"{kind}:{target}")
-            
     return list(all_candidates)
 
 async def _run_smart_edge_allocation(chunks: List[Chunk], full_text: str, note_id: str, note_type: str) -> List[Chunk]:
     analyzer = get_semantic_analyzer()
-    
-    # A. Alle potenziellen Kanten der Notiz sammeln
     candidate_list = _extract_all_edges_from_md(full_text, note_id, note_type)
     
     if not candidate_list:
         return chunks
 
-    # B. LLM Filterung pro Chunk (Parallel)
     tasks = []
     for chunk in chunks:
         tasks.append(analyzer.assign_edges_to_chunk(chunk.text, candidate_list, note_type))
     
     results_per_chunk = await asyncio.gather(*tasks)
     
-    # C. Injection & Fallback Tracking
     assigned_edges_global = set()
     
     for i, confirmed_edges in enumerate(results_per_chunk):
@@ -381,18 +407,13 @@ async def _run_smart_edge_allocation(chunks: List[Chunk], full_text: str, note_i
         chunk.suggested_edges = confirmed_edges
         assigned_edges_global.update(confirmed_edges)
         
-        # Injection: Wir hängen die bestätigten Edges unsichtbar (fürs Embedding) oder sichtbar an
-        # Hier als "Pseudo-Code" im Text, damit sie embedded werden.
         if confirmed_edges:
-            # Format: [[rel:kind|target]]
             injection_str = "\n" + " ".join([f"[[rel:{e.split(':')[0]}|{e.split(':')[1]}]]" for e in confirmed_edges if ':' in e])
             chunk.text += injection_str
             chunk.window += injection_str
 
-    # D. Fallback: Kanten, die NIRGENDS zugewiesen wurden, werden JEDEM Chunk angehängt (Sicherheit)
     unassigned = set(candidate_list) - assigned_edges_global
     if unassigned:
-        logger.info(f"Chunker: {len(unassigned)} unassigned edges in {note_id}. Distributing to all chunks.")
         fallback_str = "\n" + " ".join([f"[[rel:{e.split(':')[0]}|{e.split(':')[1]}]]" for e in unassigned if ':' in e])
         for chunk in chunks:
             chunk.text += fallback_str
diff --git a/app/services/semantic_analyzer.py b/app/services/semantic_analyzer.py
index aa9eafd..24ca205 100644
--- a/app/services/semantic_analyzer.py
+++ b/app/services/semantic_analyzer.py
@@ -1,10 +1,10 @@
 """
 FILE: app/services/semantic_analyzer.py
 DESCRIPTION: KI-gestützte Kanten-Validierung. Nutzt LLM (Background-Priority), um Kanten präzise einem Chunk zuzuordnen.
-VERSION: 2.0.0
+VERSION: 2.1.0 (Fix: Strict Edge String Validation against LLM Hallucinations)
 STATUS: Active
 DEPENDENCIES: app.services.llm_service, json, logging
-LAST_ANALYSIS: 2025-12-15
+LAST_ANALYSIS: 2025-12-16
 """
 
 import json
@@ -21,6 +21,34 @@ class SemanticAnalyzer:
     def __init__(self):
         self.llm = LLMService()
 
+    def _is_valid_edge_string(self, edge_str: str) -> bool:
+        """
+        Return True if edge_str looks like an edge of the form 'kind:target' (split at the first ':').
+        Keeps LLM chatter ("Here is the list: ...") from slipping through as an edge; returns False otherwise.
+        """
+        if not isinstance(edge_str, str) or ":" not in edge_str:
+            return False
+            
+        parts = edge_str.split(":", 1)
+        kind = parts[0].strip()
+        target = parts[1].strip()
+        
+        # Rule 1: a 'kind' (relation type) must not contain spaces.
+        # Allowed: "derived_from", "related_to"
+        # Rejected: "derived end of instruction", "Here is the list"
+        if " " in kind:
+            return False
+            
+        # Rule 2: plausible length for the type (2 to 40 characters after stripping)
+        if len(kind) > 40 or len(kind) < 2:
+            return False
+            
+        # Rule 3: the target must not be empty after stripping whitespace
+        if not target:
+            return False
+            
+        return True
+
     async def assign_edges_to_chunk(self, chunk_text: str, all_edges: List[str], note_type: str) -> List[str]:
         """
         Sendet einen Chunk und eine Liste potenzieller Kanten an das LLM.
@@ -59,14 +87,13 @@ class SemanticAnalyzer:
         )
 
         try:
-            # 4. LLM Call mit Traffic Control (NEU: priority="background")
-            # Wir nutzen die "Slow Lane", damit der User im Chat nicht warten muss.
+            # 4. LLM Call mit Traffic Control
             response_json = await self.llm.generate_raw_response(
                 prompt=final_prompt,
                 force_json=True,
                 max_retries=5, 
                 base_delay=5.0,
-                priority="background"  # <--- WICHTIG: Drosselung aktivieren
+                priority="background"
             )
 
             # LOG: Raw Response Preview
@@ -91,30 +118,38 @@ class SemanticAnalyzer:
             valid_edges = []
 
             # 6. Robuste Validierung (List vs Dict)
+            # Wir sammeln erst alle Strings ein
+            raw_candidates = []
+            
             if isinstance(data, list):
-                # Standardfall: ["kind:target", ...]
-                valid_edges = [str(e) for e in data if isinstance(e, str) and ":" in e]
+                raw_candidates = data
             
             elif isinstance(data, dict):
-                # Abweichende Formate behandeln
                 logger.info(f"ℹ️ [SemanticAnalyzer] LLM lieferte Dict statt Liste. Versuche Reparatur. Keys: {list(data.keys())}")
-                
                 for key, val in data.items():
                     # Fall A: {"edges": ["kind:target"]}
                     if key.lower() in ["edges", "results", "kanten", "matches"] and isinstance(val, list):
-                         valid_edges.extend([str(e) for e in val if isinstance(e, str) and ":" in e])
+                         raw_candidates.extend(val)
                     
-                    # Fall B: {"kind": "target"}
+                    # Fall B: {"kind": "target"} (Beziehung als Key)
                     elif isinstance(val, str):
-                        valid_edges.append(f"{key}:{val}")
+                        raw_candidates.append(f"{key}:{val}")
                     
                     # Fall C: {"kind": ["target1", "target2"]}
                     elif isinstance(val, list):
                         for target in val:
                             if isinstance(target, str):
-                                valid_edges.append(f"{key}:{target}")
+                                raw_candidates.append(f"{key}:{target}")
 
-            # Safety: Filtere nur Kanten, die halbwegs valide aussehen
+            # 7. Strict Validation Loop
+            for e in raw_candidates:
+                e_str = str(e)
+                if self._is_valid_edge_string(e_str):
+                    valid_edges.append(e_str)
+                else:
+                    logger.debug(f"   [SemanticAnalyzer] Invalid edge format rejected: '{e_str}'")
+
+            # Safety: Filtere nur Kanten, die halbwegs valide aussehen (Doppelcheck)
             final_result = [e for e in valid_edges if ":" in e]
             
             # LOG: Ergebnis
diff --git a/config/types.yaml b/config/types.yaml
index a3385e0..5a465a7 100644
--- a/config/types.yaml
+++ b/config/types.yaml
@@ -1,4 +1,4 @@
-version: 2.4.0 # Optimized for Async Intelligence & Hybrid Router
+version: 2.6.0 # Final WP-15 Config: Smart Edges & Strict/Soft Chunking
 
 # ==============================================================================
 # 1. CHUNKING PROFILES
@@ -7,7 +7,6 @@ version: 2.4.0 # Optimized for Async Intelligence & Hybrid Router
 chunking_profiles:
   
   # A. SHORT & FAST
-  # Für Glossar, Tasks, Risiken. Kleine Schnipsel.
   sliding_short:
     strategy: sliding_window
     enable_smart_edge_allocation: false
@@ -16,7 +15,6 @@ chunking_profiles:
     overlap: [30, 50]
 
   # B. STANDARD & FAST
-  # Der "Traktor": Robust für Quellen, Journal, Daily Logs.
   sliding_standard:
     strategy: sliding_window
     enable_smart_edge_allocation: false
@@ -24,10 +22,8 @@ chunking_profiles:
     max: 650
     overlap: [50, 100]
 
-  # C. SMART FLOW (Performance-Safe Mode)
-  # Für Konzepte, Projekte, Erfahrungen.
-  # HINWEIS: 'enable_smart_edge_allocation' ist vorerst FALSE, um Ollama
-  # bei der Generierung nicht zu überlasten. Später wieder aktivieren.
+  # C. SMART FLOW (Text-Fluss)
+  # Nutzt Sliding Window, aber mit LLM-Kanten-Analyse.
   sliding_smart_edges:
     strategy: sliding_window
     enable_smart_edge_allocation: true 
@@ -35,12 +31,38 @@ chunking_profiles:
     max: 600
     overlap: [50, 80]
 
-  # D. SMART STRUCTURE
-  # Für Profile, Werte, Prinzipien. Trennt hart an Überschriften (H2).
+  # D. SMART STRUCTURE (Soft Split)
+  # Trennt bevorzugt an H2, fasst aber kleine Abschnitte zusammen ("Soft Mode").
   structured_smart_edges:
     strategy: by_heading
     enable_smart_edge_allocation: true
     split_level: 2
+    strict_heading_split: false 
+    max: 600
+    target: 400
+    overlap: [50, 80]
+
+  # E. SMART STRUCTURE STRICT (H2 Hard Split)
+  # Trennt ZWINGEND an jeder H2. 
+  # Verhindert, dass "Vater" und "Partner" (Profile) oder Werte verschmelzen.
+  structured_smart_edges_strict:
+    strategy: by_heading
+    enable_smart_edge_allocation: true
+    split_level: 2
+    strict_heading_split: true # Hard Mode
+    max: 600
+    target: 400
+    overlap: [50, 80]
+
+  # F. SMART STRUCTURE DEEP (H3 Hard Split + Merge-Check)
+  # Spezialfall für "Leitbild Prinzipien":
+  # - Trennt H1, H2, H3 hart.
+  # - Aber: Eine "leere" H2 (Tier 2) wird mit der folgenden H3 (MP1) zusammengeführt.
+  structured_smart_edges_strict_L3:
+    strategy: by_heading
+    enable_smart_edge_allocation: true
+    split_level: 3
+    strict_heading_split: true
     max: 600
     target: 400
     overlap: [50, 80]
@@ -59,24 +81,13 @@ defaults:
 
 types:
 
-  # --- KERNTYPEN (Hoch priorisiert & Smart) ---
+  # --- KERNTYPEN ---
 
   experience:
     chunking_profile: sliding_smart_edges
     retriever_weight: 0.90
     edge_defaults: ["derived_from", "references"]
-    # Hybrid Classifier: Wenn diese Worte fallen, ist es eine Experience
-    detection_keywords: 
-      - "passiert"
-      - "erlebt"
-      - "gefühl"
-      - "situation"
-      - "stolz"
-      - "geärgert"
-      - "reaktion"
-      - "moment"
-      - "konflikt"
-    # Ghostwriter Schema: Sprechende Anweisungen für besseren Textfluss
+    detection_keywords: ["passiert", "erlebt", "gefühl", "situation", "reaktion"]
     schema:
       - "Situation (Was ist passiert?)"
       - "Meine Reaktion (Was habe ich getan?)"
@@ -87,48 +98,37 @@ types:
     chunking_profile: sliding_smart_edges
     retriever_weight: 0.97
     edge_defaults: ["references", "depends_on"] 
-    detection_keywords:
-      - "projekt"
-      - "vorhaben"
-      - "ziel ist"
-      - "meilenstein"
-      - "planen"
-      - "starten"
-      - "mission"
+    detection_keywords: ["projekt", "vorhaben", "ziel ist", "planen", "starten"]
     schema:
       - "Mission & Zielsetzung"
       - "Aktueller Status & Blockaden"
       - "Nächste konkrete Schritte"
-      - "Stakeholder & Ressourcen"
 
   decision:
-    chunking_profile: structured_smart_edges 
-    retriever_weight: 1.00 # MAX: Entscheidungen sind Gesetz
+    # Strict, damit jede Entscheidung atomar bleibt
+    chunking_profile: structured_smart_edges_strict 
+    retriever_weight: 1.00 
     edge_defaults: ["caused_by", "references"]
-    detection_keywords:
-      - "entschieden"
-      - "wahl"
-      - "optionen"
-      - "alternativen"
-      - "beschluss"
-      - "adr"
+    detection_keywords: ["entschieden", "wahl", "optionen", "alternativen", "adr"]
     schema:
       - "Kontext & Problemstellung"
-      - "Betrachtete Optionen (Alternativen)"
+      - "Betrachtete Optionen"
       - "Die Entscheidung"
-      - "Begründung (Warum diese Wahl?)"
+      - "Begründung"
 
   # --- PERSÖNLICHKEIT & IDENTITÄT ---
 
   value:
-    chunking_profile: structured_smart_edges
+    # Strict, damit Werte nicht verschwimmen
+    chunking_profile: structured_smart_edges_strict
     retriever_weight: 1.00
     edge_defaults: ["related_to"]
     detection_keywords: ["wert", "wichtig ist", "moral", "ethik"]
-    schema: ["Definition", "Warum mir das wichtig ist", "Leitsätze für den Alltag"]
+    schema: ["Definition", "Warum mir das wichtig ist", "Leitsätze"]
 
   principle:
-    chunking_profile: structured_smart_edges
+    # L3 Strict für P3/P3a und Tier2/MP1 Logik
+    chunking_profile: structured_smart_edges_strict_L3
     retriever_weight: 0.95
     edge_defaults: ["derived_from", "references"]
     detection_keywords: ["prinzip", "regel", "grundsatz", "leitlinie"]
@@ -138,11 +138,11 @@ types:
     chunking_profile: sliding_short
     retriever_weight: 0.90
     edge_defaults: ["related_to"]
-    detection_keywords: ["glaube", "überzeugung", "denke dass", "meinung"]
     schema: ["Der Glaubenssatz", "Ursprung & Reflexion"]
 
   profile:
-    chunking_profile: structured_smart_edges
+    # Strict: Jede Rolle (H2) muss ein eigener Chunk sein
+    chunking_profile: structured_smart_edges_strict 
     retriever_weight: 0.70
     edge_defaults: ["references", "related_to"]
     schema: ["Rolle / Identität", "Fakten & Daten", "Historie"]
@@ -159,8 +159,8 @@ types:
     chunking_profile: sliding_short
     retriever_weight: 0.85
     edge_defaults: ["related_to", "blocks"]
-    detection_keywords: ["risiko", "gefahr", "bedrohung", "problem", "angst"]
-    schema: ["Beschreibung des Risikos", "Mögliche Auswirkungen", "Gegenmaßnahmen"]
+    detection_keywords: ["risiko", "gefahr", "bedrohung"]
+    schema: ["Beschreibung des Risikos", "Auswirkungen", "Gegenmaßnahmen"]
 
   # --- BASIS & WISSEN ---
 
@@ -168,10 +168,7 @@ types:
     chunking_profile: sliding_smart_edges
     retriever_weight: 0.60
     edge_defaults: ["references", "related_to"]
-    schema:
-      - "Definition"
-      - "Kontext & Hintergrund"
-      - "Verwandte Konzepte"
+    schema: ["Definition", "Kontext", "Verwandte Konzepte"]
 
   task:
     chunking_profile: sliding_short
@@ -183,19 +180,36 @@ types:
     chunking_profile: sliding_standard
     retriever_weight: 0.80
     edge_defaults: ["references", "related_to"]
-    schema: ["Log-Eintrag", "Gedanken & Erkenntnisse"]
+    schema: ["Log-Eintrag", "Gedanken"]
 
   source:
     chunking_profile: sliding_standard
     retriever_weight: 0.50
     edge_defaults: [] 
-    schema:
-      - "Metadaten (Autor, URL, Datum)"
-      - "Kernaussage / Zusammenfassung"
-      - "Zitate & Notizen"
+    schema: ["Metadaten", "Zusammenfassung", "Zitate"]
 
   glossary:
     chunking_profile: sliding_short
     retriever_weight: 0.40
     edge_defaults: ["related_to"]
-    schema: ["Begriff", "Definition"]
\ No newline at end of file
+    schema: ["Begriff", "Definition"]
+    
+  person:
+    chunking_profile: sliding_standard
+    retriever_weight: 0.50
+    edge_defaults: ["related_to"]
+    schema: ["Rolle", "Beziehung", "Kontext"]
+
+  event:
+    chunking_profile: sliding_standard
+    retriever_weight: 0.60
+    edge_defaults: ["related_to"]
+    schema: ["Datum & Ort", "Teilnehmer", "Ergebnisse"]
+
+  # --- FALLBACK ---
+
+  default:
+    chunking_profile: sliding_standard
+    retriever_weight: 1.00
+    edge_defaults: ["references"]
+    schema: ["Inhalt"]
\ No newline at end of file