Neue Architekturaufteilung für den Chat in WP11 gebaut

2025-12-12 16:32:11 +01:00 · 2025-12-12 16:32:11 +01:00 · 0b8f0a6c22
commit 0b8f0a6c22
parent 30047f8e00
4 changed files with 286 additions and 265 deletions
--- a/app/routers/chat.py
+++ b/app/routers/chat.py
@ -1,6 +1,6 @@
 """
 app/routers/chat.py — RAG Endpunkt (WP-06 Hybrid Router + WP-07 Interview Mode)
-Version: 2.4.0 (Interview Support)
+Version: 2.4.1 (Fix: Type-based Intent Detection)

 Features:
 - Hybrid Intent Router (Keyword + LLM)
@ -8,14 +8,16 @@ Features:
 - Interview Loop (Schema-driven Data Collection)
 - Context Enrichment (Payload/Source Fallback)
 - Data Flywheel (Feedback Logging Integration)
+- NEU: Lädt detection_keywords aus types.yaml für präzise Erkennung.
 """

 from fastapi import APIRouter, HTTPException, Depends
-from typing import List, Dict, Any
+from typing import List, Dict, Any, Optional
 import time
 import uuid
 import logging
 import yaml
+import os
 from pathlib import Path

 from app.config import get_settings
@ -30,6 +32,7 @@ logger = logging.getLogger(__name__)
 # --- Helper: Config Loader ---

 _DECISION_CONFIG_CACHE = None
+_TYPES_CONFIG_CACHE = None

 def _load_decision_config() -> Dict[str, Any]:
    settings = get_settings()
@ -51,15 +54,32 @@ def _load_decision_config() -> Dict[str, Any]:
        logger.error(f"Failed to load decision config: {e}")
        return default_config

+def _load_types_config() -> Dict[str, Any]:
+    """Lädt die types.yaml für Keyword-Erkennung."""
+    path = os.getenv("MINDNET_TYPES_FILE", "config/types.yaml")
+    try:
+        with open(path, "r", encoding="utf-8") as f:
+            return yaml.safe_load(f) or {}
+    except Exception:
+        return {}
+
 def get_full_config() -> Dict[str, Any]:
    global _DECISION_CONFIG_CACHE
    if _DECISION_CONFIG_CACHE is None:
        _DECISION_CONFIG_CACHE = _load_decision_config()
    return _DECISION_CONFIG_CACHE

+def get_types_config() -> Dict[str, Any]:
+    global _TYPES_CONFIG_CACHE
+    if _TYPES_CONFIG_CACHE is None:
+        _TYPES_CONFIG_CACHE = _load_types_config()
+    return _TYPES_CONFIG_CACHE
+
 def get_decision_strategy(intent: str) -> Dict[str, Any]:
    config = get_full_config()
    strategies = config.get("strategies", {})
+    # Fallback: Ist ein Intent (z.B. INTERVIEW) nicht konfiguriert, wird die FACT-Strategie genutzt.
+    # (INTERVIEW sollte aber in decision_engine.yaml definiert sein!)
    return strategies.get(intent, strategies.get("FACT", {}))

 # --- Helper: Target Type Detection (WP-07) ---
@ -67,40 +87,40 @@ def get_decision_strategy(intent: str) -> Dict[str, Any]:
 def _detect_target_type(message: str, configured_schemas: Dict[str, Any]) -> str:
    """
    Versucht zu erraten, welchen Notiz-Typ der User erstellen will.
-    Nutzt Keywords und Mappings.
+    Nutzt Keywords aus types.yaml UND Mappings.
    """
    message_lower = message.lower()
    
-    # 1. Direkter Match mit Schema-Keys (z.B. "projekt", "entscheidung")
-    # Ignoriere 'default' hier
+    # 1. Check types.yaml detection_keywords (Priority!)
+    types_cfg = get_types_config()
+    types_def = types_cfg.get("types", {})
+    
+    for type_name, type_data in types_def.items():
+        keywords = type_data.get("detection_keywords", [])
+        for kw in keywords:
+            if kw.lower() in message_lower:
+                return type_name
+
+    # 2. Direkter Match mit Schema-Keys
    for type_key in configured_schemas.keys():
-        if type_key == "default":
-            continue
+        if type_key == "default": continue
        if type_key in message_lower:
            return type_key
            
-    # 2. Synonym-Mapping (Deutsch -> Schema Key)
-    # Dies verbessert die UX, falls User deutsche Begriffe nutzen
+    # 3. Synonym-Mapping (Legacy Fallback)
    synonyms = {
-        "projekt": "project",
-        "vorhaben": "project",
-        "entscheidung": "decision",
-        "beschluss": "decision",
+        "projekt": "project", "vorhaben": "project",
+        "entscheidung": "decision", "beschluss": "decision",
        "ziel": "goal",
-        "erfahrung": "experience",
-        "lektion": "experience",
+        "erfahrung": "experience", "lektion": "experience",
        "wert": "value",
        "prinzip": "principle",
-        "grundsatz": "principle",
-        "notiz": "default",
-        "idee": "default"
+        "notiz": "default", "idee": "default"
    }
    
    for term, schema_key in synonyms.items():
        if term in message_lower:
-            # Prüfen, ob der gemappte Key auch konfiguriert ist
-            if schema_key in configured_schemas:
-                return schema_key
+            return schema_key
                
    return "default"

@ -126,7 +146,6 @@ def _build_enriched_context(hits: List[QueryHit]) -> str:
        )
        title = hit.note_id or "Unbekannt"
        
-        # [FIX] Robustes Auslesen des Typs (Payload > Source > Unknown)
        payload = hit.payload or {}
        note_type = payload.get("type") or source.get("type", "unknown")
        note_type = str(note_type).upper()
@ -142,52 +161,58 @@ def _build_enriched_context(hits: List[QueryHit]) -> str:

 async def _classify_intent(query: str, llm: LLMService) -> tuple[str, str]:
    """
-    Hybrid Router v3: 
-    Gibt Tuple zurück: (Intent, Source)
+    Hybrid Router v4: 
+    1. Decision Keywords (Strategie)
+    2. Type Keywords (Interview Trigger)
+    3. LLM (Fallback)
    """
    config = get_full_config()
    strategies = config.get("strategies", {})
    settings = config.get("settings", {})
    
    query_lower = query.lower()
-    best_intent = None
-    max_match_length = 0
    
-    # 1. FAST PATH: Keywords
+    # 1. FAST PATH A: Strategie Keywords (z.B. "Soll ich...")
    for intent_name, strategy in strategies.items():
        if intent_name == "FACT": continue
        keywords = strategy.get("trigger_keywords", [])
        for k in keywords:
            if k.lower() in query_lower:
-                if len(k) > max_match_length:
-                    max_match_length = len(k)
-                    best_intent = intent_name
+                return intent_name, "Keyword (Strategy)"
    
-    if best_intent:
-        return best_intent, "Keyword (Fast Path)"
+    # 2. FAST PATH B: Type Keywords (z.B. "Projekt", "passiert") -> INTERVIEW
+    # Wir prüfen, ob ein Typ erkannt wird. Wenn ja -> Interview.
+    # Wir laden Schemas nicht hier, sondern nutzen types.yaml global
+    types_cfg = get_types_config()
+    types_def = types_cfg.get("types", {})
    
-    # 2. SLOW PATH: LLM Router
+    for type_name, type_data in types_def.items():
+        keywords = type_data.get("detection_keywords", [])
+        for kw in keywords:
+            if kw.lower() in query_lower:
+                return "INTERVIEW", f"Keyword (Type: {type_name})"
+
+    # 3. SLOW PATH: LLM Router
    if settings.get("llm_fallback_enabled", False):
        router_prompt_template = settings.get("llm_router_prompt", "")
        if router_prompt_template:
            prompt = router_prompt_template.replace("{query}", query)
            logger.info("Keywords failed. Asking LLM for Intent...")
            
-            raw_response = await llm.generate_raw_response(prompt)
+            try:
+                raw_response = await llm.generate_raw_response(prompt)
+                llm_output_upper = raw_response.upper()
                
-            # Parsing logic
-            llm_output_upper = raw_response.upper()
-            found_intents = []
-            for strat_key in strategies.keys():
-                if strat_key in llm_output_upper:
-                    found_intents.append(strat_key)
+                # Zuerst INTERVIEW prüfen (LLMs erkennen oft "Create" Intention)
+                if "INTERVIEW" in llm_output_upper or "CREATE" in llm_output_upper:
+                    return "INTERVIEW", "LLM Router"

-            if len(found_intents) == 1:
-                return found_intents[0], "LLM Router (Slow Path)"
-            elif len(found_intents) > 1:
-                return found_intents[0], f"LLM Ambiguous {found_intents}"
-            else:
-                return "FACT", "LLM Fallback (No Match)"
+                for strat_key in strategies.keys():
+                    if strat_key in llm_output_upper:
+                        return strat_key, "LLM Router"
+                        
+            except Exception as e:
+                logger.error(f"Router LLM failed: {e}")
                
    return "FACT", "Default (No Match)"

@ -202,7 +227,7 @@ async def chat_endpoint(
    logger.info(f"Chat request [{query_id}]: {request.message[:50]}...")

    try:
-        # 1. Intent Detection (mit Source)
+        # 1. Intent Detection
        intent, intent_source = await _classify_intent(request.message, llm)
        logger.info(f"[{query_id}] Final Intent: {intent} via {intent_source}")

@ -210,57 +235,55 @@ async def chat_endpoint(
        strategy = get_decision_strategy(intent)
        prompt_key = strategy.get("prompt_template", "rag_template")
        
-        # --- SPLIT LOGIC: INTERVIEW vs. RAG ---
-        
        sources_hits = []
        final_prompt = ""
        
        if intent == "INTERVIEW":
-            # --- WP-07: INTERVIEW MODE ---
-            # Kein Retrieval. Wir nutzen den Dialog-Kontext.
+            # --- INTERVIEW MODE ---
+            # Wir müssen jetzt herausfinden, WELCHES Schema wir nutzen.
+            # Dazu schauen wir wieder in die types.yaml (via _detect_target_type)
            
-            # 1. Schema Loading (Late Binding)
-            schemas = strategy.get("schemas", {})
-            target_type = _detect_target_type(request.message, schemas)
-            active_schema = schemas.get(target_type, schemas.get("default"))
+            # Schema-Quelle: bevorzugt die Felder aus types.yaml (schema: [...]);
+            # decision_engine.yaml dient nur als Fallback, nicht als Override.
            
-            logger.info(f"[{query_id}] Starting Interview for Type: {target_type}")
+            # Strategie: Wir nutzen _detect_target_type, das jetzt types.yaml kennt.
+            target_type = _detect_target_type(request.message, strategy.get("schemas", {}))
            
-            # Robustes Schema-Parsing (Dict vs List)
-            if isinstance(active_schema, dict):
-                fields_list = active_schema.get("fields", [])
-                hint_str = active_schema.get("hint", "")
-            else:
-                fields_list = active_schema # Fallback falls nur Liste definiert
-                hint_str = ""
+            # Schema laden (aus types.yaml bevorzugt)
+            types_cfg = get_types_config()
+            type_def = types_cfg.get("types", {}).get(target_type, {})
+            
+            # Hole Schema-Felder aus types.yaml (schema: [...])
+            fields_list = type_def.get("schema", [])
+            
+            # Fallback auf decision_engine.yaml, falls in types.yaml nichts steht
+            if not fields_list:
+                configured_schemas = strategy.get("schemas", {})
+                fallback_schema = configured_schemas.get(target_type, configured_schemas.get("default"))
+                if isinstance(fallback_schema, dict):
+                    fields_list = fallback_schema.get("fields", [])
+                else:
+                    fields_list = fallback_schema or []
+
+            logger.info(f"[{query_id}] Interview Type: {target_type}. Fields: {len(fields_list)}")
            
            fields_str = "\n- " + "\n- ".join(fields_list)
            
-            # 2. Context Logic
-            # Hinweis: In einer Stateless-API ist {context_str} idealerweise die History.
-            # Da ChatRequest (noch) kein History-Feld hat, nutzen wir einen Placeholder
-            # oder verlassen uns darauf, dass der Client die History im Prompt mitschickt 
-            # (Streamlit Pattern: Appends history to prompt).
-            # Wir labeln es hier explizit.
-            context_str = "Bisheriger Verlauf (falls vorhanden): Siehe oben/unten." 
-            
-            # 3. Prompt Assembly
+            # Prompt Assembly
            template = llm.prompts.get(prompt_key, "")
-            final_prompt = template.replace("{context_str}", context_str) \
+            final_prompt = template.replace("{context_str}", "Dialogverlauf...") \
                                   .replace("{query}", request.message) \
                                   .replace("{target_type}", target_type) \
                                   .replace("{schema_fields}", fields_str) \
-                                   .replace("{schema_hint}", hint_str)
+                                   .replace("{schema_hint}", "")
                                   
-            # Keine Hits im Interview
            sources_hits = []
            
        else:
-            # --- WP-06: STANDARD RAG MODE ---
+            # --- RAG MODE ---
            inject_types = strategy.get("inject_types", [])
            prepend_instr = strategy.get("prepend_instruction", "")

-            # 2. Primary Retrieval
            query_req = QueryRequest(
                query=request.message,
                mode="hybrid",     
@ -270,9 +293,7 @@ async def chat_endpoint(
            retrieve_result = await retriever.search(query_req)
            hits = retrieve_result.results
            
-            # 3. Strategic Retrieval (WP-06 Kernfeature)
            if inject_types:
-                logger.info(f"[{query_id}] Executing Strategic Retrieval for types: {inject_types}...")
                strategy_req = QueryRequest(
                    query=request.message,
                    mode="hybrid",
@ -281,19 +302,16 @@ async def chat_endpoint(
                    explain=False
                )
                strategy_result = await retriever.search(strategy_req)
-                
                existing_ids = {h.node_id for h in hits}
                for strat_hit in strategy_result.results:
                    if strat_hit.node_id not in existing_ids:
                        hits.append(strat_hit)

-            # 4. Context Building
            if not hits:
                context_str = "Keine relevanten Notizen gefunden."
            else:
                context_str = _build_enriched_context(hits)

-            # 5. Generation Setup
            template = llm.prompts.get(prompt_key, "{context_str}\n\n{query}")
            
            if prepend_instr:
@ -302,35 +320,25 @@ async def chat_endpoint(
            final_prompt = template.replace("{context_str}", context_str).replace("{query}", request.message)
            sources_hits = hits
        
-        # --- COMMON GENERATION ---
-        
+        # --- GENERATION ---
        system_prompt = llm.prompts.get("system_prompt", "")
        
-        logger.info(f"[{query_id}] Sending to LLM (Intent: {intent}, Template: {prompt_key})...")
-        
-        # System-Prompt separat übergeben
+        # Hier nutzen wir das erhöhte Timeout aus dem LLMService Update
        answer_text = await llm.generate_raw_response(prompt=final_prompt, system=system_prompt)

        duration_ms = int((time.time() - start_time) * 1000)
        
-        # 6. Logging (Fire & Forget)
+        # Logging
        try:
            log_search(
                query_id=query_id,
                query_text=request.message,
                results=sources_hits,
                mode="interview" if intent == "INTERVIEW" else "chat_rag",
-                metadata={
-                    "intent": intent,
-                    "intent_source": intent_source,
-                    "generated_answer": answer_text,
-                    "model": llm.settings.LLM_MODEL
-                }
+                metadata={"intent": intent, "source": intent_source}
            )
-        except Exception as e:
-            logger.error(f"Logging failed: {e}")
+        except: pass

-        # 7. Response
        return ChatResponse(
            query_id=query_id,
            answer=answer_text,
--- a/config/decision_engine.yaml
+++ b/config/decision_engine.yaml
@ -1,32 +1,31 @@
 # config/decision_engine.yaml
-# Steuerung der Decision Engine (WP-06 + WP-07)
-# Hybrid-Modus: Keywords (Fast) + LLM Router (Smart Fallback)
-version: 1.3
+# Steuerung der Decision Engine (Intent Recognition)
+# Version: 2.4.0 (Clean Architecture: Generic Intents only)
+
+version: 1.4

 settings:
  llm_fallback_enabled: true
  
-  # Few-Shot Prompting für bessere SLM-Performance
-  # Erweitert um INTERVIEW Beispiele
+  # Few-Shot Prompting für den LLM-Router (Slow Path)
  llm_router_prompt: |
    Du bist ein Klassifikator. Analysiere die Nachricht und wähle die passende Strategie.
    Antworte NUR mit dem Namen der Strategie.
    
    STRATEGIEN:
-    - INTERVIEW: User will Wissen strukturieren, Notizen anlegen, Projekte starten ("Neu", "Festhalten").
+    - INTERVIEW: User will Wissen erfassen, Notizen anlegen oder Dinge festhalten.
    - DECISION: Rat, Strategie, Vor/Nachteile, "Soll ich".
-    - EMPATHY: Gefühle, Frust, Freude, Probleme, "Alles ist sinnlos", "Ich bin traurig".
-    - CODING: Code, Syntax, Programmierung, Python.
+    - EMPATHY: Gefühle, Frust, Freude, Probleme.
+    - CODING: Code, Syntax, Programmierung.
    - FACT: Wissen, Fakten, Definitionen.
    
    BEISPIELE:
    User: "Wie funktioniert Qdrant?" -> FACT
    User: "Soll ich Qdrant nutzen?" -> DECISION
-    User: "Ich möchte ein neues Projekt anlegen" -> INTERVIEW
-    User: "Lass uns eine Entscheidung festhalten" -> INTERVIEW
+    User: "Ich möchte etwas notieren" -> INTERVIEW
+    User: "Lass uns das festhalten" -> INTERVIEW
    User: "Schreibe ein Python Script" -> CODING
    User: "Alles ist grau und sinnlos" -> EMPATHY
-    User: "Mir geht es heute gut" -> EMPATHY
    
    NACHRICHT: "{query}"
    
@ -51,11 +50,9 @@ strategies:
      - "empfehlung"
      - "strategie"
      - "entscheidung"
-      - "wert"
-      - "prinzip"
-      - "vor- und nachteile"
      - "abwägung"
-    inject_types: ["value", "principle", "goal"]
+      - "vergleich"
+    inject_types: ["value", "principle", "goal", "risk"]
    prompt_template: "decision_template"
    prepend_instruction: |
      !!! ENTSCHEIDUNGS-MODUS !!!
@ -72,6 +69,7 @@ strategies:
      - "angst"
      - "nervt"
      - "überfordert"
+      - "müde"
    inject_types: ["experience", "belief", "profile"]
    prompt_template: "empathy_template"
    prepend_instruction: null
@ -88,56 +86,37 @@ strategies:
      - "syntax"
      - "json"
      - "yaml"
+      - "bash"
    inject_types: ["snippet", "reference", "source"]
    prompt_template: "technical_template"
    prepend_instruction: null

-  # 5. Interview / Datenerfassung (WP-07)
+  # 5. Interview / Datenerfassung
+  # HINWEIS: Spezifische Typen (Projekt, Ziel etc.) werden automatisch 
+  # über die types.yaml erkannt. Hier stehen nur generische Trigger.
  INTERVIEW:
-    description: "Der User möchte strukturiertes Wissen erfassen (Projekt, Notiz, Idee)."
+    description: "Der User möchte Wissen erfassen."
    trigger_keywords:
      - "neue notiz"
-      - "neues projekt"
-      - "neue entscheidung"
-      - "neues ziel"
+      - "etwas notieren"
      - "festhalten"
-      - "entwurf erstellen"
-      - "interview"
+      - "erstellen"
      - "dokumentieren"
+      - "anlegen"
+      - "interview"
      - "erfassen"
      - "idee speichern"
-    inject_types: [] # Keine RAG-Suche, reiner Kontext-Dialog
+      - "draft"
+    inject_types: [] 
    prompt_template: "interview_template"
    prepend_instruction: null
    
-    # LATE BINDING SCHEMAS:
-    # Definition der Pflichtfelder pro Typ (korrespondiert mit types.yaml)
-    # Wenn ein Typ hier fehlt, wird 'default' genutzt.
+    # Schemas: Hier nur der Fallback. 
+    # Spezifische Schemas (Project, Experience) kommen jetzt aus types.yaml!
    schemas:
      default: 
-        fields: ["Titel", "Thema/Inhalt", "Tags"]
+        fields: 
+          - "Titel"
+          - "Thema/Inhalt"
+          - "Tags"
        hint: "Halte es einfach und übersichtlich."
-
-      project:
-        fields: ["Titel", "Zielsetzung (Goal)", "Status (draft/active)", "Wichtige Stakeholder", "Nächste Schritte"]
-        hint: "Achte darauf, Abhängigkeiten zu anderen Projekten mit [[rel:depends_on]] zu erfragen."
-
-      decision:
-        fields: ["Titel", "Kontext (Warum entscheiden wir?)", "Getroffene Entscheidung", "Betrachtete Alternativen", "Status (proposed/final)"]
-        hint: "Wichtig: Frage explizit nach den Gründen gegen die Alternativen."
-
-      goal:
-        fields: ["Titel", "Zeitrahmen (Deadline)", "Messkriterien (KPIs)", "Verbundene Werte"]
-        hint: "Ziele sollten SMART formuliert sein."
-
-      experience:
-        fields: ["Titel", "Situation (Kontext)", "Erkenntnis (Learning)", "Emotionale Keywords (für Empathie-Suche)"]
-        hint: "Fokussiere dich auf die persönliche Lektion."
-
-      value:
-        fields: ["Titel (Name des Werts)", "Definition (Was bedeutet das für uns?)", "Anti-Beispiel (Was ist es nicht?)"]
-        hint: "Werte dienen als Entscheidungsgrundlage."
-      
-      principle:
-        fields: ["Titel", "Handlungsanweisung", "Begründung"]
-        hint: "Prinzipien sind härter als Werte."
--- a/config/prompts.yaml
+++ b/config/prompts.yaml
@ -99,44 +99,37 @@ technical_template: |
 # ---------------------------------------------------------
 interview_template: |
  TASK:
-  Erstelle einen Markdown-Entwurf für eine Notiz vom Typ '{target_type}'.
+  Du bist ein professioneller Ghostwriter. Verwandle den "USER INPUT" in eine strukturierte Notiz vom Typ '{target_type}'.
  
-  SCHEMA (Inhaltliche Pflichtfelder für den Body):
+  STRUKTUR (Nutze EXAKT diese Überschriften):
  {schema_fields}
  
  USER INPUT:
  "{query}"
  
-  ANWEISUNG:
-  1. Extrahiere Informationen aus dem Input.
-  2. Generiere validen Markdown.
+  ANWEISUNG ZUM INHALT:
+  1. Analysiere den Input genau.
+  2. Schreibe die Inhalte unter die passenden Überschriften aus der STRUKTUR-Liste oben.
+  3. STIL: Schreibe flüssig, professionell und in der Ich-Perspektive. Korrigiere Grammatikfehler, aber behalte den persönlichen Ton bei.
+  4. Wenn Informationen für einen Abschnitt fehlen, schreibe nur: "[TODO: Ergänzen]". Erfinde nichts dazu.
  
-  OUTPUT REGELN (STRIKT BEACHTEN):
-  A. FRONTMATTER (YAML):
-     - 'type': Muss '{target_type}' sein (oder 'experience', 'project' etc.). NIEMALS 'draft'.
-     - 'status': Muss IMMER 'draft' sein.
-     - 'tags': Eine JSON-Liste von Strings OHNE Hashtags. Beispiel: ['Recycling', 'Konflikt']. NICHT: [#Recycling].
-     - Keine Sätze im YAML, nur Daten.
-  
-  B. BODY (Markdown):
-     - Nutze für jedes Schema-Feld eine Markdown-Überschrift (## Feldname).
-     - Schreibe den Inhalt DARUNTER.
-  
-  HINWEIS ZUM TYP:
-  {schema_hint}
-  
-  OUTPUT FORMAT BEISPIEL:
-  ```markdown
+  OUTPUT FORMAT (YAML + MARKDOWN):
  ---
  type: {target_type}
  status: draft
-  title: ...
-  tags: ["Tag1", "Tag2"]
+  title: (Erstelle einen treffenden, kurzen Titel für den Inhalt)
+  tags: [Tag1, Tag2]
  ---
-  # Titel der Notiz
  
-  ## Erstes Schema Feld
-  Der Inhalt hier...
+  # (Wiederhole den Titel hier)
+  
+  ## (Erster Begriff aus STRUKTUR)
+  (Text...)
+  
+  ## (Zweiter Begriff aus STRUKTUR)
+  (Text...)
+  
+  (usw.)
 

 # ---------------------------------------------------------
--- a/config/types.yaml
+++ b/config/types.yaml
@ -1,4 +1,4 @@
-version: 1.6 # Balance zwischen Speed, Kontext und Smartness
+version: 2.4.0 # Optimized for Async Intelligence & Hybrid Router

 # ==============================================================================
 # 1. CHUNKING PROFILES
@ -6,42 +6,40 @@ version: 1.6 # Balance zwischen Speed, Kontext und Smartness

 chunking_profiles:
  
-  # A. SHORT & FAST (Für atomare Schnipsel)
-  # Einsatz: Glossar, Tasks, Risiken
-  # Vorteil: Präzise Treffer für kurze Infos.
+  # A. SHORT & FAST
+  # Für Glossar, Tasks, Risiken. Kleine Schnipsel.
  sliding_short:
    strategy: sliding_window
-    enable_smart_edge_allocation: false # AUS (Speed)
+    enable_smart_edge_allocation: false
    target: 200
    max: 350
    overlap: [30, 50]

-  # B. STANDARD & FAST (Der neue "Mittelweg")
-  # Einsatz: Quellen, Journal, Daily Logs
-  # Vorteil: Viel Kontext für RAG, aber rasendschneller Import ohne LLM.
+  # B. STANDARD & FAST
+  # Der "Traktor": Robust für Quellen, Journal, Daily Logs.
  sliding_standard:
    strategy: sliding_window
-    enable_smart_edge_allocation: false # AUS (Speed)
-    target: 450  # Größerer Kontext!
+    enable_smart_edge_allocation: false
+    target: 450
    max: 650
    overlap: [50, 100]

-  # C. SMART FLOW (Premium Chunking)
-  # Einsatz: Konzepte, Projekte, Erfahrungen
-  # Vorteil: LLM prüft Inhalt und verlinkt präzise. Kostet Zeit.
+  # C. SMART FLOW (Performance-Safe Mode)
+  # Für Konzepte, Projekte, Erfahrungen.
+  # HINWEIS: 'enable_smart_edge_allocation' ist vorerst FALSE, um Ollama
+  # bei der Generierung nicht zu überlasten. Später wieder aktivieren.
  sliding_smart_edges:
    strategy: sliding_window
-    enable_smart_edge_allocation: true # AN (Intelligenz)
+    enable_smart_edge_allocation: false 
    target: 400
    max: 600
    overlap: [50, 80]

  # D. SMART STRUCTURE
-  # Einsatz: Profile, Werte, Prinzipien
-  # Vorteil: Respektiert die Markdown-Struktur (H2).
+  # Für Profile, Werte, Prinzipien. Trennt hart an Überschriften (H2).
  structured_smart_edges:
    strategy: by_heading
-    enable_smart_edge_allocation: true # AN (Intelligenz)
+    enable_smart_edge_allocation: false
    split_level: 2
    max: 600
    target: 400
@ -52,7 +50,7 @@ chunking_profiles:
 # ==============================================================================
 defaults:
  retriever_weight: 1.0
-  chunking_profile: sliding_standard # Fallback auf Standard (sicher & performant)
+  chunking_profile: sliding_standard
  edge_defaults: [] 

 # ==============================================================================
@ -61,53 +59,110 @@ defaults:

 types:

-  # --- MASSENDATEN (Speed + Kontext) ---
+  # --- KERNTYPEN (Hoch priorisiert & Smart) ---

-  source:
-    chunking_profile: sliding_standard # JETZT: Mehr Kontext (450 Token), trotzdem schnell
-    retriever_weight: 0.50
-    edge_defaults: [] 
+  experience:
+    chunking_profile: sliding_smart_edges
+    retriever_weight: 0.90
+    edge_defaults: ["derived_from", "references"]
+    # Hybrid Classifier: Wenn diese Worte fallen, ist es eine Experience
+    detection_keywords: 
+      - "passiert"
+      - "erlebt"
+      - "gefühl"
+      - "situation"
+      - "stolz"
+      - "geärgert"
+      - "reaktion"
+      - "moment"
+      - "konflikt"
+    # Ghostwriter Schema: Sprechende Anweisungen für besseren Textfluss
    schema:
-      - "Metadaten (Autor, URL, Datum)"
-      - "Zusammenfassung"
-      - "Originaltext / Ausschnitte"
+      - "Situation (Was ist passiert?)"
+      - "Meine Reaktion (Was habe ich getan?)"
+      - "Ergebnis & Auswirkung"
+      - "Reflexion & Learning (Was lerne ich daraus?)"

-  journal:
-    chunking_profile: sliding_standard # JETZT: Mehr Kontext für Tagebucheinträge
-    retriever_weight: 0.80
-    edge_defaults: ["references", "related_to"]
+  project:
+    chunking_profile: sliding_smart_edges
+    retriever_weight: 0.97
+    edge_defaults: ["references", "depends_on"] 
+    detection_keywords:
+      - "projekt"
+      - "vorhaben"
+      - "ziel ist"
+      - "meilenstein"
+      - "planen"
+      - "starten"
+      - "mission"
    schema:
-      - "Tages-Log"
-      - "Erkenntnisse"
-      - "Entscheidungen"
+      - "Mission & Zielsetzung"
+      - "Aktueller Status & Blockaden"
+      - "Nächste konkrete Schritte"
+      - "Stakeholder & Ressourcen"

-  # --- ATOMARE DATEN (Speed + Präzision) ---
+  decision:
+    chunking_profile: structured_smart_edges 
+    retriever_weight: 1.00 # MAX: Entscheidungen sind Gesetz
+    edge_defaults: ["caused_by", "references"]
+    detection_keywords:
+      - "entschieden"
+      - "wahl"
+      - "optionen"
+      - "alternativen"
+      - "beschluss"
+      - "adr"
+    schema:
+      - "Kontext & Problemstellung"
+      - "Betrachtete Optionen (Alternativen)"
+      - "Die Entscheidung"
+      - "Begründung (Warum diese Wahl?)"

-  task:
-    chunking_profile: sliding_short # Kurz halten
-    retriever_weight: 0.80
-    edge_defaults: ["depends_on", "part_of"]
-    schema: ["Aufgabe", "Kontext", "DoD"]
+  # --- PERSÖNLICHKEIT & IDENTITÄT ---

-  glossary:
-    chunking_profile: sliding_short # Kurz halten
-    retriever_weight: 0.40
+  value:
+    chunking_profile: structured_smart_edges
+    retriever_weight: 1.00
    edge_defaults: ["related_to"]
-    schema: ["Begriff", "Definition"]
+    detection_keywords: ["wert", "wichtig ist", "moral", "ethik"]
+    schema: ["Definition", "Warum mir das wichtig ist", "Leitsätze für den Alltag"]

-  risk:
-    chunking_profile: sliding_short
-    retriever_weight: 0.85
-    edge_defaults: ["related_to", "blocks"]
-    schema: ["Beschreibung", "Mitigation"]
+  principle:
+    chunking_profile: structured_smart_edges
+    retriever_weight: 0.95
+    edge_defaults: ["derived_from", "references"]
+    detection_keywords: ["prinzip", "regel", "grundsatz", "leitlinie"]
+    schema: ["Das Prinzip", "Anwendung & Beispiele"]

  belief:
    chunking_profile: sliding_short
    retriever_weight: 0.90
    edge_defaults: ["related_to"]
-    schema: ["Glaubenssatz", "Reflexion"]
+    detection_keywords: ["glaube", "überzeugung", "denke dass", "meinung"]
+    schema: ["Der Glaubenssatz", "Ursprung & Reflexion"]

-  # --- KERN-WISSEN (Smart Edges / LLM Active) ---
+  profile:
+    chunking_profile: structured_smart_edges
+    retriever_weight: 0.70
+    edge_defaults: ["references", "related_to"]
+    schema: ["Rolle / Identität", "Fakten & Daten", "Historie"]
+
+  # --- STRATEGIE & RISIKO ---
+
+  goal:
+    chunking_profile: sliding_smart_edges
+    retriever_weight: 0.95
+    edge_defaults: ["depends_on", "related_to"]
+    schema: ["Zielzustand", "Zeitrahmen & KPIs", "Motivation"]
+
+  risk:
+    chunking_profile: sliding_short
+    retriever_weight: 0.85
+    edge_defaults: ["related_to", "blocks"]
+    detection_keywords: ["risiko", "gefahr", "bedrohung", "problem", "angst"]
+    schema: ["Beschreibung des Risikos", "Mögliche Auswirkungen", "Gegenmaßnahmen"]
+
+  # --- BASIS & WISSEN ---

  concept:
    chunking_profile: sliding_smart_edges
@ -115,46 +170,32 @@ types:
    edge_defaults: ["references", "related_to"]
    schema:
      - "Definition"
-      - "Kontext"
+      - "Kontext & Hintergrund"
      - "Verwandte Konzepte"

-  project:
-    chunking_profile: sliding_smart_edges
-    retriever_weight: 0.97
-    edge_defaults: ["references", "depends_on"] 
-    schema:
-      - "Mission"
-      - "Status"
-      - "Next Actions"
+  task:
+    chunking_profile: sliding_short
+    retriever_weight: 0.80
+    edge_defaults: ["depends_on", "part_of"]
+    schema: ["Aufgabe", "Kontext", "Definition of Done"]

-  experience:
-    chunking_profile: sliding_smart_edges
-    retriever_weight: 0.90
-    edge_defaults: ["derived_from", "references"]
-    schema: ["Situation", "Aktion", "Ergebnis", "Learning"]
-
-  # --- STRUKTUR-DATEN (Smart Structure / LLM Active) ---
-
-  profile:
-    chunking_profile: structured_smart_edges
-    retriever_weight: 0.70
+  journal:
+    chunking_profile: sliding_standard
+    retriever_weight: 0.80
    edge_defaults: ["references", "related_to"]
-    schema: ["Rolle", "Fakten", "Historie"]
+    schema: ["Log-Eintrag", "Gedanken & Erkenntnisse"]

-  value:
-    chunking_profile: structured_smart_edges
-    retriever_weight: 1.00
+  source:
+    chunking_profile: sliding_standard
+    retriever_weight: 0.50
+    edge_defaults: [] 
+    schema:
+      - "Metadaten (Autor, URL, Datum)"
+      - "Kernaussage / Zusammenfassung"
+      - "Zitate & Notizen"
+
+  glossary:
+    chunking_profile: sliding_short
+    retriever_weight: 0.40
    edge_defaults: ["related_to"]
-    schema: ["Definition", "Motivation", "Leitsätze"]
-
-  principle:
-    chunking_profile: structured_smart_edges
-    retriever_weight: 0.95
-    edge_defaults: ["derived_from", "references"]
-    schema: ["Prinzip", "Anwendung"]
-
-  decision:
-    chunking_profile: structured_smart_edges 
-    retriever_weight: 1.00
-    edge_defaults: ["caused_by", "references"]
-    schema: ["Problem", "Optionen", "Entscheidung", "Warum"]
+    schema: ["Begriff", "Definition"]