mindnet/app/routers/chat.py

"""
app/routers/chat.py — RAG Endpunkt (WP-06 Decision Engine - Full Config Refactor)

Zweck:
    Verbindet Retrieval mit LLM-Generation.
    WP-06: Implementiert Intent Detection und Strategic Retrieval.
    Update: Konfiguration via decision_engine.yaml (Late Binding) mit 'Best Match' Logik.
"""

from fastapi import APIRouter, HTTPException, Depends
from typing import List, Dict, Any
import time
import uuid
import logging
import yaml
from pathlib import Path

from app.config import get_settings
from app.models.dto import ChatRequest, ChatResponse, QueryRequest, QueryHit
from app.services.llm_service import LLMService
from app.core.retriever import Retriever

router = APIRouter()
logger = logging.getLogger(__name__)

# --- Helper: Config Loader ---

# Cache für die Config (damit wir nicht bei jedem Request lesen)
_DECISION_CONFIG_CACHE = None

def _load_decision_config() -> Dict[str, Any]:
    """Lädt die Decision-Engine Konfiguration (Late Binding)."""
    settings = get_settings()
    path = Path(settings.DECISION_CONFIG_PATH)

    # Default Fallback, falls YAML kaputt/weg
    default_config = {
        "strategies": {
            "FACT": {"trigger_keywords": []},
            "DECISION": {
                "trigger_keywords": ["soll ich", "meinung"],
                "inject_types": ["value", "principle"],
                "prompt_template": "decision_template"
            }
        }
    }

    if not path.exists():
        logger.warning(f"Decision config not found at {path}, using defaults.")
        return default_config

    try:
        with open(path, "r", encoding="utf-8") as f:
            return yaml.safe_load(f)
    except Exception as e:
        logger.error(f"Failed to load decision config: {e}")
        return default_config

def get_full_config() -> Dict[str, Any]:
    """Gibt die ganze Config zurück (für Intent Detection)."""
    global _DECISION_CONFIG_CACHE
    if _DECISION_CONFIG_CACHE is None:
        _DECISION_CONFIG_CACHE = _load_decision_config()
    return _DECISION_CONFIG_CACHE

def get_decision_strategy(intent: str) -> Dict[str, Any]:
    """Gibt die Strategie für einen spezifischen Intent zurück."""
    config = get_full_config()
    strategies = config.get("strategies", {})
    # Fallback auf FACT, wenn Intent unbekannt
    return strategies.get(intent, strategies.get("FACT", {}))


# --- Dependencies ---

def get_llm_service():
    return LLMService()

def get_retriever():
    return Retriever()


# --- Logic ---

def _build_enriched_context(hits: List[QueryHit]) -> str:
    """
    Baut einen 'Rich Context' String.
    Statt nur Text, injizieren wir Metadaten (Typ, Tags), damit das LLM
    die semantische Rolle des Schnipsels versteht.
    """
    context_parts = []

    for i, hit in enumerate(hits, 1):
        source = hit.source or {}

        # 1. Content extrahieren
        content = (
            source.get("text") or
            source.get("content") or
            source.get("page_content") or
            source.get("chunk_text") or
            "[Kein Textinhalt verfügbar]"
        )

        # 2. Metadaten für "Context Intelligence"
        title = hit.note_id or "Unbekannte Notiz"
        note_type = source.get("type", "unknown").upper()

        # 3. Formatierung
        entry = (
            f"### QUELLE {i}: {title}\n"
            f"TYP: [{note_type}] (Score: {hit.total_score:.2f})\n"
            f"INHALT:\n{content}\n"
        )
        context_parts.append(entry)

    return "\n\n".join(context_parts)

async def _classify_intent(query: str, llm: LLMService) -> str:
    """
    WP-06: Intent Detection (Best Match / Longest Keyword Wins).

    Prüft Keywords aus der YAML gegen die Query.
    Wenn mehrere Strategien passen, gewinnt die mit dem längsten Keyword (Spezifität).
    """
    config = get_full_config()
    strategies = config.get("strategies", {})

    query_lower = query.lower()

    best_intent = "FACT"
    max_match_length = 0

    # Iteriere über alle Strategien
    for intent_name, strategy in strategies.items():
        if intent_name == "FACT":
            continue

        keywords = strategy.get("trigger_keywords", [])

        # Prüfe jedes Keyword
        for k in keywords:
            # Wenn Keyword im Text ist...
            if k.lower() in query_lower:
                # ... prüfen wir, ob es spezifischer (länger) ist als der bisherige Favorit
                current_len = len(k)
                if current_len > max_match_length:
                    max_match_length = current_len
                    best_intent = intent_name
                    # Wir brechen hier NICHT ab, sondern suchen weiter nach noch längeren Matches

    return best_intent

@router.post("/", response_model=ChatResponse)
async def chat_endpoint(
    request: ChatRequest,
    llm: LLMService = Depends(get_llm_service),
    retriever: Retriever = Depends(get_retriever)
):
    start_time = time.time()
    query_id = str(uuid.uuid4())

    logger.info(f"Chat request [{query_id}]: {request.message[:50]}...")

    try:
        # 1. Intent Detection (Config-Driven & Best Match)
        intent = await _classify_intent(request.message, llm)
        logger.info(f"[{query_id}] Detected Intent: {intent}")

        # Lade Strategie aus Config (Late Binding)
        strategy = get_decision_strategy(intent)
        inject_types = strategy.get("inject_types", [])
        prompt_key = strategy.get("prompt_template", "rag_template")
        prepend_instr = strategy.get("prepend_instruction", "")

        # 2. Primary Retrieval (Fakten)
        query_req = QueryRequest(
            query=request.message,
            mode="hybrid",
            top_k=request.top_k,
            explain=request.explain
        )
        retrieve_result = await retriever.search(query_req)
        hits = retrieve_result.results

        # 3. Strategic Retrieval (Konfigurierbar)
        if inject_types:
            logger.info(f"[{query_id}] Executing Strategic Retrieval for types: {inject_types}...")
            strategy_req = QueryRequest(
                query=request.message,
                mode="hybrid",
                top_k=3,
                filters={"type": inject_types}, # Dynamische Liste aus YAML
                explain=False
            )
            strategy_result = await retriever.search(strategy_req)

            # Merge Results (Deduplication via node_id)
            existing_ids = {h.node_id for h in hits}
            for strat_hit in strategy_result.results:
                if strat_hit.node_id not in existing_ids:
                    hits.append(strat_hit)

        # 4. Context Building
        if not hits:
            context_str = "Keine relevanten Notizen gefunden."
        else:
            context_str = _build_enriched_context(hits)

        # 5. Generation Setup
        template = llm.prompts.get(prompt_key, "{context_str}\n\n{query}")

        # Injection der Instruktion (falls konfiguriert)
        if prepend_instr:
             context_str = f"{prepend_instr}\n\n{context_str}"

        logger.info(f"[{query_id}] Sending to LLM (Intent: {intent}, Template: {prompt_key})...")
        answer_text = await llm.generate_rag_response(
            query=request.message,
            context_str=context_str
        )

        # 6. Response
        duration_ms = int((time.time() - start_time) * 1000)

        return ChatResponse(
            query_id=query_id,
            answer=answer_text,
            sources=hits,
            latency_ms=duration_ms,
            intent=intent
        )

    except Exception as e:
        logger.error(f"Error in chat endpoint: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))