# mindnet/app/routers/chat.py

"""
FILE: app/routers/chat.py
DESCRIPTION: Haupt-Chat-Interface (RAG & Interview). Enthält Intent-Router (Keywords/LLM) und Prompt-Construction.
VERSION: 2.6.0 (WP-22 Semantic Graph Routing)
STATUS: Active
DEPENDENCIES: app.config, app.models.dto, app.services.llm_service, app.core.retriever, app.services.feedback_service
EXTERNAL_CONFIG: config/decision_engine.yaml, config/types.yaml
"""

from fastapi import APIRouter, HTTPException, Depends
from typing import List, Dict, Any, Optional
import time
import uuid
import logging
import yaml
import os
from pathlib import Path

from app.config import get_settings
from app.models.dto import ChatRequest, ChatResponse, QueryRequest, QueryHit
from app.services.llm_service import LLMService
from app.core.retriever import Retriever
from app.services.feedback_service import log_search

# Router object on which this module registers its chat endpoint.
router = APIRouter()
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# --- Helper: Config Loader ---

# Filled on first use by get_full_config(); None means "not loaded yet".
_DECISION_CONFIG_CACHE = None
# Filled on first use by get_types_config(); None means "not loaded yet".
_TYPES_CONFIG_CACHE = None

def _load_decision_config() -> Dict[str, Any]:
    """Load the decision-engine configuration from disk.

    The file location is taken from ``settings.DECISION_CONFIG_PATH``. A
    minimal built-in configuration (a single ``FACT`` strategy without
    trigger keywords) is returned when the file is missing, cannot be read
    or parsed, is empty, or does not hold a mapping at the top level.

    Returns:
        The parsed configuration mapping, or the built-in default.
    """
    settings = get_settings()
    path = Path(settings.DECISION_CONFIG_PATH)
    default_config: Dict[str, Any] = {
        "strategies": {
            "FACT": {"trigger_keywords": []}
        }
    }

    if not path.exists():
        logger.warning(f"Decision config not found at {path}, using defaults.")
        return default_config

    try:
        with open(path, "r", encoding="utf-8") as f:
            loaded = yaml.safe_load(f)
    except Exception as e:
        logger.error(f"Failed to load decision config: {e}")
        return default_config

    # safe_load yields None for an empty document and may yield a list or a
    # scalar. Callers use .get() on the result and the cache treats None as
    # "not loaded", so only a mapping is a usable return value.
    if not isinstance(loaded, dict):
        logger.warning(f"Decision config at {path} is empty or not a mapping, using defaults.")
        return default_config

    return loaded

def _load_types_config() -> Dict[str, Any]:
    """Load ``types.yaml``, which supplies the keywords used for type detection.

    The path is read from the ``MINDNET_TYPES_FILE`` environment variable and
    falls back to ``config/types.yaml``.

    Returns:
        The parsed mapping, or an empty dict when the file cannot be read or
        parsed, is empty, or does not hold a mapping at the top level.
    """
    path = os.getenv("MINDNET_TYPES_FILE", "config/types.yaml")
    try:
        with open(path, "r", encoding="utf-8") as f:
            loaded = yaml.safe_load(f)
    except Exception as e:
        # Loading stays best-effort, but a broken or missing file should
        # leave a trace instead of silently disabling keyword detection.
        logger.warning(f"Failed to load types config from {path}: {e}")
        return {}

    if loaded is None:
        # Empty document.
        return {}
    if not isinstance(loaded, dict):
        # Callers use .get() on the result; a list or scalar would break them.
        logger.warning(f"Types config at {path} is not a mapping, ignoring it.")
        return {}

    return loaded

def get_full_config() -> Dict[str, Any]:
    """Return the decision configuration, loading it on the first call.

    The loaded value is kept in the module-level ``_DECISION_CONFIG_CACHE``
    and handed out unchanged on every later call.
    """
    global _DECISION_CONFIG_CACHE
    if _DECISION_CONFIG_CACHE is not None:
        return _DECISION_CONFIG_CACHE

    _DECISION_CONFIG_CACHE = _load_decision_config()
    return _DECISION_CONFIG_CACHE

def get_types_config() -> Dict[str, Any]:
    """Return the types configuration, loading it on the first call.

    The loaded value is kept in the module-level ``_TYPES_CONFIG_CACHE`` and
    handed out unchanged on every later call.
    """
    global _TYPES_CONFIG_CACHE
    if _TYPES_CONFIG_CACHE is not None:
        return _TYPES_CONFIG_CACHE

    _TYPES_CONFIG_CACHE = _load_types_config()
    return _TYPES_CONFIG_CACHE

def get_decision_strategy(intent: str) -> Dict[str, Any]:
    """Look up the strategy configured for *intent*.

    Falls back to the ``FACT`` strategy when *intent* has no entry, and to an
    empty dict when ``FACT`` is not configured either.
    """
    available = get_full_config().get("strategies", {})
    if intent in available:
        return available[intent]
    return available.get("FACT", {})

# --- Helper: Target Type Detection (WP-07) ---

def _detect_target_type(message: str, configured_schemas: Dict[str, Any]) -> str:
    """Guess which note type the user wants to create.

    Sources are checked in this order; the first hit wins:

    1. ``detection_keywords`` of every type in ``types.yaml``,
    2. the keys of *configured_schemas* (``"default"`` is skipped),
    3. a built-in table of German synonyms (legacy fallback).

    Every check is a case-insensitive substring test against *message*.

    Args:
        message: Raw user message.
        configured_schemas: Schema mapping of the active strategy; only its
            keys are inspected.

    Returns:
        The matching type name or schema key, or ``"default"`` if nothing
        matched.
    """
    message_lower = message.lower()

    # 1. detection_keywords from types.yaml have priority.
    types_def = get_types_config().get("types") or {}

    for type_name, type_data in types_def.items():
        # A type declared without a body parses as None in YAML.
        if not isinstance(type_data, dict):
            continue
        # Same for `detection_keywords:` left without a value.
        keywords = type_data.get("detection_keywords") or []
        for kw in keywords:
            # An empty keyword is a substring of every message; ignore it.
            if kw and str(kw).lower() in message_lower:
                return type_name

    # 2. Direct match on the schema keys.
    for type_key in (configured_schemas or {}):
        if type_key == "default":
            continue
        # The message is lowercased, so the key must be lowercased as well,
        # otherwise a key such as "Project" could never match.
        key_lower = str(type_key).lower()
        if key_lower and key_lower in message_lower:
            return type_key

    # 3. Synonym mapping (legacy fallback).
    synonyms = {
        "projekt": "project", "vorhaben": "project",
        "entscheidung": "decision", "beschluss": "decision",
        "ziel": "goal",
        "erfahrung": "experience", "lektion": "experience",
        "wert": "value",
        "prinzip": "principle",
        "notiz": "default", "idee": "default"
    }

    for term, schema_key in synonyms.items():
        if term in message_lower:
            return schema_key

    return "default"

# --- Dependencies ---

def get_llm_service() -> LLMService:
    """Dependency provider: return a newly constructed ``LLMService``."""
    return LLMService()

def get_retriever() -> Retriever:
    """Dependency provider: return a newly constructed ``Retriever``."""
    return Retriever()


# --- Logic ---

def _build_enriched_context(hits: List[QueryHit]) -> str:
    context_parts = []
    for i, hit in enumerate(hits, 1):
        source = hit.source or {}
        content = (
            source.get("text") or source.get("content") or
            source.get("page_content") or source.get("chunk_text") or
            "[Kein Text]"
        )
        title = hit.note_id or "Unbekannt"

        payload = hit.payload or {}
        note_type = payload.get("type") or source.get("type", "unknown")
        note_type = str(note_type).upper()

        entry = (
            f"### QUELLE {i}: {title}\n"
            f"TYP: [{note_type}] (Score: {hit.total_score:.2f})\n"
            f"INHALT:\n{content}\n"
        )
        context_parts.append(entry)

    return "\n\n".join(context_parts)

def _is_question(query: str) -> bool:
    """Prüft, ob der Input wahrscheinlich eine Frage ist."""
    q = query.strip().lower()
    if "?" in q: return True

    # W-Fragen Indikatoren (falls User das ? vergisst)
    starters = ["wer", "wie", "was", "wo", "wann", "warum", "weshalb", "wozu", "welche", "bist du", "entspricht"]
    if any(q.startswith(s + " ") for s in starters):
        return True

    return False

async def _classify_intent(query: str, llm: LLMService) -> tuple[str, str]:
    """Determine the intent of *query* with a hybrid keyword/LLM router.

    Evaluation order (first match wins):

    1. ``trigger_keywords`` of every configured strategy except ``FACT``.
    2. ``detection_keywords`` of the types in ``types.yaml``, which yield
       ``INTERVIEW``. This step is skipped when the query looks like a
       question, so that questions about a type are answered via RAG.
    3. The LLM router, if ``settings.llm_fallback_enabled`` is set in the
       decision config and the LLM service provides a ``router_prompt``.

    Args:
        query: Raw user message.
        llm: LLM service supplying the router prompt and the completion.

    Returns:
        A ``(intent, source)`` pair, where *source* names the stage that
        decided. ``("FACT", "Default (No Match)")`` if no stage matched or
        the LLM call failed.
    """
    config = get_full_config()
    # `strategies:` / `settings:` left without a value parse as None in YAML.
    strategies = config.get("strategies") or {}
    settings = config.get("settings") or {}

    query_lower = query.lower()

    # 1. FAST PATH A: strategy keywords (e.g. "Soll ich...")
    for intent_name, strategy in strategies.items():
        if intent_name == "FACT":
            continue
        if not isinstance(strategy, dict):
            continue
        keywords = strategy.get("trigger_keywords") or []
        for k in keywords:
            # An empty keyword is a substring of every query; ignore it.
            if k and str(k).lower() in query_lower:
                return intent_name, "Keyword (Strategy)"

    # 2. FAST PATH B: type keywords (e.g. "Projekt", "Werte") -> INTERVIEW
    # Questions about a type should be handled by RAG (FACT/DECISION), not
    # start an interview, so they are left to the LLM router below.
    if not _is_question(query_lower):
        types_def = get_types_config().get("types") or {}

        for type_name, type_data in types_def.items():
            if not isinstance(type_data, dict):
                continue
            keywords = type_data.get("detection_keywords") or []
            for kw in keywords:
                if kw and str(kw).lower() in query_lower:
                    return "INTERVIEW", f"Keyword (Type: {type_name})"

    # 3. SLOW PATH: LLM router
    if settings.get("llm_fallback_enabled", False):
        router_prompt_template = llm.prompts.get("router_prompt", "")

        if router_prompt_template:
            prompt = router_prompt_template.replace("{query}", query)
            logger.info("Keywords failed (or Question detected). Asking LLM for Intent...")

            try:
                # The router runs with realtime priority so it is not queued.
                raw_response = await llm.generate_raw_response(prompt, priority="realtime")
                llm_output_upper = raw_response.upper()

                # INTERVIEW is checked before the configured strategies.
                if "INTERVIEW" in llm_output_upper or "CREATE" in llm_output_upper:
                    return "INTERVIEW", "LLM Router"

                for strat_key in strategies:
                    # The output was uppercased, so compare against the
                    # uppercased key; the original key is what gets returned.
                    if str(strat_key).upper() in llm_output_upper:
                        return strat_key, "LLM Router"

            except Exception as e:
                logger.error(f"Router LLM failed: {e}")

    return "FACT", "Default (No Match)"

@router.post("/", response_model=ChatResponse)
async def chat_endpoint(
    request: ChatRequest,
    llm: LLMService = Depends(get_llm_service),
    retriever: Retriever = Depends(get_retriever)
):
    """Answer a chat message in interview or RAG mode.

    Steps:

    1. Classify the intent of ``request.message`` and load its strategy.
    2. ``INTERVIEW``: detect the target note type, collect its schema fields
       (from ``types.yaml``, else from the strategy's ``schemas``) and fill
       the strategy's prompt template. No retrieval takes place.
       Any other intent: run a hybrid search, optionally add up to three
       hits restricted to the strategy's ``inject_types``, and fill the
       prompt template with the rendered hits.
    3. Generate the answer with realtime priority.
    4. Log the search (best-effort) and return the response.

    Raises:
        HTTPException: status 500 for any unexpected error; an
            ``HTTPException`` raised during processing is passed through
            unchanged.
    """
    # Monotonic clock, so the latency is not skewed by wall-clock changes.
    start_time = time.perf_counter()
    query_id = str(uuid.uuid4())
    logger.info(f"Chat request [{query_id}]: {request.message[:50]}...")

    try:
        # 1. Intent detection
        intent, intent_source = await _classify_intent(request.message, llm)
        logger.info(f"[{query_id}] Final Intent: {intent} via {intent_source}")

        # Strategy load
        strategy = get_decision_strategy(intent)
        prompt_key = strategy.get("prompt_template", "rag_template")

        sources_hits = []
        final_prompt = ""

        if intent == "INTERVIEW":
            # --- INTERVIEW MODE ---
            target_type = _detect_target_type(request.message, strategy.get("schemas", {}))

            types_cfg = get_types_config()
            type_def = types_cfg.get("types", {}).get(target_type, {})
            fields_list = type_def.get("schema", [])

            if not fields_list:
                # types.yaml has no schema for this type: fall back to the
                # strategy's schemas, which may be a dict with a "fields"
                # entry or a plain list of field names.
                configured_schemas = strategy.get("schemas", {})
                fallback_schema = configured_schemas.get(target_type, configured_schemas.get("default"))
                if isinstance(fallback_schema, dict):
                    fields_list = fallback_schema.get("fields", [])
                else:
                    fields_list = fallback_schema or []

            logger.info(f"[{query_id}] Interview Type: {target_type}. Fields: {len(fields_list)}")
            fields_str = "\n- " + "\n- ".join(fields_list)

            template = llm.prompts.get(prompt_key, "")
            final_prompt = template.replace("{context_str}", "Dialogverlauf...") \
                                   .replace("{query}", request.message) \
                                   .replace("{target_type}", target_type) \
                                   .replace("{schema_fields}", fields_str) \
                                   .replace("{schema_hint}", "")

        else:
            # --- RAG MODE ---
            inject_types = strategy.get("inject_types", [])
            prepend_instr = strategy.get("prepend_instruction", "")

            # --- WP-22: Semantic Graph Routing ---
            # Edge boosts configured for this intent.
            edge_boosts = strategy.get("edge_boosts", {})
            if edge_boosts:
                logger.info(f"[{query_id}] Applying Edge Boosts: {edge_boosts}")

            query_req = QueryRequest(
                query=request.message,
                mode="hybrid",
                top_k=request.top_k,
                explain=request.explain,
                # WP-22: pass the boosts on to the retriever
                boost_edges=edge_boosts
            )
            retrieve_result = await retriever.search(query_req)
            # Work on a copy so that merging strategy hits below does not
            # modify the list owned by the retriever's result object.
            hits = list(retrieve_result.results or [])

            if inject_types:
                strategy_req = QueryRequest(
                    query=request.message,
                    mode="hybrid",
                    top_k=3,
                    filters={"type": inject_types},
                    explain=False,
                    # WP-22: same boosts here, for consistency
                    boost_edges=edge_boosts
                )
                strategy_result = await retriever.search(strategy_req)
                existing_ids = {h.node_id for h in hits}
                for strat_hit in strategy_result.results or []:
                    if strat_hit.node_id not in existing_ids:
                        # Track the id so a node repeated within the strategy
                        # results is not appended twice.
                        existing_ids.add(strat_hit.node_id)
                        hits.append(strat_hit)

            if not hits:
                context_str = "Keine relevanten Notizen gefunden."
            else:
                context_str = _build_enriched_context(hits)

            template = llm.prompts.get(prompt_key, "{context_str}\n\n{query}")

            if prepend_instr:
                context_str = f"{prepend_instr}\n\n{context_str}"

            final_prompt = template.replace("{context_str}", context_str).replace("{query}", request.message)
            sources_hits = hits

        # --- GENERATION ---
        system_prompt = llm.prompts.get("system_prompt", "")

        # Chat always uses realtime priority.
        answer_text = await llm.generate_raw_response(
            prompt=final_prompt,
            system=system_prompt,
            priority="realtime"
        )

        duration_ms = int((time.perf_counter() - start_time) * 1000)

        # Search logging is best-effort: a failure must not fail the request,
        # but it should be visible in the logs.
        try:
            log_search(
                query_id=query_id,
                query_text=request.message,
                results=sources_hits,
                mode="interview" if intent == "INTERVIEW" else "chat_rag",
                metadata={"intent": intent, "source": intent_source}
            )
        except Exception as log_err:
            logger.warning(f"[{query_id}] Failed to log search: {log_err}")

        return ChatResponse(
            query_id=query_id,
            answer=answer_text,
            sources=sources_hits,
            latency_ms=duration_ms,
            intent=intent,
            intent_source=intent_source
        )

    except HTTPException:
        # Keep the status code of an HTTP error raised further down instead
        # of re-wrapping it as a 500.
        raise
    except Exception as e:
        logger.error(f"Error in chat endpoint: {e}", exc_info=True)
        # NOTE(review): detail exposes the raw exception text to the client.
        raise HTTPException(status_code=500, detail=str(e)) from e