# mindnet/app/core/retrieval/retriever_scoring.py

"""
FILE: app/core/retrieval/retriever_scoring.py
DESCRIPTION: Mathematische Kern-Logik für das WP-22/WP-15c Scoring.
             Berechnet Relevanz-Scores basierend auf Semantik, Graph-Intelligence und Content Lifecycle.
             FIX v1.0.3: Optimierte Interaktion zwischen Typ-Boost und Status-Dämpfung.
VERSION: 1.0.3
STATUS: Active
"""
import os
import logging
from functools import lru_cache
from typing import Any, Dict, Tuple, Optional

try:
    import yaml
except ImportError:
    yaml = None

logger = logging.getLogger(__name__)

@lru_cache
def get_weights() -> Tuple[float, float, float]:
    """
    Return the base weights as a ``(semantic, edge, centrality)`` tuple.

    The starting values are read from the settings object
    (``RETRIEVER_W_SEM``, ``RETRIEVER_W_EDGE``, ``RETRIEVER_W_CENT``; fallbacks
    1.0 / 0.0 / 0.0). If PyYAML is importable and the file named by the
    ``MINDNET_RETRIEVER_CONFIG`` environment variable (default
    ``config/retriever.yaml``) exists, the keys ``semantic_weight``,
    ``edge_weight`` and ``centrality_weight`` of its ``scoring`` section
    override those values.

    Any error while reading or converting the YAML values is logged as a
    warning; overrides applied before the error are kept, the remaining
    weights stay at their settings-based values.

    The result is memoised by ``lru_cache``, so the configuration is only
    evaluated on the first call.
    """
    from app.config import get_settings

    cfg = get_settings()

    # Keyed by the YAML option name; insertion order (semantic, edge,
    # centrality) is also the order in which overrides are applied below.
    weights = {
        "semantic_weight": float(getattr(cfg, "RETRIEVER_W_SEM", 1.0)),
        "edge_weight": float(getattr(cfg, "RETRIEVER_W_EDGE", 0.0)),
        "centrality_weight": float(getattr(cfg, "RETRIEVER_W_CENT", 0.0)),
    }

    config_path = os.getenv("MINDNET_RETRIEVER_CONFIG", "config/retriever.yaml")
    yaml_override_available = bool(yaml) and os.path.exists(config_path)

    if yaml_override_available:
        try:
            with open(config_path, "r", encoding="utf-8") as f:
                data = yaml.safe_load(f) or {}
            scoring = data.get("scoring", {})
            for option in weights:
                # Each weight is replaced individually, so a bad value only
                # stops the overrides that come after it.
                weights[option] = float(scoring.get(option, weights[option]))
        except Exception as e:
            logger.warning(f"Retriever Configuration could not be fully loaded from {config_path}: {e}")

    return (
        weights["semantic_weight"],
        weights["edge_weight"],
        weights["centrality_weight"],
    )

def get_status_multiplier(payload: Dict[str, Any]) -> float:
    """
    WP-22 A: content lifecycle multiplier.

    Maps the payload's ``status`` field (case-insensitive, surrounding
    whitespace ignored, ``"active"`` when the key is missing) to a ranking
    factor:

    - ``stable`` -> 1.2 (reward for verified knowledge)
    - ``draft``  -> 0.5 (dampening for unfinished fragments)
    - anything else, including ``active`` -> 1.0 (neutral)
    """
    lifecycle = str(payload.get("status", "active")).lower().strip()
    # Only the non-neutral states are listed; every other value falls back to 1.0.
    factors = {"stable": 1.2, "draft": 0.5}
    return factors.get(lifecycle, 1.0)

def compute_wp22_score(
    semantic_score: float,
    payload: Dict[str, Any],
    edge_bonus_raw: float = 0.0,
    cent_bonus_raw: float = 0.0,
    dynamic_edge_boosts: Optional[Dict[str, float]] = None
) -> Dict[str, Any]:
    """
    Central hybrid scoring formula (Semantic * Lifecycle * Graph).

    Computation:
        base        = semantic_score * status_multiplier
        type_impact = retriever_weight - 1.0
        edge_impact = edge_weight * edge_bonus_raw * graph_boost_factor
        cent_impact = centrality_weight * cent_bonus_raw * graph_boost_factor
        total       = max(0.0001, base * (1 + type_impact + edge_impact + cent_impact))

    Args:
        semantic_score: Similarity score of the chunk; converted with ``float``.
        payload: Chunk payload. Read keys: ``status`` (via
            ``get_status_multiplier``), ``retriever_weight`` (missing or
            ``None`` counts as 1.0) and ``chunk_id`` / ``id`` (debug trace only).
        edge_bonus_raw: Raw edge bonus. Per the original note it is expected
            to arrive already aggregated/dampened by the caller (WP-15c).
        cent_bonus_raw: Raw centrality bonus.
        dynamic_edge_boosts: Only its truthiness is evaluated here: a
            non-empty mapping together with a positive edge or centrality
            bonus raises the graph boost factor from 1.0 to 1.5.

    Returns:
        Dict with ``total`` (floored final score) and the intermediate values
        ``edge_bonus``, ``cent_bonus``, ``status_multiplier``,
        ``graph_boost_factor``, ``type_impact``, ``base_val``,
        ``edge_impact_final`` and ``cent_impact_final``.
    """
    # NOTE(review): the semantic weight from the configuration is not part of
    # the formula below — confirm this is intended before wiring it in.
    _sem_w, edge_w_cfg, cent_w_cfg = get_weights()
    status_mult = get_status_multiplier(payload)

    # Per-type retriever weight (e.g. 1.1). A key that is present but null is
    # treated like a missing key instead of crashing in float(None).
    raw_node_weight = payload.get("retriever_weight")
    node_weight = 1.0 if raw_node_weight is None else float(raw_node_weight)

    # 1. Base score: semantic similarity scaled by the lifecycle status.
    base_val = float(semantic_score) * status_mult

    # 2. Intent-specific graph amplification: +50% on all graph components
    #    when dynamic boosts are supplied and there is any graph bonus at all.
    has_graph_bonus = edge_bonus_raw > 0 or cent_bonus_raw > 0
    graph_boost_factor = 1.5 if dynamic_edge_boosts and has_graph_bonus else 1.0

    # 3. Individual graph components.
    edge_impact_final = (edge_w_cfg * edge_bonus_raw) * graph_boost_factor
    cent_impact_final = (cent_w_cfg * cent_bonus_raw) * graph_boost_factor

    # 4. Merge: the node weight becomes a relative bonus (e.g. 1.2 -> +0.2);
    #    all bonuses are summed and applied multiplicatively to the base score.
    type_impact = node_weight - 1.0
    total_boost = 1.0 + type_impact + edge_impact_final + cent_impact_final

    total = base_val * total_boost

    # Floor: the score must never be zero or negative.
    final_score = max(0.0001, float(total))

    # Retrieval tracer. Guarded so the trace strings are only built when DEBUG
    # is actually enabled; this function runs once per scored chunk.
    if logger.isEnabledFor(logging.DEBUG):
        chunk_id = payload.get("chunk_id", payload.get("id", "unknown"))
        logger.debug(f"📈 [SCORE-TRACE] Chunk: {chunk_id} | Base: {base_val:.4f} | Multiplier: {total_boost:.2f} | Final: {final_score:.4f}")
        logger.debug(f"   -> Details: StatusMult={status_mult:.2f}, TypeImpact={type_impact:.2f}, EdgeImpact={edge_impact_final:.4f}, CentImpact={cent_impact_final:.4f}")

    return {
        "total": final_score,
        "edge_bonus": float(edge_bonus_raw),
        "cent_bonus": float(cent_bonus_raw),
        "status_multiplier": status_mult,
        "graph_boost_factor": graph_boost_factor,
        "type_impact": type_impact,
        "base_val": base_val,
        "edge_impact_final": edge_impact_final,
        "cent_impact_final": cent_impact_final
    }