# mindnet/app/core/retrieval/retriever_scoring.py

"""
FILE: app/core/retrieval/retriever_scoring.py
DESCRIPTION: Mathematische Kern-Logik für das WP-22 Scoring.
             Berechnet Relevanz-Scores basierend auf Semantik, Graph-Intelligence und Content Lifecycle.
             MODULARISIERUNG: Verschoben in das retrieval-Paket für WP-14.
VERSION: 1.0.2
STATUS: Active
DEPENDENCIES: app.config, functools, logging, os, typing, yaml (optional)
"""
import os
import logging
from functools import lru_cache
from typing import Any, Dict, Tuple, Optional

try:
    import yaml
except ImportError:
    yaml = None

logger = logging.getLogger(__name__)

@lru_cache
def get_weights() -> Tuple[float, float, float]:
    """
    Return the base scoring weights as ``(semantic, edge, centrality)``.

    Resolution order (highest priority first):
    1. The ``scoring`` section of the YAML file named by the
       ``MINDNET_RETRIEVER_CONFIG`` environment variable (default
       ``config/retriever.yaml``). It is only consulted when PyYAML is
       importable and the file exists.
    2. The ``RETRIEVER_W_SEM`` / ``RETRIEVER_W_EDGE`` / ``RETRIEVER_W_CENT``
       attributes of the object returned by ``app.config.get_settings()``.
    3. The built-in defaults ``(1.0, 0.0, 0.0)``.

    YAML overrides are applied per key: a missing, null or non-numeric value
    leaves that weight at its settings/default value and does not prevent the
    other keys from being applied. Problems are logged as warnings and never
    raised.

    The result is memoised by ``lru_cache``; call ``get_weights.cache_clear()``
    to pick up configuration changes at runtime.

    Returns:
        Tuple of floats ``(semantic_weight, edge_weight, centrality_weight)``.
    """
    # Imported inside the function, as in the original.
    # NOTE(review): presumably to avoid an import cycle - confirm.
    from app.config import get_settings
    settings = get_settings()

    # Start from the settings values, falling back to the system defaults.
    weights = {
        "semantic_weight": float(getattr(settings, "RETRIEVER_W_SEM", 1.0)),
        "edge_weight": float(getattr(settings, "RETRIEVER_W_EDGE", 0.0)),
        "centrality_weight": float(getattr(settings, "RETRIEVER_W_CENT", 0.0)),
    }

    # Optional override via YAML.
    config_path = os.getenv("MINDNET_RETRIEVER_CONFIG", "config/retriever.yaml")
    if yaml and os.path.exists(config_path):
        data: Any = {}
        try:
            with open(config_path, "r", encoding="utf-8") as f:
                data = yaml.safe_load(f) or {}
        except Exception as e:
            # Best effort: an unreadable or unparsable file must not break scoring.
            logger.warning(
                "Retriever Configuration could not be fully loaded from %s: %s",
                config_path, e,
            )

        scoring: Any = data.get("scoring") if isinstance(data, dict) else None
        if not isinstance(data, dict) or not (scoring is None or isinstance(scoring, dict)):
            logger.warning(
                "Retriever Configuration in %s has an unexpected structure; "
                "ignoring scoring overrides",
                config_path,
            )
            scoring = None

        # An absent or empty ``scoring:`` section simply means "no overrides".
        for key in tuple(weights):
            raw = (scoring or {}).get(key)
            if raw is None:
                continue
            try:
                weights[key] = float(raw)
            except (TypeError, ValueError):
                logger.warning(
                    "Retriever Configuration value scoring.%s=%r in %s is not numeric; "
                    "keeping %s",
                    key, raw, config_path, weights[key],
                )

    return (
        weights["semantic_weight"],
        weights["edge_weight"],
        weights["centrality_weight"],
    )

def get_status_multiplier(payload: Dict[str, Any]) -> float:
    """
    WP-22 A: content lifecycle multiplier.

    Maps the payload's ``status`` field to a ranking factor. The value is
    converted to a string, lower-cased and stripped of surrounding whitespace
    before comparison; a missing key counts as ``"active"``.

    - stable: 1.2  (reward for verified knowledge)
    - draft:  0.5  (penalty for unfinished fragments)
    - anything else, including active: 1.0  (neutral weighting)

    Args:
        payload: Mapping that may carry a ``status`` entry.

    Returns:
        The multiplier for the normalised status.
    """
    # Only the non-neutral states are listed; everything else falls back to 1.0.
    lifecycle_factors = {"stable": 1.2, "draft": 0.5}
    raw_status = payload.get("status", "active")
    normalized = str(raw_status).lower().strip()
    return lifecycle_factors.get(normalized, 1.0)

def compute_wp22_score(
    semantic_score: float,
    payload: Dict[str, Any],
    edge_bonus_raw: float = 0.0,
    cent_bonus_raw: float = 0.0,
    dynamic_edge_boosts: Optional[Dict[str, float]] = None
) -> Dict[str, Any]:
    """
    Compute the WP-22 hybrid score (semantic * lifecycle * graph) for one hit.

    FORMULA:
    Score = (Similarity * StatusMult) * (1 + (TypeWeight - 1) + ((EdgeW * EB + CentW * CB) * IntentBoost))

    Args:
        semantic_score: Raw similarity score of the hit.
        payload: Hit payload; ``status`` and ``retriever_weight`` are read from it.
            A missing or null ``retriever_weight`` is treated as the neutral 1.0.
        edge_bonus_raw: Unweighted edge bonus (EB in the formula).
        cent_bonus_raw: Unweighted centrality bonus (CB in the formula).
        dynamic_edge_boosts: Only its truthiness is evaluated here. When it is
            non-empty and at least one raw bonus is positive, the graph terms
            are multiplied by 1.5.

    Returns:
        Dict with the final ``total`` score (floored at 0.0001) and all
        intermediate values (``edge_bonus``, ``cent_bonus``,
        ``status_multiplier``, ``graph_boost_factor``, ``type_impact``,
        ``base_val``, ``edge_impact_final``, ``cent_impact_final``).

    Raises:
        TypeError, ValueError: If ``semantic_score`` or a non-null
            ``retriever_weight`` cannot be converted to ``float``.
    """
    # The configured semantic weight is not part of this formula, so it is discarded.
    _, edge_w_cfg, cent_w_cfg = get_weights()
    status_mult = get_status_multiplier(payload)

    # Retriever weight (type boost). A key that is present but null must not
    # crash scoring; it is treated like a missing key (neutral 1.0).
    raw_node_weight = payload.get("retriever_weight")
    node_weight = 1.0 if raw_node_weight is None else float(raw_node_weight)

    # 1. Base score: semantic similarity weighted by the lifecycle status.
    base_val = float(semantic_score) * status_mult

    # 2. Graph boost factor: raises the weight of the graph terms by 50% when
    #    dynamic edge boosts are supplied and there is a positive graph bonus.
    has_graph_bonus = edge_bonus_raw > 0 or cent_bonus_raw > 0
    graph_boost_factor = 1.5 if dynamic_edge_boosts and has_graph_bonus else 1.0

    # 3. Individual graph components.
    edge_impact_final = (edge_w_cfg * edge_bonus_raw) * graph_boost_factor
    cent_impact_final = (cent_w_cfg * cent_bonus_raw) * graph_boost_factor

    # 4. Merge. (node_weight - 1.0) makes a weight of 1.0 neutral.
    type_impact = node_weight - 1.0
    total = base_val * (1.0 + type_impact + edge_impact_final + cent_impact_final)

    # Floor: the score is never 0 or negative.
    final_score = max(0.0001, float(total))

    return {
        "total": final_score,
        "edge_bonus": float(edge_bonus_raw),
        "cent_bonus": float(cent_bonus_raw),
        "status_multiplier": status_mult,
        "graph_boost_factor": graph_boost_factor,
        "type_impact": type_impact,
        "base_val": base_val,
        "edge_impact_final": edge_impact_final,
        "cent_impact_final": cent_impact_final
    }