""" FILE: app/core/retrieval/retriever_scoring.py DESCRIPTION: Mathematische Kern-Logik für das WP-22/WP-15c Scoring. Berechnet Relevanz-Scores basierend auf Semantik, Graph-Intelligence und Content Lifecycle. FIX v1.0.3: Optimierte Interaktion zwischen Typ-Boost und Status-Dämpfung. VERSION: 1.0.3 STATUS: Active """ import os import logging from functools import lru_cache from typing import Any, Dict, Tuple, Optional try: import yaml except ImportError: yaml = None logger = logging.getLogger(__name__) @lru_cache def get_weights() -> Tuple[float, float, float]: """ Liefert die Basis-Gewichtung (semantic, edge, centrality) aus der Konfiguration. """ from app.config import get_settings settings = get_settings() # Defaults aus Settings laden sem = float(getattr(settings, "RETRIEVER_W_SEM", 1.0)) edge = float(getattr(settings, "RETRIEVER_W_EDGE", 0.0)) cent = float(getattr(settings, "RETRIEVER_W_CENT", 0.0)) # Optionaler Override via YAML config_path = os.getenv("MINDNET_RETRIEVER_CONFIG", "config/retriever.yaml") if yaml and os.path.exists(config_path): try: with open(config_path, "r", encoding="utf-8") as f: data = yaml.safe_load(f) or {} scoring = data.get("scoring", {}) sem = float(scoring.get("semantic_weight", sem)) edge = float(scoring.get("edge_weight", edge)) cent = float(scoring.get("centrality_weight", cent)) except Exception as e: logger.warning(f"Retriever Configuration could not be fully loaded from {config_path}: {e}") return sem, edge, cent def get_status_multiplier(payload: Dict[str, Any]) -> float: """ WP-22 A: Content Lifecycle Multiplier. Steuert das Ranking basierend auf dem Reifegrad der Information. - stable: 1.2 (Belohnung für verifiziertes Wissen) - active: 1.0 (Standard-Gewichtung) - draft: 0.5 (Dämpfung für unfertige Fragmente) """ status = str(payload.get("status", "active")).lower().strip() if status == "stable": return 1.2 if status == "draft": return 0.5 return 1.0 def compute_wp22_score( semantic_score: float, payload: Dict[str, Any], edge_bonus_raw: float = 0.0, cent_bonus_raw: float = 0.0, dynamic_edge_boosts: Optional[Dict[str, float]] = None ) -> Dict[str, Any]: """ Die zentrale mathematische Scoring-Formel (WP-15c optimiert). Implementiert das Hybrid-Scoring (Semantic * Lifecycle * Graph). LOGIK: 1. Base = Similarity * StatusMult (Lifecycle-Filter). 2. Boosts = (TypeBoost - 1) + (GraphBoni * IntentFactor). 3. Final = Base * (1 + Boosts). Der edge_bonus_raw enthält bereits die Super-Edge-Aggregation (WP-15c). """ sem_w, edge_w_cfg, cent_w_cfg = get_weights() status_mult = get_status_multiplier(payload) # Retriever Weight (Typ-Boost aus types.yaml, z.B. 1.1 für Decisions) node_weight = float(payload.get("retriever_weight", 1.0)) # 1. Berechnung des Base Scores (Semantik gewichtet durch Lifecycle-Status) # WICHTIG: Der Status wirkt hier als Multiplikator auf die Basis-Relevanz. base_val = float(semantic_score) * status_mult # 2. Graph Boost Factor (Intent-spezifische Verstärkung aus decision_engine.yaml) # Erhöht das Gewicht des gesamten Graphen um 50%, wenn ein spezifischer Intent vorliegt. graph_boost_factor = 1.5 if dynamic_edge_boosts and (edge_bonus_raw > 0 or cent_bonus_raw > 0) else 1.0 # 3. Einzelne Graph-Komponenten berechnen # WP-15c Hinweis: edge_bonus_raw ist durch den retriever.py bereits gedämpft/aggregiert. edge_impact_final = (edge_w_cfg * edge_bonus_raw) * graph_boost_factor cent_impact_final = (cent_w_cfg * cent_bonus_raw) * graph_boost_factor # 4. Finales Zusammenführen (Merging) # (node_weight - 1.0) wandelt das Gewicht in einen relativen Bonus um (z.B. 1.2 -> +0.2). # Alle Boni werden addiert und wirken dann auf den base_val. type_impact = node_weight - 1.0 total_boost = 1.0 + type_impact + edge_impact_final + cent_impact_final total = base_val * total_boost # Sicherstellen, dass der Score niemals 0 oder negativ ist (Floor) final_score = max(0.0001, float(total)) # WP-24c v4.5.0-DEBUG: Retrieval-Tracer - Protokollierung der Score-Berechnung chunk_id = payload.get("chunk_id", payload.get("id", "unknown")) logger.debug(f"📈 [SCORE-TRACE] Chunk: {chunk_id} | Base: {base_val:.4f} | Multiplier: {total_boost:.2f} | Final: {final_score:.4f}") logger.debug(f" -> Details: StatusMult={status_mult:.2f}, TypeImpact={type_impact:.2f}, EdgeImpact={edge_impact_final:.4f}, CentImpact={cent_impact_final:.4f}") return { "total": final_score, "edge_bonus": float(edge_bonus_raw), "cent_bonus": float(cent_bonus_raw), "status_multiplier": status_mult, "graph_boost_factor": graph_boost_factor, "type_impact": type_impact, "base_val": base_val, "edge_impact_final": edge_impact_final, "cent_impact_final": cent_impact_final }