"""
FILE: app/core/retrieval/retriever_scoring.py
DESCRIPTION: Mathematical core logic for the WP-22 scoring.
             Computes relevance scores based on semantics, graph intelligence
             and content lifecycle.
MODULARIZATION: Moved into the retrieval package for WP-14.
VERSION: 1.0.2
STATUS: Active
DEPENDENCIES: app.config, typing
"""
import os
import logging
from functools import lru_cache
from typing import Any, Dict, Tuple, Optional

try:
    import yaml
except ImportError:
    # PyYAML is optional; without it the YAML override is simply skipped.
    yaml = None

logger = logging.getLogger(__name__)

# Lifecycle status -> ranking multiplier. Any status not listed here is neutral (1.0).
_STATUS_MULTIPLIERS: Dict[str, float] = {
    "stable": 1.2,
    "draft": 0.5,
}


def _to_float(value: Any, default: float) -> float:
    """Convert *value* to float; return *default* for None or unparsable input."""
    if value is None:
        return default
    try:
        return float(value)
    except (TypeError, ValueError):
        return default


@lru_cache
def get_weights() -> Tuple[float, float, float]:
    """
    Return the base weights (semantic, edge, centrality) from the configuration.

    Priority:
    1. YAML file (``scoring`` section); path taken from the environment variable
       MINDNET_RETRIEVER_CONFIG, defaulting to config/retriever.yaml
    2. Settings attributes RETRIEVER_W_SEM / RETRIEVER_W_EDGE / RETRIEVER_W_CENT
    3. System defaults (1.0, 0.0, 0.0)

    The result is cached for the lifetime of the process (``lru_cache``), so
    configuration changes are only picked up after ``get_weights.cache_clear()``.

    Returns:
        Tuple ``(semantic_weight, edge_weight, centrality_weight)``.
    """
    # Imported lazily inside the function, as in the original module layout.
    from app.config import get_settings

    settings = get_settings()

    # Defaults taken from the settings object.
    sem = float(getattr(settings, "RETRIEVER_W_SEM", 1.0))
    edge = float(getattr(settings, "RETRIEVER_W_EDGE", 0.0))
    cent = float(getattr(settings, "RETRIEVER_W_CENT", 0.0))

    # Optional override via YAML.
    config_path = os.getenv("MINDNET_RETRIEVER_CONFIG", "config/retriever.yaml")
    if yaml and os.path.exists(config_path):
        try:
            with open(config_path, "r", encoding="utf-8") as f:
                data = yaml.safe_load(f) or {}
            # ``scoring:`` may be present but empty (null) -> treat as "no overrides".
            scoring = data.get("scoring") or {}
            sem = float(scoring.get("semantic_weight", sem))
            edge = float(scoring.get("edge_weight", edge))
            cent = float(scoring.get("centrality_weight", cent))
        except Exception as e:
            # Best effort: a broken config file must not break retrieval. Values
            # parsed before the failure stay applied, the rest keep their defaults.
            logger.warning(
                "Retriever Configuration could not be fully loaded from %s: %s",
                config_path,
                e,
            )

    return sem, edge, cent


def get_status_multiplier(payload: Optional[Dict[str, Any]]) -> float:
    """
    WP-22 A: content lifecycle multiplier.

    Steers the ranking based on the maturity of the information:
    - stable: 1.2 (reward for verified knowledge)
    - active: 1.0 (standard weighting)
    - draft:  0.5 (penalty for unfinished fragments)

    The status is compared case-insensitively and with surrounding whitespace
    removed. A missing/empty payload or any other status yields 1.0.

    Args:
        payload: Metadata dict of the node; the key ``status`` is read.

    Returns:
        The multiplier to apply to the semantic score.
    """
    if not payload:
        # No metadata at all -> neutral weighting instead of an AttributeError.
        return 1.0
    status = str(payload.get("status", "active")).lower().strip()
    return _STATUS_MULTIPLIERS.get(status, 1.0)


def compute_wp22_score(
    semantic_score: float,
    payload: Dict[str, Any],
    edge_bonus_raw: float = 0.0,
    cent_bonus_raw: float = 0.0,
    dynamic_edge_boosts: Optional[Dict[str, float]] = None
) -> Dict[str, Any]:
    """
    Central scoring formula implementing the WP-22 hybrid scoring
    (Semantic * Lifecycle * Graph).

    FORMULA:
    Score = (Similarity * StatusMult)
            * (1 + (TypeWeight - 1) + ((EdgeW * EB + CentW * CB) * IntentBoost))

    Args:
        semantic_score: Raw similarity score of the hit.
        payload: Metadata dict of the node; the keys ``status`` and
            ``retriever_weight`` are read. ``None``/empty is treated as no metadata.
        edge_bonus_raw: Unweighted edge bonus (EB).
        cent_bonus_raw: Unweighted centrality bonus (CB).
        dynamic_edge_boosts: Only its truthiness is evaluated here: if non-empty
            and at least one raw bonus is positive, the graph part is scaled by 1.5.

    Returns:
        Dict with the final ``total`` score and all intermediate values
        (``edge_bonus``, ``cent_bonus``, ``status_multiplier``,
        ``graph_boost_factor``, ``type_impact``, ``base_val``,
        ``edge_impact_final``, ``cent_impact_final``).
    """
    # NOTE(review): the semantic weight returned by get_weights() is not part of
    # the formula above and was never applied by this function -- confirm intended.
    _, edge_w_cfg, cent_w_cfg = get_weights()

    payload = payload or {}
    status_mult = get_status_multiplier(payload)

    # Retriever weight (type boost, e.g. 1.1). A null or unparsable value falls
    # back to the neutral 1.0 instead of aborting the whole scoring call.
    node_weight = _to_float(payload.get("retriever_weight"), 1.0)

    # 1. Base score: semantics weighted by the lifecycle status.
    base_val = float(semantic_score) * status_mult

    # 2. Graph boost factor: raises the weight of the whole graph part by 50%
    #    when dynamic boosts are present and there is any positive graph bonus.
    has_graph_signal = edge_bonus_raw > 0 or cent_bonus_raw > 0
    graph_boost_factor = 1.5 if dynamic_edge_boosts and has_graph_signal else 1.0

    # 3. Individual graph components.
    edge_impact_final = (edge_w_cfg * edge_bonus_raw) * graph_boost_factor
    cent_impact_final = (cent_w_cfg * cent_bonus_raw) * graph_boost_factor

    # 4. Merge. (node_weight - 1.0) makes a weight of 1.0 neutral.
    type_impact = node_weight - 1.0
    total = base_val * (1.0 + type_impact + edge_impact_final + cent_impact_final)

    # Floor: the score must never be 0 or negative.
    final_score = max(0.0001, float(total))

    return {
        "total": final_score,
        "edge_bonus": float(edge_bonus_raw),
        "cent_bonus": float(cent_bonus_raw),
        "status_multiplier": status_mult,
        "graph_boost_factor": graph_boost_factor,
        "type_impact": type_impact,
        "base_val": base_val,
        "edge_impact_final": edge_impact_final,
        "cent_impact_final": cent_impact_final
    }