mindnet/app/core/retrieval/retriever_scoring.py

123 lines
4.7 KiB
Python

"""
FILE: app/core/retrieval/retriever_scoring.py
DESCRIPTION: Mathematische Kern-Logik für das WP-22/WP-15c Scoring.
Berechnet Relevanz-Scores basierend auf Semantik, Graph-Intelligence und Content Lifecycle.
FIX v1.0.3: Optimierte Interaktion zwischen Typ-Boost und Status-Dämpfung.
VERSION: 1.0.3
STATUS: Active
"""
import os
import logging
from functools import lru_cache
from typing import Any, Dict, Tuple, Optional
try:
import yaml
except ImportError:
yaml = None
logger = logging.getLogger(__name__)
@lru_cache
def get_weights() -> Tuple[float, float, float]:
"""
Liefert die Basis-Gewichtung (semantic, edge, centrality) aus der Konfiguration.
"""
from app.config import get_settings
settings = get_settings()
# Defaults aus Settings laden
sem = float(getattr(settings, "RETRIEVER_W_SEM", 1.0))
edge = float(getattr(settings, "RETRIEVER_W_EDGE", 0.0))
cent = float(getattr(settings, "RETRIEVER_W_CENT", 0.0))
# Optionaler Override via YAML
config_path = os.getenv("MINDNET_RETRIEVER_CONFIG", "config/retriever.yaml")
if yaml and os.path.exists(config_path):
try:
with open(config_path, "r", encoding="utf-8") as f:
data = yaml.safe_load(f) or {}
scoring = data.get("scoring", {})
sem = float(scoring.get("semantic_weight", sem))
edge = float(scoring.get("edge_weight", edge))
cent = float(scoring.get("centrality_weight", cent))
except Exception as e:
logger.warning(f"Retriever Configuration could not be fully loaded from {config_path}: {e}")
return sem, edge, cent
def get_status_multiplier(payload: Dict[str, Any]) -> float:
"""
WP-22 A: Content Lifecycle Multiplier.
Steuert das Ranking basierend auf dem Reifegrad der Information.
- stable: 1.2 (Belohnung für verifiziertes Wissen)
- active: 1.0 (Standard-Gewichtung)
- draft: 0.5 (Dämpfung für unfertige Fragmente)
"""
status = str(payload.get("status", "active")).lower().strip()
if status == "stable":
return 1.2
if status == "draft":
return 0.5
return 1.0
def compute_wp22_score(
semantic_score: float,
payload: Dict[str, Any],
edge_bonus_raw: float = 0.0,
cent_bonus_raw: float = 0.0,
dynamic_edge_boosts: Optional[Dict[str, float]] = None
) -> Dict[str, Any]:
"""
Die zentrale mathematische Scoring-Formel (WP-15c optimiert).
Implementiert das Hybrid-Scoring (Semantic * Lifecycle * Graph).
LOGIK:
1. Base = Similarity * StatusMult (Lifecycle-Filter).
2. Boosts = (TypeBoost - 1) + (GraphBoni * IntentFactor).
3. Final = Base * (1 + Boosts).
Der edge_bonus_raw enthält bereits die Super-Edge-Aggregation (WP-15c).
"""
sem_w, edge_w_cfg, cent_w_cfg = get_weights()
status_mult = get_status_multiplier(payload)
# Retriever Weight (Typ-Boost aus types.yaml, z.B. 1.1 für Decisions)
node_weight = float(payload.get("retriever_weight", 1.0))
# 1. Berechnung des Base Scores (Semantik gewichtet durch Lifecycle-Status)
# WICHTIG: Der Status wirkt hier als Multiplikator auf die Basis-Relevanz.
base_val = float(semantic_score) * status_mult
# 2. Graph Boost Factor (Intent-spezifische Verstärkung aus decision_engine.yaml)
# Erhöht das Gewicht des gesamten Graphen um 50%, wenn ein spezifischer Intent vorliegt.
graph_boost_factor = 1.5 if dynamic_edge_boosts and (edge_bonus_raw > 0 or cent_bonus_raw > 0) else 1.0
# 3. Einzelne Graph-Komponenten berechnen
# WP-15c Hinweis: edge_bonus_raw ist durch den retriever.py bereits gedämpft/aggregiert.
edge_impact_final = (edge_w_cfg * edge_bonus_raw) * graph_boost_factor
cent_impact_final = (cent_w_cfg * cent_bonus_raw) * graph_boost_factor
# 4. Finales Zusammenführen (Merging)
# (node_weight - 1.0) wandelt das Gewicht in einen relativen Bonus um (z.B. 1.2 -> +0.2).
# Alle Boni werden addiert und wirken dann auf den base_val.
type_impact = node_weight - 1.0
total_boost = 1.0 + type_impact + edge_impact_final + cent_impact_final
total = base_val * total_boost
# Sicherstellen, dass der Score niemals 0 oder negativ ist (Floor)
final_score = max(0.0001, float(total))
return {
"total": final_score,
"edge_bonus": float(edge_bonus_raw),
"cent_bonus": float(cent_bonus_raw),
"status_multiplier": status_mult,
"graph_boost_factor": graph_boost_factor,
"type_impact": type_impact,
"base_val": base_val,
"edge_impact_final": edge_impact_final,
"cent_impact_final": cent_impact_final
}