128 lines
5.1 KiB
Python
128 lines
5.1 KiB
Python
"""
FILE: app/core/retrieval/retriever_scoring.py
DESCRIPTION: Core mathematical logic for the WP-22/WP-15c scoring.
             Computes relevance scores based on semantics, graph intelligence and content lifecycle.
FIX v1.0.3: Optimized interaction between type boost and status damping.
VERSION: 1.0.3
STATUS: Active
"""
|
|
import os
|
|
import logging
|
|
from functools import lru_cache
|
|
from typing import Any, Dict, Tuple, Optional
|
|
|
|
try:
|
|
import yaml
|
|
except ImportError:
|
|
yaml = None
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
@lru_cache
def get_weights() -> Tuple[float, float, float]:
    """
    Return the base scoring weights as ``(semantic, edge, centrality)``.

    Defaults are read from the application settings (``RETRIEVER_W_SEM``,
    ``RETRIEVER_W_EDGE``, ``RETRIEVER_W_CENT``), falling back to
    1.0 / 0.0 / 0.0 when an attribute is missing. If PyYAML is importable and
    the file named by the ``MINDNET_RETRIEVER_CONFIG`` environment variable
    (default ``config/retriever.yaml``) exists, the keys ``semantic_weight``,
    ``edge_weight`` and ``centrality_weight`` of its ``scoring`` section
    override those defaults.

    The result is memoised by ``lru_cache``: settings and the YAML file are
    only read again after the cache has been cleared.

    Returns:
        Tuple ``(semantic_weight, edge_weight, centrality_weight)`` of floats.
    """
    # Imported at call time, as in the original code.
    # NOTE(review): presumably this avoids an import cycle with app.config - confirm.
    from app.config import get_settings

    settings = get_settings()

    # Defaults from the settings object.
    sem = float(getattr(settings, "RETRIEVER_W_SEM", 1.0))
    edge = float(getattr(settings, "RETRIEVER_W_EDGE", 0.0))
    cent = float(getattr(settings, "RETRIEVER_W_CENT", 0.0))

    # Optional override via YAML.
    config_path = os.getenv("MINDNET_RETRIEVER_CONFIG", "config/retriever.yaml")
    if yaml and os.path.exists(config_path):
        try:
            with open(config_path, "r", encoding="utf-8") as f:
                data = yaml.safe_load(f) or {}

            # An empty ``scoring:`` key parses to None. Treat it like a missing
            # section (same guard as for an empty document above) instead of
            # failing with AttributeError and logging a spurious warning.
            scoring = data.get("scoring") or {}

            # Overrides are applied in order; if one value cannot be converted,
            # the overrides already applied before it are kept.
            sem = float(scoring.get("semantic_weight", sem))
            edge = float(scoring.get("edge_weight", edge))
            cent = float(scoring.get("centrality_weight", cent))
        except Exception as e:
            # Best effort: a broken config file must not break scoring.
            logger.warning(f"Retriever Configuration could not be fully loaded from {config_path}: {e}")

    return sem, edge, cent
|
|
|
|
def get_status_multiplier(payload: Dict[str, Any]) -> float:
    """
    WP-22 A: content lifecycle multiplier.

    Maps the ``status`` entry of ``payload`` to a ranking factor. The value is
    converted to a string, lower-cased and stripped before the lookup; a
    missing key is treated as ``"active"``.

    - stable: 1.2 (reward for verified knowledge)
    - draft:  0.5 (damping for unfinished fragments)
    - anything else, including active: 1.0 (standard weighting)
    """
    # Only the non-neutral states need an entry; everything else maps to 1.0.
    special_factors = {"stable": 1.2, "draft": 0.5}

    raw_status = payload.get("status", "active")
    normalized = str(raw_status).lower().strip()
    return special_factors.get(normalized, 1.0)
|
|
|
|
def compute_wp22_score(
    semantic_score: float,
    payload: Dict[str, Any],
    edge_bonus_raw: float = 0.0,
    cent_bonus_raw: float = 0.0,
    dynamic_edge_boosts: Optional[Dict[str, float]] = None
) -> Dict[str, Any]:
    """
    Central scoring formula (hybrid scoring: semantic * lifecycle * graph).

    Formula:
        base  = semantic_score * status_multiplier
        boost = 1 + (retriever_weight - 1)
                  + edge_weight * edge_bonus_raw * graph_boost_factor
                  + centrality_weight * cent_bonus_raw * graph_boost_factor
        total = max(0.0001, base * boost)

    Args:
        semantic_score: Similarity score of the candidate.
        payload: Candidate payload. Read keys: ``status`` (lifecycle state),
            ``retriever_weight`` (type boost; missing or ``None`` means 1.0)
            and ``chunk_id`` / ``id`` (debug trace only).
        edge_bonus_raw: Raw edge bonus. According to the original author's
            note it already contains the super-edge aggregation (WP-15c).
        cent_bonus_raw: Raw centrality bonus.
        dynamic_edge_boosts: Only its truthiness is evaluated here. When it is
            non-empty and at least one raw bonus is positive, both graph
            components are scaled by 1.5.

    Returns:
        Dict with ``total`` (final score, floored at 0.0001) and the
        intermediate values ``edge_bonus``, ``cent_bonus``,
        ``status_multiplier``, ``graph_boost_factor``, ``type_impact``,
        ``base_val``, ``edge_impact_final`` and ``cent_impact_final``.
    """
    # NOTE(review): get_weights() also returns the configured semantic weight,
    # but this formula never applies it - confirm whether that is intended.
    _sem_w, edge_w_cfg, cent_w_cfg = get_weights()
    status_mult = get_status_multiplier(payload)

    # Retriever weight: per-type boost (original note: taken from types.yaml,
    # e.g. 1.1 for decisions). A key that is present but None is treated like
    # a missing key instead of crashing in float(None).
    raw_node_weight = payload.get("retriever_weight")
    node_weight = 1.0 if raw_node_weight is None else float(raw_node_weight)

    # 1. Base score: semantic relevance multiplied by the lifecycle status.
    base_val = float(semantic_score) * status_mult

    # 2. Graph boost factor: graph components count 50% more when dynamic
    #    (intent-specific) boosts are present and there is a positive bonus.
    has_graph_bonus = edge_bonus_raw > 0 or cent_bonus_raw > 0
    graph_boost_factor = 1.5 if dynamic_edge_boosts and has_graph_bonus else 1.0

    # 3. Individual graph components.
    edge_impact_final = (edge_w_cfg * edge_bonus_raw) * graph_boost_factor
    cent_impact_final = (cent_w_cfg * cent_bonus_raw) * graph_boost_factor

    # 4. Merge: (node_weight - 1.0) turns the weight into a relative bonus
    #    (e.g. 1.2 -> +0.2). All bonuses are summed and then applied to base_val.
    type_impact = node_weight - 1.0
    total_boost = 1.0 + type_impact + edge_impact_final + cent_impact_final

    total = base_val * total_boost

    # Floor: the score is never 0 or negative.
    final_score = max(0.0001, float(total))

    # Retrieval tracer. This function is called per candidate, so the trace
    # strings are only built when DEBUG logging is actually enabled.
    if logger.isEnabledFor(logging.DEBUG):
        chunk_id = payload.get("chunk_id", payload.get("id", "unknown"))
        logger.debug(f"📈 [SCORE-TRACE] Chunk: {chunk_id} | Base: {base_val:.4f} | Multiplier: {total_boost:.2f} | Final: {final_score:.4f}")
        logger.debug(f" -> Details: StatusMult={status_mult:.2f}, TypeImpact={type_impact:.2f}, EdgeImpact={edge_impact_final:.4f}, CentImpact={cent_impact_final:.4f}")

    return {
        "total": final_score,
        "edge_bonus": float(edge_bonus_raw),
        "cent_bonus": float(cent_bonus_raw),
        "status_multiplier": status_mult,
        "graph_boost_factor": graph_boost_factor,
        "type_impact": type_impact,
        "base_val": base_val,
        "edge_impact_final": edge_impact_final,
        "cent_impact_final": cent_impact_final
    }