121 lines
4.5 KiB
Python
121 lines
4.5 KiB
Python
"""
|
|
FILE: app/core/retrieval/retriever_scoring.py
DESCRIPTION: Core mathematical logic for the WP-22 scoring.
             Computes relevance scores based on semantics, graph intelligence and content lifecycle.
MODULARIZATION: Moved into the retrieval package for WP-14.
VERSION: 1.0.2
STATUS: Active
DEPENDENCIES: app.config, typing
|
|
"""
|
|
import os
|
|
import logging
|
|
from functools import lru_cache
|
|
from typing import Any, Dict, Tuple, Optional
|
|
|
|
# PyYAML is an optional dependency: when it cannot be imported, ``yaml`` is
# set to None and the YAML override inside get_weights() is skipped.
try:
    import yaml
except ImportError:
    yaml = None

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
|
|
|
|
@lru_cache
def get_weights() -> Tuple[float, float, float]:
    """
    Return the base scoring weights as ``(semantic, edge, centrality)``.

    Resolution order (highest priority first):
        1. The ``scoring`` section of the YAML file whose path is taken from
           the ``MINDNET_RETRIEVER_CONFIG`` environment variable
           (default: ``config/retriever.yaml``); keys ``semantic_weight``,
           ``edge_weight`` and ``centrality_weight``.
        2. The ``RETRIEVER_W_SEM`` / ``RETRIEVER_W_EDGE`` / ``RETRIEVER_W_CENT``
           attributes of the application settings object.
        3. Built-in defaults ``(1.0, 0.0, 0.0)``.

    The YAML override is best-effort: it is skipped when PyYAML is not
    installed or the file does not exist, and any error while reading or
    parsing it is logged as a warning instead of being raised. Values that
    were already applied before the error stay in effect.

    The result is memoized via ``lru_cache``; use
    ``get_weights.cache_clear()`` to force the configuration to be re-read.

    Returns:
        Tuple ``(semantic_weight, edge_weight, centrality_weight)`` of floats.
    """
    from app.config import get_settings

    settings = get_settings()

    # Start from the settings object, falling back to the system defaults.
    sem = float(getattr(settings, "RETRIEVER_W_SEM", 1.0))
    edge = float(getattr(settings, "RETRIEVER_W_EDGE", 0.0))
    cent = float(getattr(settings, "RETRIEVER_W_CENT", 0.0))

    # Optional override via YAML.
    config_path = os.getenv("MINDNET_RETRIEVER_CONFIG", "config/retriever.yaml")
    if yaml and os.path.exists(config_path):
        try:
            with open(config_path, "r", encoding="utf-8") as f:
                data = yaml.safe_load(f) or {}
            # An empty ``scoring:`` key parses to None; treat it like a
            # missing section instead of failing on ``None.get``.
            scoring = data.get("scoring") or {}
            sem = float(scoring.get("semantic_weight", sem))
            edge = float(scoring.get("edge_weight", edge))
            cent = float(scoring.get("centrality_weight", cent))
        except Exception as e:
            # Deliberately broad: a broken config file must not take the
            # retriever down, it only loses its overrides.
            logger.warning(
                "Retriever Configuration could not be fully loaded from %s: %s",
                config_path,
                e,
            )

    return sem, edge, cent
|
|
|
|
def get_status_multiplier(payload: Dict[str, Any]) -> float:
    """
    WP-22 A: content lifecycle multiplier.

    Maps the payload's ``status`` field to a ranking factor. The value is
    converted to a string, lower-cased and stripped of surrounding
    whitespace before the lookup; a missing field counts as ``"active"``.

    - stable: 1.2 (reward for verified knowledge)
    - active: 1.0 (standard weighting)
    - draft:  0.5 (penalty for unfinished fragments)

    Any other status also yields the neutral factor 1.0.
    """
    lifecycle_factors = {"stable": 1.2, "draft": 0.5}
    normalized = str(payload.get("status", "active")).lower().strip()
    return lifecycle_factors.get(normalized, 1.0)
|
|
|
|
def compute_wp22_score(
    semantic_score: float,
    payload: Dict[str, Any],
    edge_bonus_raw: float = 0.0,
    cent_bonus_raw: float = 0.0,
    dynamic_edge_boosts: Optional[Dict[str, float]] = None
) -> Dict[str, Any]:
    """
    Central scoring formula implementing the WP-22 hybrid scoring
    (semantic * lifecycle * graph).

    FORMULA:
        Score = (Similarity * StatusMult)
                * (1 + (TypeWeight - 1) + ((EdgeW * EB + CentW * CB) * IntentBoost))

    Args:
        semantic_score: Raw semantic similarity of the hit.
        payload: Hit payload; ``status`` drives the lifecycle multiplier and
            ``retriever_weight`` the type weight. A missing or null
            ``retriever_weight`` is treated as the neutral value 1.0.
        edge_bonus_raw: Unweighted edge bonus (EB).
        cent_bonus_raw: Unweighted centrality bonus (CB).
        dynamic_edge_boosts: Only its truthiness is evaluated here: when it
            is non-empty and at least one raw bonus is positive, both graph
            components are multiplied by 1.5.

    Returns:
        Dict with the final ``total`` score (floored at 0.0001) and all
        intermediate values for the explanation layer: ``edge_bonus``,
        ``cent_bonus``, ``status_multiplier``, ``graph_boost_factor``,
        ``type_impact``, ``base_val``, ``edge_impact_final`` and
        ``cent_impact_final``.
    """
    # The semantic weight is part of the configured triple but is not used
    # by the formula above, so it is discarded here.
    _, edge_w_cfg, cent_w_cfg = get_weights()
    status_mult = get_status_multiplier(payload)

    # Retriever weight (type boost from types.yaml, e.g. 1.1 for decisions).
    # A key that is present but null must not crash float(); treat as neutral.
    raw_node_weight = payload.get("retriever_weight")
    node_weight = 1.0 if raw_node_weight is None else float(raw_node_weight)

    # 1. Base score: semantic similarity weighted by the lifecycle status.
    base_val = float(semantic_score) * status_mult

    # 2. Graph boost factor (part C: intent-specific amplification).
    #    Raises the weight of the whole graph contribution by 50% when
    #    intent boosts are present and there is any graph bonus to amplify.
    has_graph_bonus = edge_bonus_raw > 0 or cent_bonus_raw > 0
    graph_boost_factor = 1.5 if dynamic_edge_boosts and has_graph_bonus else 1.0

    # 3. Individual graph components.
    edge_impact_final = (edge_w_cfg * edge_bonus_raw) * graph_boost_factor
    cent_impact_final = (cent_w_cfg * cent_bonus_raw) * graph_boost_factor

    # 4. Merge. (node_weight - 1.0) makes a weight of 1.0 neutral.
    type_impact = node_weight - 1.0
    total = base_val * (1.0 + type_impact + edge_impact_final + cent_impact_final)

    # Floor: the score must never be zero or negative.
    final_score = max(0.0001, float(total))

    return {
        "total": final_score,
        "edge_bonus": float(edge_bonus_raw),
        "cent_bonus": float(cent_bonus_raw),
        "status_multiplier": status_mult,
        "graph_boost_factor": graph_boost_factor,
        "type_impact": type_impact,
        "base_val": base_val,
        "edge_impact_final": edge_impact_final,
        "cent_impact_final": cent_impact_final
    }