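bug fix: fall back to ec.embed_text(query) when the model_name keyword raises TypeError (app/core/retriever.py, v0.6.15)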
This commit is contained in:
parent
33b0c83c87
commit
ba46957556
|
|
@ -2,7 +2,9 @@
|
||||||
FILE: app/core/retriever.py
|
FILE: app/core/retriever.py
|
||||||
DESCRIPTION: Haupt-Schnittstelle für die Suche. Orchestriert Vektorsuche und Graph-Expansion.
|
DESCRIPTION: Haupt-Schnittstelle für die Suche. Orchestriert Vektorsuche und Graph-Expansion.
|
||||||
Nutzt retriever_scoring.py für die WP-22 Logik.
|
Nutzt retriever_scoring.py für die WP-22 Logik.
|
||||||
VERSION: 0.6.14 (WP-22 Full, Debug & Stable)
|
FIX: TypeError in embed_text (model_name) behoben.
|
||||||
|
FIX: Pydantic ValidationError (Target/Source) behoben.
|
||||||
|
VERSION: 0.6.15 (WP-22 Full & Stable)
|
||||||
STATUS: Active
|
STATUS: Active
|
||||||
DEPENDENCIES: app.config, app.models.dto, app.core.qdrant*, app.core.graph_adapter, app.core.retriever_scoring
|
DEPENDENCIES: app.config, app.models.dto, app.core.qdrant*, app.core.graph_adapter, app.core.retriever_scoring
|
||||||
"""
|
"""
|
||||||
|
|
@ -28,22 +30,36 @@ from app.core.retriever_scoring import get_weights, compute_wp22_score
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
# --- Hilfsfunktionen für Qdrant ---
|
# ==============================================================================
|
||||||
|
# 1. CORE HELPERS & CONFIG LOADERS
|
||||||
|
# ==============================================================================
|
||||||
|
|
||||||
def _get_client_and_prefix() -> Tuple[Any, str]:
|
def _get_client_and_prefix() -> Tuple[Any, str]:
|
||||||
"""Initialisiert Qdrant Client und lädt Collection-Prefix."""
|
"""Initialisiert Qdrant Client und lädt Collection-Prefix."""
|
||||||
cfg = qdr.QdrantConfig.from_env()
|
cfg = qdr.QdrantConfig.from_env()
|
||||||
return qdr.get_client(cfg), cfg.prefix
|
return qdr.get_client(cfg), cfg.prefix
|
||||||
|
|
||||||
|
|
||||||
def _get_query_vector(req: QueryRequest) -> List[float]:
|
def _get_query_vector(req: QueryRequest) -> List[float]:
|
||||||
"""Vektorisiert die Anfrage oder nutzt vorhandenen Vektor."""
|
"""
|
||||||
|
Vektorisiert die Anfrage.
|
||||||
|
FIX: Enthält try-except Block für unterschiedliche Signaturen von ec.embed_text.
|
||||||
|
"""
|
||||||
if req.query_vector:
|
if req.query_vector:
|
||||||
return list(req.query_vector)
|
return list(req.query_vector)
|
||||||
if not req.query:
|
if not req.query:
|
||||||
raise ValueError("Kein Text oder Vektor für die Suche angegeben.")
|
raise ValueError("Kein Text oder Vektor für die Suche angegeben.")
|
||||||
|
|
||||||
settings = get_settings()
|
settings = get_settings()
|
||||||
return ec.embed_text(req.query, model_name=settings.MODEL_NAME)
|
|
||||||
|
try:
|
||||||
|
# Versuch mit modernem Interface (WP-03 kompatibel)
|
||||||
|
return ec.embed_text(req.query, model_name=settings.MODEL_NAME)
|
||||||
|
except TypeError:
|
||||||
|
# Fallback für Signaturen, die 'model_name' nicht als Keyword akzeptieren
|
||||||
|
logger.debug("ec.embed_text does not accept 'model_name' keyword. Falling back.")
|
||||||
|
return ec.embed_text(req.query)
|
||||||
|
|
||||||
|
|
||||||
def _semantic_hits(
|
def _semantic_hits(
|
||||||
client: Any,
|
client: Any,
|
||||||
|
|
@ -57,7 +73,9 @@ def _semantic_hits(
|
||||||
# Strikte Typkonvertierung für Stabilität
|
# Strikte Typkonvertierung für Stabilität
|
||||||
return [(str(hit[0]), float(hit[1]), dict(hit[2] or {})) for hit in raw_hits]
|
return [(str(hit[0]), float(hit[1]), dict(hit[2] or {})) for hit in raw_hits]
|
||||||
|
|
||||||
# --- Explanation Layer (Detaillierte Begründungen) ---
|
# ==============================================================================
|
||||||
|
# 2. EXPLANATION LAYER (DEBUG & VERIFIABILITY)
|
||||||
|
# ==============================================================================
|
||||||
|
|
||||||
def _build_explanation(
|
def _build_explanation(
|
||||||
semantic_score: float,
|
semantic_score: float,
|
||||||
|
|
@ -100,7 +118,7 @@ def _build_explanation(
|
||||||
type_weight = float(payload.get("retriever_weight", 1.0))
|
type_weight = float(payload.get("retriever_weight", 1.0))
|
||||||
if type_weight != 1.0:
|
if type_weight != 1.0:
|
||||||
msg = "Bevorzugt" if type_weight > 1.0 else "De-priorisiert"
|
msg = "Bevorzugt" if type_weight > 1.0 else "De-priorisiert"
|
||||||
reasons.append(Reason(kind="type", message=f"{msg} aufgrund des Notiz-Typs.", score_impact=base_val * (type_weight - 1.0)))
|
reasons.append(Reason(kind="type", message=f"{msg} durch Typ-Profil.", score_impact=base_val * (type_weight - 1.0)))
|
||||||
|
|
||||||
# 4. Kanten-Verarbeitung (Graph-Intelligence)
|
# 4. Kanten-Verarbeitung (Graph-Intelligence)
|
||||||
if subgraph and target_note_id and scoring_debug["edge_bonus"] > 0:
|
if subgraph and target_note_id and scoring_debug["edge_bonus"] > 0:
|
||||||
|
|
@ -155,7 +173,9 @@ def _build_explanation(
|
||||||
applied_boosts=applied_boosts
|
applied_boosts=applied_boosts
|
||||||
)
|
)
|
||||||
|
|
||||||
# --- Kern-Logik für Hybrid-Retrieval ---
|
# ==============================================================================
|
||||||
|
# 3. CORE RETRIEVAL PIPELINE
|
||||||
|
# ==============================================================================
|
||||||
|
|
||||||
def _build_hits_from_semantic(
|
def _build_hits_from_semantic(
|
||||||
hits: Iterable[Tuple[str, float, Dict[str, Any]]],
|
hits: Iterable[Tuple[str, float, Dict[str, Any]]],
|
||||||
|
|
@ -171,7 +191,6 @@ def _build_hits_from_semantic(
|
||||||
|
|
||||||
for pid, semantic_score, payload in hits:
|
for pid, semantic_score, payload in hits:
|
||||||
edge_bonus, cent_bonus = 0.0, 0.0
|
edge_bonus, cent_bonus = 0.0, 0.0
|
||||||
# Graph-Abfrage erfolgt IMMER über die Note-ID, nicht Chunk-ID
|
|
||||||
target_id = payload.get("note_id")
|
target_id = payload.get("note_id")
|
||||||
|
|
||||||
if subgraph and target_id:
|
if subgraph and target_id:
|
||||||
|
|
@ -269,7 +288,7 @@ def hybrid_retrieve(req: QueryRequest) -> QueryResponse:
|
||||||
data["weight"] = data.get("weight", 1.0) * prov_w * intent_multiplier
|
data["weight"] = data.get("weight", 1.0) * prov_w * intent_multiplier
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Graph Expansion failed criticaly: {e}", exc_info=True)
|
logger.error(f"Graph Expansion failed: {e}")
|
||||||
subgraph = None
|
subgraph = None
|
||||||
|
|
||||||
# 3. Scoring & Explanation Generierung
|
# 3. Scoring & Explanation Generierung
|
||||||
|
|
@ -287,6 +306,5 @@ def semantic_retrieve(req: QueryRequest) -> QueryResponse:
|
||||||
class Retriever:
|
class Retriever:
|
||||||
"""Schnittstelle für die asynchrone Suche."""
|
"""Schnittstelle für die asynchrone Suche."""
|
||||||
async def search(self, request: QueryRequest) -> QueryResponse:
|
async def search(self, request: QueryRequest) -> QueryResponse:
|
||||||
"""Führt eine Suche durch. Nutzt hybrid_retrieve als Standard."""
|
"""Führt eine hybride Suche aus."""
|
||||||
# Standard ist Hybrid-Modus
|
|
||||||
return hybrid_retrieve(request)
|
return hybrid_retrieve(request)
|
||||||
|
|
@ -101,7 +101,7 @@ def compute_wp22_score(
|
||||||
cent_impact_final = (cent_w_cfg * cent_bonus_raw) * graph_boost_factor
|
cent_impact_final = (cent_w_cfg * cent_bonus_raw) * graph_boost_factor
|
||||||
|
|
||||||
# 4. Finales Zusammenführen (Merging)
|
# 4. Finales Zusammenführen (Merging)
|
||||||
# node_weight - 1.0 sorgt dafür, dass ein Gewicht von 1.0 keinen Einfluss hat (neutral).
|
# (node_weight - 1.0) sorgt dafür, dass ein Gewicht von 1.0 keinen Einfluss hat (neutral).
|
||||||
total = base_val * (1.0 + (node_weight - 1.0) + edge_impact_final + cent_impact_final)
|
total = base_val * (1.0 + (node_weight - 1.0) + edge_impact_final + cent_impact_final)
|
||||||
|
|
||||||
# Sicherstellen, dass der Score niemals 0 oder negativ ist (Floor)
|
# Sicherstellen, dass der Score niemals 0 oder negativ ist (Floor)
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user