From 3111a26229f074dbfada936d7a635c0bade4a196 Mon Sep 17 00:00:00 2001 From: Lars Date: Tue, 7 Oct 2025 13:34:53 +0200 Subject: [PATCH] app/core/retriever.py aktualisiert --- app/core/retriever.py | 50 +++++++++++++++++++++++-------------------- 1 file changed, 27 insertions(+), 23 deletions(-) diff --git a/app/core/retriever.py b/app/core/retriever.py index 8484474..a8b9af3 100644 --- a/app/core/retriever.py +++ b/app/core/retriever.py @@ -2,21 +2,24 @@ app/core/retriever.py — Semantischer/Edge-Aware/Hybrid Retriever (WP-04) Zweck: - Kandidatenfindung via Vektorsuche in *_chunks, optionale Edge-Expansion und - kombiniertes Ranking zur Rückgabe von Top-K Treffern. + Kandidatenfindung via Vektorsuche in *_chunks, optionale Edge-Expansion + und kombiniertes Ranking zur Rückgabe von Top-K Treffern. + Erweiterung (0.2.0): Text→Embedding, falls kein query_vector übergeben wurde. + Kompatibilität: Python 3.12+, qdrant-client 1.x Version: - 0.1.0 (Erstanlage) + 0.2.0 (Text→Embedding ergänzt; bestehendes Verhalten unverändert) Stand: 2025-10-07 Bezug: - app/core/graph_adapter.py (expand) - app/core/ranking.py (combine_scores) - app/core/qdrant_points.py (search_chunks_by_vector) -Nutzung: - from app.core.retriever import hybrid_retrieve + - app/services/embeddings_client.py (embed_text) + - app/models/dto.py (QueryRequest/Response) Änderungsverlauf: + 0.2.0 (2025-10-07) – Text→Embedding (embed_text_if_needed). 0.1.0 (2025-10-07) – Erstanlage. """ @@ -30,18 +33,21 @@ from app.core.ranking import combine_scores from app.core.graph_adapter import expand from app.core import qdrant_points as qp from app.config import get_settings +from app.services.embeddings_client import embed_text -def _require_query_vector(req: QueryRequest) -> List[float]: +def _vector_from_request(req: QueryRequest) -> List[float]: """ - Für den Schnelltest ohne eingebundene Embeddings muss query_vector gesetzt sein. - Später kann hier der Embed-Aufruf (Text → 384d) angebunden werden. + Query-Vektor bestimmen: + - Falls query_vector gesetzt: unverändert verwenden (Back-compat, Tests). + - Sonst, falls query gesetzt: serverseitig einbetten. + - Andernfalls: Fehler. """ - if not req.query_vector: - raise ValueError( - "query_vector fehlt. Für den Quick-Test ohne Embeddings bitte einen 384d-Vektor übergeben." - ) - return req.query_vector + if req.query_vector: + return req.query_vector + if req.query: + return embed_text(req.query) + raise ValueError("query_vector fehlt. Alternativ 'query' (Text) übergeben, wird serverseitig eingebettet.") def semantic_retrieve(req: QueryRequest) -> QueryResponse: @@ -50,9 +56,8 @@ def semantic_retrieve(req: QueryRequest) -> QueryResponse: s = get_settings() client = QdrantClient(url=s.QDRANT_URL, api_key=s.QDRANT_API_KEY) - q_vec = _require_query_vector(req) + q_vec = _vector_from_request(req) raw_hits = qp.search_chunks_by_vector(client, s.COLLECTION_PREFIX, q_vec, top=req.top_k, filters=req.filters) - id2payload = {pid: payload for (pid, score, payload) in raw_hits} results: List[QueryHit] = [] for pid, s_score, payload in raw_hits: @@ -62,11 +67,10 @@ def semantic_retrieve(req: QueryRequest) -> QueryResponse: semantic_score=float(s_score), edge_bonus=0.0, centrality_bonus=0.0, - total_score=float(s_score), # hier un-normalisiert; ok für schnelle Prüfung + total_score=float(s_score), # un-normalisiert: ok für quick semantic mode paths=None, source={"path": payload.get("path"), "section": payload.get("section_title")} )) - dt = int((time.time() - t0) * 1000) return QueryResponse(results=results, used_mode="semantic", latency_ms=dt) @@ -77,11 +81,11 @@ def hybrid_retrieve(req: QueryRequest) -> QueryResponse: s = get_settings() client = QdrantClient(url=s.QDRANT_URL, api_key=s.QDRANT_API_KEY) - q_vec = _require_query_vector(req) + q_vec = _vector_from_request(req) # 1) Semantische Seeds (top_k * 3 für breitere Basis) raw_hits = qp.search_chunks_by_vector(client, s.COLLECTION_PREFIX, q_vec, top=req.top_k * 3, filters=req.filters) - id2payload = {pid: payload for (pid, score, payload) in raw_hits} + id2payload = {pid: payload for (pid, _, payload) in raw_hits} seeds = [pid for (pid, _, _) in raw_hits] # 2) Edge-Expansion @@ -93,10 +97,10 @@ def hybrid_retrieve(req: QueryRequest) -> QueryResponse: centrality_map = {pid: sg.centrality_bonus(pid) for pid in seeds} # 3) Combined Ranking - scored = combine_scores(raw_hits, edge_bonus_map, centrality_map, - w_sem=s.RETRIEVER_W_SEM, - w_edge=s.RETRIEVER_W_EDGE, - w_cent=s.RETRIEVER_W_CENT) + scored = combine_scores( + raw_hits, edge_bonus_map, centrality_map, + w_sem=s.RETRIEVER_W_SEM, w_edge=s.RETRIEVER_W_EDGE, w_cent=s.RETRIEVER_W_CENT + ) # 4) Antwortobjekte (Chunk-Ebene) results: List[QueryHit] = []