diff --git a/app/core/retriever.py b/app/core/retriever.py index 304488f..a7e7a7f 100644 --- a/app/core/retriever.py +++ b/app/core/retriever.py @@ -1,7 +1,7 @@ from __future__ import annotations import time -from typing import Any, Dict, List, Tuple, Iterable +from typing import Any, Dict, List, Tuple from app.config import get_settings from app.models.dto import QueryRequest, QueryResponse, QueryHit @@ -141,7 +141,8 @@ def _build_hits_from_semantic( edge_bonus = 0.0 cent_bonus = 0.0 if subgraph is not None: - node_key = payload.get("chunk_id") or payload.get("note_id") + # WICHTIG: Knoten im Graphen sind note_ids, nicht chunk_ids. + node_key = payload.get("note_id") if node_key: try: edge_bonus = float(subgraph.edge_bonus(node_key)) @@ -216,10 +217,12 @@ def hybrid_retrieve(req: QueryRequest) -> QueryResponse: depth, edge_types = _extract_expand_options(req) subgraph = None if depth > 0: - # Seeds: stabile IDs aus dem Payload (chunk_id bevorzugt, sonst note_id) + # Seeds: stabile IDs aus dem Payload + # WICHTIG: Wir verwenden note_id als Knoten-ID, da Edges zwischen Notes + # modelliert sind (source_id/target_id = note_id). seed_ids: List[str] = [] for _, _score, payload in hits: - key = payload.get("chunk_id") or payload.get("note_id") + key = payload.get("note_id") if key and key not in seed_ids: seed_ids.append(key)