""" FILE: app/core/graph/graph_db_adapter.py DESCRIPTION: Datenbeschaffung aus Qdrant für den Graphen. AUDIT v1.2.0: Gold-Standard v4.1.0 - Scope-Awareness & Section-Filtering. - Erweiterte Suche nach chunk_id-Edges für Scope-Awareness - Optionales target_section-Filtering für präzise Section-Links - Vollständige Metadaten-Unterstützung (provenance, confidence, virtual) VERSION: 1.2.0 (WP-24c: Gold-Standard v4.1.0) """ from typing import List, Dict, Optional from qdrant_client import QdrantClient from qdrant_client.http import models as rest # Nutzt die zentrale Infrastruktur für konsistente Collection-Namen (WP-14) from app.core.database import collection_names def fetch_edges_from_qdrant( client: QdrantClient, prefix: str, seeds: List[str], edge_types: Optional[List[str]] = None, target_section: Optional[str] = None, chunk_ids: Optional[List[str]] = None, limit: int = 2048, ) -> List[Dict]: """ Holt Edges aus der Datenbank basierend auf Seed-IDs. WP-24c v4.1.0: Scope-Aware Edge Retrieval mit Section-Filtering. Args: client: Qdrant Client prefix: Collection-Präfix seeds: Liste von Note-IDs für die Suche edge_types: Optionale Filterung nach Kanten-Typen target_section: Optionales Section-Filtering (für präzise Section-Links) chunk_ids: Optionale Liste von Chunk-IDs für Scope-Awareness (Chunk-Level Edges) limit: Maximale Anzahl zurückgegebener Edges """ if not seeds or limit <= 0: return [] # Konsistente Namensauflösung via database-Paket # Rückgabe: (notes_col, chunks_col, edges_col) _, _, edges_col = collection_names(prefix) # WP-24c v4.1.0: Scope-Awareness - Suche nach Note- UND Chunk-Level Edges seed_conditions = [] for field in ("source_id", "target_id", "note_id"): for s in seeds: seed_conditions.append( rest.FieldCondition(key=field, match=rest.MatchValue(value=str(s))) ) # Chunk-Level Edges: Wenn chunk_ids angegeben, suche auch nach chunk_id als source_id if chunk_ids: for cid in chunk_ids: seed_conditions.append( rest.FieldCondition(key="source_id", match=rest.MatchValue(value=str(cid))) ) seeds_filter = rest.Filter(should=seed_conditions) if seed_conditions else None # Optionaler Filter auf spezifische Kanten-Typen (z.B. für Intent-Routing) type_filter = None if edge_types: type_conds = [ rest.FieldCondition(key="kind", match=rest.MatchValue(value=str(k))) for k in edge_types ] type_filter = rest.Filter(should=type_conds) # WP-24c v4.1.0: Section-Filtering für präzise Section-Links section_filter = None if target_section: section_filter = rest.Filter(must=[ rest.FieldCondition(key="target_section", match=rest.MatchValue(value=str(target_section))) ]) must = [] if seeds_filter: must.append(seeds_filter) if type_filter: must.append(type_filter) if section_filter: must.append(section_filter) flt = rest.Filter(must=must) if must else None # Abfrage via Qdrant Scroll API # WICHTIG: with_payload=True lädt alle Metadaten (target_section, provenance etc.) pts, _ = client.scroll( collection_name=edges_col, scroll_filter=flt, limit=limit, with_payload=True, with_vectors=False, ) # Wir geben das vollständige Payload zurück, damit der Retriever # alle Signale für die Super-Edge-Aggregation und das Scoring hat. return [dict(p.payload) for p in pts if p.payload]