# mindnet/app/core/graph/graph_db_adapter.py

"""
FILE: app/core/graph/graph_db_adapter.py
DESCRIPTION: Datenbeschaffung aus Qdrant für den Graphen.
             AUDIT v1.2.0: Gold-Standard v4.1.0 - Scope-Awareness & Section-Filtering.
             - Erweiterte Suche nach chunk_id-Edges für Scope-Awareness
             - Optionales target_section-Filtering für präzise Section-Links
             - Vollständige Metadaten-Unterstützung (provenance, confidence, virtual)
VERSION: 1.2.0 (WP-24c: Gold-Standard v4.1.0)
"""
from typing import List, Dict, Optional
from qdrant_client import QdrantClient
from qdrant_client.http import models as rest

# Nutzt die zentrale Infrastruktur für konsistente Collection-Namen (WP-14)
from app.core.database import collection_names

def fetch_edges_from_qdrant(
    client: QdrantClient,
    prefix: str,
    seeds: List[str],
    edge_types: Optional[List[str]] = None,
    target_section: Optional[str] = None,
    chunk_ids: Optional[List[str]] = None,
    limit: int = 2048,
) -> List[Dict]:
    """
    Fetch edge payloads from the edges collection that touch the given seeds.

    An edge qualifies when its ``source_id``, ``target_id`` or ``note_id``
    equals one of ``seeds``, or when its ``source_id`` equals one of
    ``chunk_ids``. ``edge_types`` and ``target_section`` narrow that set
    further; all active groups are combined with AND (``must``).

    Args:
        client: Qdrant client used to issue the scroll request.
        prefix: Collection prefix, resolved through ``collection_names``.
        seeds: IDs (note IDs) to look up. An empty value short-circuits to
            ``[]`` -- even when ``chunk_ids`` is supplied.
        edge_types: If given, keep only edges whose ``kind`` equals one of
            these values.
        target_section: If given (non-empty), keep only edges whose
            ``target_section`` equals this value.
        chunk_ids: Optional chunk IDs that are additionally matched against
            ``source_id`` to pick up chunk-level edges.
        limit: Maximum number of points requested from Qdrant in the single
            scroll call; values <= 0 return ``[]`` without querying.

    Returns:
        One ``dict`` copy of the payload per returned point that carries a
        non-empty payload.
    """
    # Guard clauses: nothing to look up, or nothing allowed to be returned.
    if not seeds:
        return []
    if limit <= 0:
        return []

    # collection_names yields (notes, chunks, edges); only edges is needed here.
    _notes_col, _chunks_col, edge_collection = collection_names(prefix)

    def _equals(key, value):
        # Exact-match condition on a payload field; values are always sent as str.
        return rest.FieldCondition(key=key, match=rest.MatchValue(value=str(value)))

    # ID group (OR): every seed against each of the three ID fields ...
    id_clauses = [
        _equals(field, seed)
        for field in ("source_id", "target_id", "note_id")
        for seed in seeds
    ]
    # ... plus chunk IDs, which are only matched as the edge source.
    if chunk_ids:
        id_clauses.extend(_equals("source_id", cid) for cid in chunk_ids)
    id_group = rest.Filter(should=id_clauses) if id_clauses else None

    # Kind group (OR): restrict to the requested edge types, if any.
    kind_group = (
        rest.Filter(should=[_equals("kind", kind) for kind in edge_types])
        if edge_types
        else None
    )

    # Section group: restrict to edges pointing at one specific section.
    section_group = (
        rest.Filter(must=[_equals("target_section", target_section)])
        if target_section
        else None
    )

    # All active groups must hold simultaneously.
    required = [group for group in (id_group, kind_group, section_group) if group]
    combined = rest.Filter(must=required) if required else None

    # Single scroll page; payloads are loaded in full, vectors are skipped.
    points, _next_offset = client.scroll(
        collection_name=edge_collection,
        scroll_filter=combined,
        limit=limit,
        with_payload=True,
        with_vectors=False,
    )

    # Hand back complete payload copies so downstream consumers see every field.
    edges = []
    for point in points:
        if point.payload:
            edges.append(dict(point.payload))
    return edges