101 lines
3.7 KiB
Python
101 lines
3.7 KiB
Python
"""
|
|
FILE: app/core/graph/graph_db_adapter.py
DESCRIPTION: Data retrieval from Qdrant for the graph.
AUDIT v1.2.0: Gold-Standard v4.1.0 - Scope-Awareness & Section-Filtering.
    - Extended search for chunk_id edges (scope awareness)
    - Optional target_section filtering for precise section links
    - Full metadata support (provenance, confidence, virtual)
VERSION: 1.2.0 (WP-24c: Gold-Standard v4.1.0)
|
|
"""
|
|
from typing import List, Dict, Optional
|
|
from qdrant_client import QdrantClient
|
|
from qdrant_client.http import models as rest
|
|
|
|
# Nutzt die zentrale Infrastruktur für konsistente Collection-Namen (WP-14)
|
|
from app.core.database import collection_names
|
|
|
|
def fetch_edges_from_qdrant(
    client: QdrantClient,
    prefix: str,
    seeds: List[str],
    edge_types: Optional[List[str]] = None,
    target_section: Optional[str] = None,
    chunk_ids: Optional[List[str]] = None,
    limit: int = 2048,
) -> List[Dict]:
    """
    Fetch edge payloads from the edges collection for a set of seed IDs.

    An edge is selected when any seed equals its ``source_id``, ``target_id``
    or ``note_id`` payload field, or when any entry of ``chunk_ids`` equals
    its ``source_id``. The optional ``edge_types`` and ``target_section``
    restrictions are combined with that selection via a ``must`` filter.

    Args:
        client: Qdrant client used to issue the scroll request.
        prefix: Collection prefix passed to ``collection_names``.
        seeds: IDs to look up (note IDs per the original documentation).
        edge_types: If given, only edges whose ``kind`` equals one of these
            values are returned.
        target_section: If given, only edges whose ``target_section`` equals
            this value are returned.
        chunk_ids: If given, edges whose ``source_id`` equals one of these
            chunk IDs are selected in addition to the seed matches.
        limit: Maximum number of points requested from Qdrant.

    Returns:
        A list with one ``dict`` copy of the payload per returned point;
        points with an empty payload are skipped. An empty list is returned
        without querying when ``seeds`` is empty or ``limit`` is not
        positive (this also applies when ``chunk_ids`` is supplied).
    """
    # Guard clauses: nothing to look up, or nothing may be returned.
    if not seeds:
        return []
    if limit <= 0:
        return []

    def _equals(key, value):
        # Exact-match condition on one payload field; values are compared
        # in their string form.
        return rest.FieldCondition(key=key, match=rest.MatchValue(value=str(value)))

    # collection_names yields (notes, chunks, edges); only edges is needed.
    _notes_col, _chunks_col, edge_collection = collection_names(prefix)

    # Seed matching: every seed is tried against each of the three ID fields.
    any_of_seeds = [
        _equals(payload_key, seed)
        for payload_key in ("source_id", "target_id", "note_id")
        for seed in seeds
    ]
    # Chunk-level edges: chunk IDs are matched as edge sources only.
    if chunk_ids:
        any_of_seeds.extend(_equals("source_id", chunk_id) for chunk_id in chunk_ids)

    seed_clause = rest.Filter(should=any_of_seeds) if any_of_seeds else None

    # Optional restriction to specific edge kinds (any one of them may match).
    kind_clause = (
        rest.Filter(should=[_equals("kind", kind) for kind in edge_types])
        if edge_types
        else None
    )

    # Optional restriction to a single target section.
    section_clause = (
        rest.Filter(must=[_equals("target_section", target_section)])
        if target_section
        else None
    )

    # All active clauses must hold simultaneously.
    clauses = [
        clause
        for clause in (seed_clause, kind_clause, section_clause)
        if clause
    ]
    scroll_filter = rest.Filter(must=clauses) if clauses else None

    # A single scroll request; with_payload=True loads the full payload of
    # every point. The second element of the result tuple (Qdrant's
    # next-page offset) is not used.
    points, _next_offset = client.scroll(
        collection_name=edge_collection,
        scroll_filter=scroll_filter,
        limit=limit,
        with_payload=True,
        with_vectors=False,
    )

    # Hand back the complete payloads so callers see every stored field.
    payloads = []
    for point in points:
        if point.payload:
            payloads.append(dict(point.payload))
    return payloads