"""
app/core/graph_adapter.py — adjacency construction & subgraph expansion (WP-04)

Purpose:
    Builds a lightweight in-memory graph from Qdrant edges (collection:
    *_edges) and provides edge-based metrics (in-degree, out-degree,
    edge_bonus).

Compatibility:
    Python 3.12+, qdrant-client 1.x

Version:
    0.1.0 (initial version)

Last updated:
    2025-10-07

Related:
    - WP-04 edge weights and heuristics
    - app/core/qdrant_points.py (get_edges_for_sources)

Usage:
    from app.core.graph_adapter import expand

Changelog:
    0.1.0 (2025-10-07) – initial version.
"""

from __future__ import annotations

import math
from collections import defaultdict
from typing import Dict, List, Optional, DefaultDict

from qdrant_client import QdrantClient

from app.core.qdrant_points import get_edges_for_sources

# Fallback weight per edge kind, used when an edge carries no weight of its
# own. Kinds that are not listed here fall back to 0.0.
EDGE_BASE_WEIGHTS = {
    "references": 0.20,
    "belongs_to": 0.10,
    "next": 0.06,
    "prev": 0.06,
    "backlink": 0.04,
    "references_at": 0.08,
}


class Subgraph:
    """Lightweight subgraph with adjacency lists and simple metrics."""

    def __init__(self) -> None:
        # Source id -> list of outgoing edges, each stored as a dict with the
        # keys "target", "kind" and "weight".
        self.adj: DefaultDict[str, List[Dict]] = defaultdict(list)
        # Node id -> number of edges pointing at that node.
        self.in_degree: DefaultDict[str, int] = defaultdict(int)
        # Node id -> number of edges leaving that node.
        self.out_degree: DefaultDict[str, int] = defaultdict(int)

    def add_edge(self, e: Dict) -> None:
        """Register one directed edge and update the degree counters.

        Args:
            e: Edge dict with the required keys "source", "target" and
                "kind" and an optional "weight". A missing or ``None``
                weight is replaced by the base weight of the edge kind
                (0.0 for unknown kinds).

        Raises:
            KeyError: If "source", "target" or "kind" is missing.
        """
        src = e["source"]
        tgt = e["target"]
        kind = e["kind"]

        # An explicit None (e.g. a null payload value) must be treated like a
        # missing weight; otherwise summing the weights later would fail.
        weight = e.get("weight")
        if weight is None:
            weight = EDGE_BASE_WEIGHTS.get(kind, 0.0)

        self.adj[src].append({"target": tgt, "kind": kind, "weight": weight})
        self.out_degree[src] += 1
        self.in_degree[tgt] += 1

    def aggregate_edge_bonus(self, node_id: str) -> float:
        """Return the sum of the weights of all outgoing edges of *node_id*.

        Nodes without outgoing edges yield 0.
        """
        # .get() instead of indexing: a lookup must not insert an empty entry
        # into the defaultdict.
        return sum(edge["weight"] for edge in self.adj.get(node_id, ()))

    def centrality_bonus(self, node_id: str) -> float:
        """Return a log-damped in-degree bonus, hard-capped at 0.15."""
        return min(math.log1p(self.in_degree.get(node_id, 0)) / 10.0, 0.15)


def expand(
    client: QdrantClient,
    prefix: str,
    seeds: List[str],
    depth: int = 1,
    edge_types: Optional[List[str]] = None,
    *,
    limit: int = 2048,
) -> Subgraph:
    """Expand from the seeds along edges, up to *depth* rounds.

    Each round fetches the edges of the current frontier via
    ``get_edges_for_sources`` and adds them to the subgraph; the targets that
    have not been used as a frontier yet form the next frontier.

    Args:
        client: Qdrant client handed through to ``get_edges_for_sources``.
        prefix: Collection prefix handed through to ``get_edges_for_sources``.
        seeds: Start nodes. Seeds are stable payload IDs (e.g. note_id,
            chunk_id).
        depth: Maximum number of expansion rounds; values <= 0 yield an empty
            subgraph.
        edge_types: Optional edge-type filter, handed through unchanged.
        limit: Value passed as ``limit`` to ``get_edges_for_sources`` in every
            round (presumably the maximum number of edges returned per
            round — confirm against that helper).

    Returns:
        The subgraph containing every edge collected during the expansion.
    """
    sg = Subgraph()
    frontier = set(seeds)
    visited = set()

    for _ in range(max(depth, 0)):
        if not frontier:
            break

        # NOTE(review): assumes the helper yields payload mappings with the
        # keys "source_id", "target_id", "kind" and "weight" — only .get() is
        # used on them here.
        edges = get_edges_for_sources(
            client,
            prefix,
            list(frontier),
            edge_types=edge_types,
            limit=limit,
        )

        next_frontier = set()
        for pl in edges:
            source = pl.get("source_id")
            target = pl.get("target_id")
            # Skip malformed edges: without both endpoints they would only
            # create None-keyed entries in the adjacency and degree maps.
            if not source or not target:
                continue

            sg.add_edge(
                {
                    "source": source,
                    "target": target,
                    "kind": pl.get("kind", "edge"),
                    # add_edge resolves a missing/None weight to the base
                    # weight of the edge kind.
                    "weight": pl.get("weight"),
                }
            )
            next_frontier.add(target)

        visited |= frontier
        # Never revisit a node that has already served as a frontier; this
        # also terminates the expansion on cycles.
        frontier = next_frontier - visited

    return sg