From 17b548e9b03d0fb5dfd353862da02761496fd15b Mon Sep 17 00:00:00 2001
From: Lars
Date: Tue, 7 Oct 2025 11:30:08 +0200
Subject: [PATCH] =?UTF-8?q?app/core/graph=5Fadapter.py=20hinzugef=C3=BCgt?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 app/core/graph_adapter.py | 92 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 92 insertions(+)
 create mode 100644 app/core/graph_adapter.py

diff --git a/app/core/graph_adapter.py b/app/core/graph_adapter.py
new file mode 100644
index 0000000..0c39771
--- /dev/null
+++ b/app/core/graph_adapter.py
@@ -0,0 +1,92 @@
+"""
+app/core/graph_adapter.py — Adjazenzaufbau & Subgraph-Expansion (WP-04)
+
+Zweck:
+    Baut aus Qdrant-Edges (Collection: *_edges) einen leichten In-Memory-Graph
+    und liefert Edge-basierte Kennzahlen (In-Degree, Out-Degree, edge_bonus).
+Kompatibilität:
+    Python 3.12+, qdrant-client 1.x
+Version:
+    0.1.0 (Erstanlage)
+Stand:
+    2025-10-07
+Bezug:
+    - WP-04 Edge-Gewichtungen und Heuristiken
+    - app/core/qdrant_points.py (get_edges_for_sources)
+Nutzung:
+    from app.core.graph_adapter import expand
+Änderungsverlauf:
+    0.1.0 (2025-10-07) – Erstanlage.
+"""
+
+from __future__ import annotations
+
+import math
+from collections import defaultdict
+from typing import DefaultDict, Dict, List, Optional
+
+from qdrant_client import QdrantClient
+
+from app.core.qdrant_points import get_edges_for_sources
+
+# Fallback weight per edge kind, applied when an edge has no usable weight.
+# Kinds that are not listed here fall back to 0.0.
+EDGE_BASE_WEIGHTS = {
+    "references": 0.20,
+    "belongs_to": 0.10,
+    "next": 0.06,
+    "prev": 0.06,
+    "backlink": 0.04,
+    "references_at": 0.08,
+}
+
+# centrality_bonus = min(log1p(in_degree) / _CENTRALITY_DAMPING, _CENTRALITY_CAP)
+_CENTRALITY_DAMPING = 10.0
+_CENTRALITY_CAP = 0.15
+
+
+class Subgraph:
+    """Lightweight subgraph with adjacency lists and simple degree metrics.
+
+    Attributes:
+        adj: Maps a source node ID to its outgoing edges; every edge is a
+            dict with the keys ``target``, ``kind`` and ``weight``.
+        in_degree: Number of recorded edges that point at each node ID.
+        out_degree: Number of recorded edges that leave each node ID.
+    """
+
+    def __init__(self) -> None:
+        self.adj: DefaultDict[str, List[Dict]] = defaultdict(list)
+        self.in_degree: DefaultDict[str, int] = defaultdict(int)
+        self.out_degree: DefaultDict[str, int] = defaultdict(int)
+
+    def add_edge(self, e: Dict) -> None:
+        """Record one directed edge and update both degree counters.
+
+        Args:
+            e: Edge dict with the required keys ``source``, ``target`` and
+                ``kind`` and an optional ``weight``.
+
+        Raises:
+            KeyError: If ``source``, ``target`` or ``kind`` is missing.
+        """
+        src = e["source"]
+        tgt = e["target"]
+        kind = e["kind"]
+        weight = e.get("weight")
+        if weight is None:
+            # A missing *or* explicitly null weight falls back to the base
+            # weight of the edge kind. Storing None here would make
+            # aggregate_edge_bonus() raise TypeError while summing.
+            weight = EDGE_BASE_WEIGHTS.get(kind, 0.0)
+        self.adj[src].append({"target": tgt, "kind": kind, "weight": weight})
+        self.out_degree[src] += 1
+        self.in_degree[tgt] += 1
+
+    def aggregate_edge_bonus(self, node_id: str) -> float:
+        """Return the summed weight of all outgoing edges of ``node_id``.
+
+        Nodes without recorded outgoing edges yield 0.
+        """
+        # .get() instead of indexing: a lookup must not insert an empty entry
+        # into the defaultdict.
+        return sum(edge["weight"] for edge in self.adj.get(node_id, []))
+
+    def centrality_bonus(self, node_id: str) -> float:
+        """Return a log-damped, hard-capped bonus based on the in-degree.
+
+        Computed as ``min(log1p(in_degree) / 10.0, 0.15)``; nodes without
+        incoming edges yield 0.0.
+        """
+        in_deg = self.in_degree.get(node_id, 0)
+        return min(math.log1p(in_deg) / _CENTRALITY_DAMPING, _CENTRALITY_CAP)
+
+
+def expand(client: QdrantClient, prefix: str, seeds: List[str],
+           depth: int = 1, edge_types: Optional[List[str]] = None,
+           *, limit: int = 2048) -> Subgraph:
+    """Expand from the seeds along outgoing edges, one hop per depth level.
+
+    Seeds are stable payload IDs (e.g. note_id, chunk_id). On every hop the
+    edges of the current frontier are fetched via ``get_edges_for_sources``
+    and their targets form the next frontier. A node is used as a source in
+    at most one hop.
+
+    Args:
+        client: Qdrant client, forwarded to ``get_edges_for_sources``.
+        prefix: Forwarded to ``get_edges_for_sources``.
+        seeds: Node IDs to start from; empty or ``None`` entries are ignored.
+        depth: Maximum number of hops; values <= 0 yield an empty subgraph.
+        edge_types: Optional list of edge kinds, forwarded as a filter.
+        limit: Forwarded as ``limit`` on every hop. Defaults to 2048, the
+            value that used to be hard-coded.
+
+    Returns:
+        The ``Subgraph`` holding every well-formed edge that was fetched.
+    """
+    sg = Subgraph()
+    frontier = {seed for seed in seeds if seed}
+    visited = set()
+
+    for _ in range(max(depth, 0)):
+        if not frontier:
+            break
+
+        payloads = get_edges_for_sources(
+            client, prefix, list(frontier), edge_types=edge_types, limit=limit,
+        )
+        next_frontier = set()
+        for pl in payloads:
+            source = pl.get("source_id")
+            target = pl.get("target_id")
+            if not source or not target:
+                # An edge without both endpoints would create a bogus None
+                # node and inflate the degree counters, so it is skipped.
+                continue
+            sg.add_edge({
+                "source": source,
+                "target": target,
+                "kind": pl.get("kind", "edge"),
+                # May be None; add_edge() then applies the base weight.
+                "weight": pl.get("weight"),
+            })
+            next_frontier.add(target)
+
+        visited |= frontier
+        # Never expand a node twice; this also terminates on cycles.
+        frontier = next_frontier - visited
+
+    return sg