app/core/graph_adapter.py aktualisiert
All checks were successful
Deploy mindnet to llm-node / deploy (push) Successful in 3s

This commit is contained in:
Lars 2025-12-03 18:40:06 +01:00
parent b8b6f243d1
commit 13b7c8858a

View File

@ -1,90 +1,240 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" """
app/core/graph_adapter.py Adjazenzaufbau & Subgraph-Expansion (WP-04) app/core/graph_adapter.py Adjazenzaufbau & Subgraph-Expansion (WP-04)
Zweck: Zweck:
Baut aus Qdrant-Edges (Collection: *_edges) einen leichten In-Memory-Graph Baut aus Qdrant-Edges (Collection: *_edges) einen leichten In-Memory-Graph
und liefert Edge-basierte Kennzahlen (In-Degree, Out-Degree, edge_bonus). und liefert Edge-basierte Kennzahlen (In-Degree, Out-Degree, edge_bonus).
Kompatibilität: Kompatibilität:
Python 3.12+, qdrant-client 1.x - Python 3.12+, qdrant-client 1.x
- Wird von app/core/retriever.py im Hybrid-Modus genutzt.
- Signaturen bleiben kompatibel zu den bestehenden Tests
(tests/test_retriever_edges.py patcht expand()).
Version: Version:
0.1.0 (Erstanlage) 0.2.0 (2025-11-30 direkte Qdrant-Abfrage, confidence-basiertes Gewicht)
Stand:
2025-10-07
Bezug:
- WP-04 Edge-Gewichtungen und Heuristiken
- app/core/qdrant_points.py (get_edges_for_sources)
Nutzung:
from app.core.graph_adapter import expand
Änderungsverlauf:
0.1.0 (2025-10-07) Erstanlage.
""" """
from __future__ import annotations from __future__ import annotations
from typing import Dict, List, Optional, DefaultDict from typing import Dict, List, Optional, DefaultDict
from collections import defaultdict from collections import defaultdict
from qdrant_client import QdrantClient
from app.core.qdrant_points import get_edges_for_sources
EDGE_BASE_WEIGHTS = { from qdrant_client import QdrantClient
"references": 0.20, from qdrant_client.http import models as rest
"belongs_to": 0.10,
"next": 0.06, from app.core.qdrant import collection_names
"prev": 0.06,
"backlink": 0.04, # Legacy-Import (wird aktuell nicht mehr verwendet, bleibt aber erhalten,
# damit bestehende Importe/Mocks nicht brechen).
try: # pragma: no cover
from app.core.qdrant_points import get_edges_for_sources # type: ignore
except Exception: # pragma: no cover
get_edges_for_sources = None # type: ignore
# Basisgewichte je Edge-Typ.
# Diese Werte werden mit der in der Edge-Payload hinterlegten "confidence"
# multipliziert, falls vorhanden.
EDGE_BASE_WEIGHTS: Dict[str, float] = {
"references": 0.20,
"belongs_to": 0.10,
"next": 0.06,
"prev": 0.06,
"backlink": 0.04,
"references_at": 0.08, "references_at": 0.08,
# weitere Typen erhalten per Default 0.0 und wirken nur über centrality
} }
def _edge_weight(pl: Dict) -> float:
"""Berechnet das effektive Edge-Gewicht aus kind + confidence."""
kind = pl.get("kind", "edge")
base = EDGE_BASE_WEIGHTS.get(kind, 0.0)
conf_raw = pl.get("confidence", None)
try:
conf = float(conf_raw) if conf_raw is not None else None
except Exception:
conf = None
if conf is None:
return base
# Confidence vorsichtig in [0.0, 1.0] clampen
if conf < 0.0:
conf = 0.0
if conf > 1.0:
conf = 1.0
return base * conf
def _fetch_edges(
client: QdrantClient,
prefix: str,
seeds: List[str],
edge_types: Optional[List[str]] = None,
limit: int = 2048,
) -> List[Dict]:
"""
Holt Edges direkt aus der *_edges Collection.
Filter:
- source_id IN seeds ODER target_id IN seeds ODER note_id IN seeds
- optional: kind IN edge_types
"""
if not seeds or limit <= 0:
return []
_, _, edges_col = collection_names(prefix)
# OR über source_id / target_id / note_id für alle Seeds
seed_conditions = []
for field in ("source_id", "target_id", "note_id"):
for s in seeds:
seed_conditions.append(
rest.FieldCondition(
key=field,
match=rest.MatchValue(value=str(s)),
)
)
seeds_filter = rest.Filter(should=seed_conditions) if seed_conditions else None
# Optional: Filter auf bestimmte Edge-Typen (kind)
type_filter = None
if edge_types:
type_conds = [
rest.FieldCondition(
key="kind",
match=rest.MatchValue(value=str(k)),
)
for k in edge_types
]
type_filter = rest.Filter(should=type_conds)
flt = None
must = []
if seeds_filter:
must.append(seeds_filter)
if type_filter:
must.append(type_filter)
if must:
flt = rest.Filter(must=must)
pts, _ = client.scroll(
collection_name=edges_col,
scroll_filter=flt,
limit=limit,
with_payload=True,
with_vectors=False,
)
out: List[Dict] = []
for p in pts or []:
pl = dict(p.payload or {})
if pl:
out.append(pl)
return out
class Subgraph: class Subgraph:
"""Leichtgewichtiger Subgraph mit Adjazenzlisten & einfachen Kennzahlen.""" """Leichtgewichtiger Subgraph mit Adjazenzlisten & einfachen Kennzahlen."""
def __init__(self): def __init__(self) -> None:
self.adj: DefaultDict[str, List[Dict]] = defaultdict(list) self.adj: DefaultDict[str, List[Dict]] = defaultdict(list)
self.in_degree: DefaultDict[str, int] = defaultdict(int) self.in_degree: DefaultDict[str, int] = defaultdict(int)
self.out_degree: DefaultDict[str, int] = defaultdict(int) self.out_degree: DefaultDict[str, int] = defaultdict(int)
def add_edge(self, e: Dict): def add_edge(self, e: Dict) -> None:
src = e["source"]; tgt = e["target"]; kind = e["kind"] src = e["source"]
tgt = e["target"]
kind = e["kind"]
weight = e.get("weight", EDGE_BASE_WEIGHTS.get(kind, 0.0)) weight = e.get("weight", EDGE_BASE_WEIGHTS.get(kind, 0.0))
self.adj[src].append({"target": tgt, "kind": kind, "weight": weight})
if not src or not tgt:
return
self.adj[src].append(
{
"target": tgt,
"kind": kind,
"weight": weight,
}
)
self.out_degree[src] += 1 self.out_degree[src] += 1
self.in_degree[tgt] += 1 self.in_degree[tgt] += 1
def aggregate_edge_bonus(self, node_id: str) -> float: def aggregate_edge_bonus(self, node_id: str) -> float:
"""
Summe der ausgehenden Kantengewichte für einen Knoten.
"""
return sum(edge["weight"] for edge in self.adj.get(node_id, [])) return sum(edge["weight"] for edge in self.adj.get(node_id, []))
def centrality_bonus(self, node_id: str) -> float: def centrality_bonus(self, node_id: str) -> float:
"""
Einfache log-gedämpfte Zentralität auf Basis der In-Degree.
Obergrenze: 0.15
"""
import math import math
# Log-gedämpfter Bonus, hart begrenzt
return min(math.log1p(self.in_degree.get(node_id, 0)) / 10.0, 0.15) indeg = self.in_degree.get(node_id, 0)
if indeg <= 0:
return 0.0
return min(math.log1p(indeg) / 10.0, 0.15)
def expand(client: QdrantClient, prefix: str, seeds: List[str], def expand(
depth: int = 1, edge_types: Optional[List[str]] = None) -> Subgraph: client: QdrantClient,
prefix: str,
seeds: List[str],
depth: int = 1,
edge_types: Optional[List[str]] = None,
) -> Subgraph:
""" """
Expandiert ab Seeds entlang von Edges (bis depth), optional gefiltert nach Typen. Expandiert ab Seeds entlang von Edges (bis `depth`), optional gefiltert
Seeds sind stabile payload-IDs (z. B. note_id, chunk_id). nach Edge-Typen.
Seeds sind stabile payload-IDs (z. B. note_id, chunk_id). Es werden Edges
berücksichtigt, bei denen source_id ODER target_id ODER note_id einem der
Seeds entspricht.
""" """
sg = Subgraph() sg = Subgraph()
frontier = set(seeds) frontier = set(seeds)
visited = set() visited = set()
for _ in range(max(depth, 0)): max_depth = max(depth, 0)
for _ in range(max_depth):
if not frontier: if not frontier:
break break
edges = get_edges_for_sources(client, prefix, list(frontier), edge_types=edge_types, limit=2048) edges_payloads = _fetch_edges(
client=client,
prefix=prefix,
seeds=list(frontier),
edge_types=edge_types,
limit=2048,
)
next_frontier = set() next_frontier = set()
for pl in edges: for pl in edges_payloads:
src = pl.get("source_id")
tgt = pl.get("target_id")
kind = pl.get("kind", "edge")
e = { e = {
"source": pl.get("source_id"), "source": src,
"target": pl.get("target_id"), "target": tgt,
"kind": pl.get("kind", "edge"), "kind": kind,
"weight": pl.get("weight", EDGE_BASE_WEIGHTS.get(pl.get("kind", "edge"), 0.0)), "weight": _edge_weight(pl),
} }
sg.add_edge(e) sg.add_edge(e)
if e["target"]:
next_frontier.add(e["target"]) if tgt:
next_frontier.add(tgt)
visited |= frontier visited |= frontier
frontier = next_frontier - visited frontier = next_frontier - visited