app/core/graph_adapter.py aktualisiert
All checks were successful
Deploy mindnet to llm-node / deploy (push) Successful in 3s
All checks were successful
Deploy mindnet to llm-node / deploy (push) Successful in 3s
This commit is contained in:
parent
b8b6f243d1
commit
13b7c8858a
|
|
@ -1,90 +1,240 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
"""
|
"""
|
||||||
app/core/graph_adapter.py — Adjazenzaufbau & Subgraph-Expansion (WP-04)
|
app/core/graph_adapter.py — Adjazenzaufbau & Subgraph-Expansion (WP-04)
|
||||||
|
|
||||||
Zweck:
|
Zweck:
|
||||||
Baut aus Qdrant-Edges (Collection: *_edges) einen leichten In-Memory-Graph
|
Baut aus Qdrant-Edges (Collection: *_edges) einen leichten In-Memory-Graph
|
||||||
und liefert Edge-basierte Kennzahlen (In-Degree, Out-Degree, edge_bonus).
|
und liefert Edge-basierte Kennzahlen (In-Degree, Out-Degree, edge_bonus).
|
||||||
|
|
||||||
Kompatibilität:
|
Kompatibilität:
|
||||||
Python 3.12+, qdrant-client 1.x
|
- Python 3.12+, qdrant-client 1.x
|
||||||
|
- Wird von app/core/retriever.py im Hybrid-Modus genutzt.
|
||||||
|
- Signaturen bleiben kompatibel zu den bestehenden Tests
|
||||||
|
(tests/test_retriever_edges.py patcht expand()).
|
||||||
|
|
||||||
Version:
|
Version:
|
||||||
0.1.0 (Erstanlage)
|
0.2.0 (2025-11-30 – direkte Qdrant-Abfrage, confidence-basiertes Gewicht)
|
||||||
Stand:
|
|
||||||
2025-10-07
|
|
||||||
Bezug:
|
|
||||||
- WP-04 Edge-Gewichtungen und Heuristiken
|
|
||||||
- app/core/qdrant_points.py (get_edges_for_sources)
|
|
||||||
Nutzung:
|
|
||||||
from app.core.graph_adapter import expand
|
|
||||||
Änderungsverlauf:
|
|
||||||
0.1.0 (2025-10-07) – Erstanlage.
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
from typing import Dict, List, Optional, DefaultDict
|
from typing import Dict, List, Optional, DefaultDict
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
from qdrant_client import QdrantClient
|
|
||||||
from app.core.qdrant_points import get_edges_for_sources
|
|
||||||
|
|
||||||
EDGE_BASE_WEIGHTS = {
|
from qdrant_client import QdrantClient
|
||||||
"references": 0.20,
|
from qdrant_client.http import models as rest
|
||||||
"belongs_to": 0.10,
|
|
||||||
"next": 0.06,
|
from app.core.qdrant import collection_names
|
||||||
"prev": 0.06,
|
|
||||||
"backlink": 0.04,
|
# Legacy-Import (wird aktuell nicht mehr verwendet, bleibt aber erhalten,
|
||||||
|
# damit bestehende Importe/Mocks nicht brechen).
|
||||||
|
try: # pragma: no cover
|
||||||
|
from app.core.qdrant_points import get_edges_for_sources # type: ignore
|
||||||
|
except Exception: # pragma: no cover
|
||||||
|
get_edges_for_sources = None # type: ignore
|
||||||
|
|
||||||
|
|
||||||
|
# Basisgewichte je Edge-Typ.
|
||||||
|
# Diese Werte werden mit der in der Edge-Payload hinterlegten "confidence"
|
||||||
|
# multipliziert, falls vorhanden.
|
||||||
|
EDGE_BASE_WEIGHTS: Dict[str, float] = {
|
||||||
|
"references": 0.20,
|
||||||
|
"belongs_to": 0.10,
|
||||||
|
"next": 0.06,
|
||||||
|
"prev": 0.06,
|
||||||
|
"backlink": 0.04,
|
||||||
"references_at": 0.08,
|
"references_at": 0.08,
|
||||||
|
# weitere Typen erhalten per Default 0.0 und wirken nur über centrality
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _edge_weight(pl: Dict) -> float:
|
||||||
|
"""Berechnet das effektive Edge-Gewicht aus kind + confidence."""
|
||||||
|
kind = pl.get("kind", "edge")
|
||||||
|
base = EDGE_BASE_WEIGHTS.get(kind, 0.0)
|
||||||
|
|
||||||
|
conf_raw = pl.get("confidence", None)
|
||||||
|
try:
|
||||||
|
conf = float(conf_raw) if conf_raw is not None else None
|
||||||
|
except Exception:
|
||||||
|
conf = None
|
||||||
|
|
||||||
|
if conf is None:
|
||||||
|
return base
|
||||||
|
|
||||||
|
# Confidence vorsichtig in [0.0, 1.0] clampen
|
||||||
|
if conf < 0.0:
|
||||||
|
conf = 0.0
|
||||||
|
if conf > 1.0:
|
||||||
|
conf = 1.0
|
||||||
|
|
||||||
|
return base * conf
|
||||||
|
|
||||||
|
|
||||||
|
def _fetch_edges(
|
||||||
|
client: QdrantClient,
|
||||||
|
prefix: str,
|
||||||
|
seeds: List[str],
|
||||||
|
edge_types: Optional[List[str]] = None,
|
||||||
|
limit: int = 2048,
|
||||||
|
) -> List[Dict]:
|
||||||
|
"""
|
||||||
|
Holt Edges direkt aus der *_edges Collection.
|
||||||
|
|
||||||
|
Filter:
|
||||||
|
- source_id IN seeds ODER target_id IN seeds ODER note_id IN seeds
|
||||||
|
- optional: kind IN edge_types
|
||||||
|
"""
|
||||||
|
if not seeds or limit <= 0:
|
||||||
|
return []
|
||||||
|
|
||||||
|
_, _, edges_col = collection_names(prefix)
|
||||||
|
|
||||||
|
# OR über source_id / target_id / note_id für alle Seeds
|
||||||
|
seed_conditions = []
|
||||||
|
for field in ("source_id", "target_id", "note_id"):
|
||||||
|
for s in seeds:
|
||||||
|
seed_conditions.append(
|
||||||
|
rest.FieldCondition(
|
||||||
|
key=field,
|
||||||
|
match=rest.MatchValue(value=str(s)),
|
||||||
|
)
|
||||||
|
)
|
||||||
|
seeds_filter = rest.Filter(should=seed_conditions) if seed_conditions else None
|
||||||
|
|
||||||
|
# Optional: Filter auf bestimmte Edge-Typen (kind)
|
||||||
|
type_filter = None
|
||||||
|
if edge_types:
|
||||||
|
type_conds = [
|
||||||
|
rest.FieldCondition(
|
||||||
|
key="kind",
|
||||||
|
match=rest.MatchValue(value=str(k)),
|
||||||
|
)
|
||||||
|
for k in edge_types
|
||||||
|
]
|
||||||
|
type_filter = rest.Filter(should=type_conds)
|
||||||
|
|
||||||
|
flt = None
|
||||||
|
must = []
|
||||||
|
if seeds_filter:
|
||||||
|
must.append(seeds_filter)
|
||||||
|
if type_filter:
|
||||||
|
must.append(type_filter)
|
||||||
|
if must:
|
||||||
|
flt = rest.Filter(must=must)
|
||||||
|
|
||||||
|
pts, _ = client.scroll(
|
||||||
|
collection_name=edges_col,
|
||||||
|
scroll_filter=flt,
|
||||||
|
limit=limit,
|
||||||
|
with_payload=True,
|
||||||
|
with_vectors=False,
|
||||||
|
)
|
||||||
|
|
||||||
|
out: List[Dict] = []
|
||||||
|
for p in pts or []:
|
||||||
|
pl = dict(p.payload or {})
|
||||||
|
if pl:
|
||||||
|
out.append(pl)
|
||||||
|
return out
|
||||||
|
|
||||||
|
|
||||||
class Subgraph:
|
class Subgraph:
|
||||||
"""Leichtgewichtiger Subgraph mit Adjazenzlisten & einfachen Kennzahlen."""
|
"""Leichtgewichtiger Subgraph mit Adjazenzlisten & einfachen Kennzahlen."""
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self) -> None:
|
||||||
self.adj: DefaultDict[str, List[Dict]] = defaultdict(list)
|
self.adj: DefaultDict[str, List[Dict]] = defaultdict(list)
|
||||||
self.in_degree: DefaultDict[str, int] = defaultdict(int)
|
self.in_degree: DefaultDict[str, int] = defaultdict(int)
|
||||||
self.out_degree: DefaultDict[str, int] = defaultdict(int)
|
self.out_degree: DefaultDict[str, int] = defaultdict(int)
|
||||||
|
|
||||||
def add_edge(self, e: Dict):
|
def add_edge(self, e: Dict) -> None:
|
||||||
src = e["source"]; tgt = e["target"]; kind = e["kind"]
|
src = e["source"]
|
||||||
|
tgt = e["target"]
|
||||||
|
kind = e["kind"]
|
||||||
weight = e.get("weight", EDGE_BASE_WEIGHTS.get(kind, 0.0))
|
weight = e.get("weight", EDGE_BASE_WEIGHTS.get(kind, 0.0))
|
||||||
self.adj[src].append({"target": tgt, "kind": kind, "weight": weight})
|
|
||||||
|
if not src or not tgt:
|
||||||
|
return
|
||||||
|
|
||||||
|
self.adj[src].append(
|
||||||
|
{
|
||||||
|
"target": tgt,
|
||||||
|
"kind": kind,
|
||||||
|
"weight": weight,
|
||||||
|
}
|
||||||
|
)
|
||||||
self.out_degree[src] += 1
|
self.out_degree[src] += 1
|
||||||
self.in_degree[tgt] += 1
|
self.in_degree[tgt] += 1
|
||||||
|
|
||||||
def aggregate_edge_bonus(self, node_id: str) -> float:
|
def aggregate_edge_bonus(self, node_id: str) -> float:
|
||||||
|
"""
|
||||||
|
Summe der ausgehenden Kantengewichte für einen Knoten.
|
||||||
|
"""
|
||||||
return sum(edge["weight"] for edge in self.adj.get(node_id, []))
|
return sum(edge["weight"] for edge in self.adj.get(node_id, []))
|
||||||
|
|
||||||
def centrality_bonus(self, node_id: str) -> float:
|
def centrality_bonus(self, node_id: str) -> float:
|
||||||
|
"""
|
||||||
|
Einfache log-gedämpfte Zentralität auf Basis der In-Degree.
|
||||||
|
Obergrenze: 0.15
|
||||||
|
"""
|
||||||
import math
|
import math
|
||||||
# Log-gedämpfter Bonus, hart begrenzt
|
|
||||||
return min(math.log1p(self.in_degree.get(node_id, 0)) / 10.0, 0.15)
|
indeg = self.in_degree.get(node_id, 0)
|
||||||
|
if indeg <= 0:
|
||||||
|
return 0.0
|
||||||
|
return min(math.log1p(indeg) / 10.0, 0.15)
|
||||||
|
|
||||||
|
|
||||||
def expand(client: QdrantClient, prefix: str, seeds: List[str],
|
def expand(
|
||||||
depth: int = 1, edge_types: Optional[List[str]] = None) -> Subgraph:
|
client: QdrantClient,
|
||||||
|
prefix: str,
|
||||||
|
seeds: List[str],
|
||||||
|
depth: int = 1,
|
||||||
|
edge_types: Optional[List[str]] = None,
|
||||||
|
) -> Subgraph:
|
||||||
"""
|
"""
|
||||||
Expandiert ab Seeds entlang von Edges (bis depth), optional gefiltert nach Typen.
|
Expandiert ab Seeds entlang von Edges (bis `depth`), optional gefiltert
|
||||||
Seeds sind stabile payload-IDs (z. B. note_id, chunk_id).
|
nach Edge-Typen.
|
||||||
|
|
||||||
|
Seeds sind stabile payload-IDs (z. B. note_id, chunk_id). Es werden Edges
|
||||||
|
berücksichtigt, bei denen source_id ODER target_id ODER note_id einem der
|
||||||
|
Seeds entspricht.
|
||||||
"""
|
"""
|
||||||
sg = Subgraph()
|
sg = Subgraph()
|
||||||
frontier = set(seeds)
|
frontier = set(seeds)
|
||||||
visited = set()
|
visited = set()
|
||||||
|
|
||||||
for _ in range(max(depth, 0)):
|
max_depth = max(depth, 0)
|
||||||
|
|
||||||
|
for _ in range(max_depth):
|
||||||
if not frontier:
|
if not frontier:
|
||||||
break
|
break
|
||||||
|
|
||||||
edges = get_edges_for_sources(client, prefix, list(frontier), edge_types=edge_types, limit=2048)
|
edges_payloads = _fetch_edges(
|
||||||
|
client=client,
|
||||||
|
prefix=prefix,
|
||||||
|
seeds=list(frontier),
|
||||||
|
edge_types=edge_types,
|
||||||
|
limit=2048,
|
||||||
|
)
|
||||||
|
|
||||||
next_frontier = set()
|
next_frontier = set()
|
||||||
for pl in edges:
|
for pl in edges_payloads:
|
||||||
|
src = pl.get("source_id")
|
||||||
|
tgt = pl.get("target_id")
|
||||||
|
kind = pl.get("kind", "edge")
|
||||||
|
|
||||||
e = {
|
e = {
|
||||||
"source": pl.get("source_id"),
|
"source": src,
|
||||||
"target": pl.get("target_id"),
|
"target": tgt,
|
||||||
"kind": pl.get("kind", "edge"),
|
"kind": kind,
|
||||||
"weight": pl.get("weight", EDGE_BASE_WEIGHTS.get(pl.get("kind", "edge"), 0.0)),
|
"weight": _edge_weight(pl),
|
||||||
}
|
}
|
||||||
sg.add_edge(e)
|
sg.add_edge(e)
|
||||||
if e["target"]:
|
|
||||||
next_frontier.add(e["target"])
|
if tgt:
|
||||||
|
next_frontier.add(tgt)
|
||||||
|
|
||||||
visited |= frontier
|
visited |= frontier
|
||||||
frontier = next_frontier - visited
|
frontier = next_frontier - visited
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user