wiederhergstellt
This commit is contained in:
parent
83bb18b6a7
commit
9025af62f0
27
app/embeddings.py
Normal file
27
app/embeddings.py
Normal file
|
|
@ -0,0 +1,27 @@
|
||||||
|
"""
|
||||||
|
FILE: app/embeddings.py
|
||||||
|
DESCRIPTION: Lokaler Wrapper für SentenceTransformer Embeddings.
|
||||||
|
VERSION: 0.1.0
|
||||||
|
STATUS: Active (Bestätigung durch Aufrufer erforderlich)
|
||||||
|
DEPENDENCIES: app.config, sentence_transformers
|
||||||
|
LAST_ANALYSIS: 2025-12-15
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
from typing import List
|
||||||
|
from functools import lru_cache
|
||||||
|
|
||||||
|
from .config import get_settings
|
||||||
|
|
||||||
|
@lru_cache
|
||||||
|
def _load_model():
|
||||||
|
from sentence_transformers import SentenceTransformer
|
||||||
|
settings = get_settings()
|
||||||
|
model = SentenceTransformer(settings.MODEL_NAME, device="cpu")
|
||||||
|
return model
|
||||||
|
|
||||||
|
def embed_texts(texts: List[str]) -> list[list[float]]:
|
||||||
|
model = _load_model()
|
||||||
|
texts = [t if isinstance(t, str) else str(t) for t in texts]
|
||||||
|
vecs = model.encode(texts, normalize_embeddings=True, convert_to_numpy=False)
|
||||||
|
return [list(map(float, v)) for v in vecs]
|
||||||
172
app/graph/service.py
Normal file
172
app/graph/service.py
Normal file
|
|
@ -0,0 +1,172 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
"""
|
||||||
|
Modul: app/graph/service.py
|
||||||
|
Version: 0.1.0
|
||||||
|
Datum: 2025-09-10
|
||||||
|
|
||||||
|
Zweck
|
||||||
|
-----
|
||||||
|
Leichtgewichtiger Graph-Layer über Qdrant:
|
||||||
|
- get_note(note_id)
|
||||||
|
- get_chunks(note_id)
|
||||||
|
- neighbors(source_id, kinds=[...], scope=['note','chunk'], depth=1)
|
||||||
|
- walk_bfs(source_id, kinds, max_depth)
|
||||||
|
- context_for_note(note_id, max_neighbors): heuristische Kontextsammlung
|
||||||
|
|
||||||
|
Hinweise
|
||||||
|
--------
|
||||||
|
- Nutzt die bestehenden Collections <prefix>_notes/_chunks/_edges.
|
||||||
|
- Edges werden über Payload-Felder (`kind`, `source_id`, `target_id`) abgefragt.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
from typing import List, Dict, Any, Optional, Iterable, Set, Tuple
|
||||||
|
from qdrant_client.http import models as rest
|
||||||
|
from app.core.qdrant import QdrantConfig, get_client
|
||||||
|
|
||||||
|
def _cols(prefix: str):
|
||||||
|
return f"{prefix}_notes", f"{prefix}_chunks", f"{prefix}_edges"
|
||||||
|
|
||||||
|
class GraphService:
|
||||||
|
def __init__(self, cfg: Optional[QdrantConfig] = None, prefix: Optional[str] = None):
|
||||||
|
self.cfg = cfg or QdrantConfig.from_env()
|
||||||
|
if prefix:
|
||||||
|
self.cfg.prefix = prefix
|
||||||
|
self.client = get_client(self.cfg)
|
||||||
|
self.notes_col, self.chunks_col, self.edges_col = _cols(self.cfg.prefix)
|
||||||
|
|
||||||
|
# ------------------------ fetch helpers ------------------------
|
||||||
|
def _scroll(self, col: str, flt: Optional[rest.Filter] = None, limit: int = 256):
|
||||||
|
out = []
|
||||||
|
nextp = None
|
||||||
|
while True:
|
||||||
|
pts, nextp = self.client.scroll(
|
||||||
|
collection_name=col,
|
||||||
|
with_payload=True,
|
||||||
|
with_vectors=False,
|
||||||
|
limit=limit,
|
||||||
|
offset=nextp,
|
||||||
|
scroll_filter=flt,
|
||||||
|
)
|
||||||
|
if not pts:
|
||||||
|
break
|
||||||
|
out.extend(pts)
|
||||||
|
if nextp is None:
|
||||||
|
break
|
||||||
|
return out
|
||||||
|
|
||||||
|
# ------------------------ public API ---------------------------
|
||||||
|
def get_note(self, note_id: str) -> Optional[Dict[str, Any]]:
|
||||||
|
f = rest.Filter(must=[rest.FieldCondition(key="note_id", match=rest.MatchValue(value=note_id))])
|
||||||
|
pts, _ = self.client.scroll(self.notes_col, with_payload=True, with_vectors=False, limit=1, scroll_filter=f)
|
||||||
|
return (pts[0].payload or None) if pts else None
|
||||||
|
|
||||||
|
def get_chunks(self, note_id: str) -> List[Dict[str, Any]]:
|
||||||
|
f = rest.Filter(must=[rest.FieldCondition(key="note_id", match=rest.MatchValue(value=note_id))])
|
||||||
|
pts = self._scroll(self.chunks_col, f)
|
||||||
|
# Sortierung analog Export
|
||||||
|
def key(pl):
|
||||||
|
p = pl.payload or {}
|
||||||
|
s = p.get("seq") or 0
|
||||||
|
ci = p.get("chunk_index") or 0
|
||||||
|
n = 0
|
||||||
|
cid = p.get("chunk_id") or ""
|
||||||
|
if isinstance(cid, str) and "#" in cid:
|
||||||
|
try:
|
||||||
|
n = int(cid.rsplit("#", 1)[-1])
|
||||||
|
except Exception:
|
||||||
|
n = 0
|
||||||
|
return (int(s), int(ci), n)
|
||||||
|
pts_sorted = sorted(pts, key=key)
|
||||||
|
return [p.payload or {} for p in pts_sorted]
|
||||||
|
|
||||||
|
def neighbors(self, source_id: str, kinds: Optional[Iterable[str]] = None,
|
||||||
|
scope: Optional[Iterable[str]] = None, depth: int = 1) -> Dict[str, List[Dict[str, Any]]]:
|
||||||
|
"""
|
||||||
|
Liefert eingehende & ausgehende Nachbarn (nur nach kind gefiltert).
|
||||||
|
depth==1: direkte Kanten.
|
||||||
|
"""
|
||||||
|
kinds = list(kinds) if kinds else None
|
||||||
|
must = [rest.FieldCondition(key="source_id", match=rest.MatchValue(value=source_id))]
|
||||||
|
if kinds:
|
||||||
|
must.append(rest.FieldCondition(key="kind", match=rest.MatchAny(any=kinds)))
|
||||||
|
f = rest.Filter(must=must)
|
||||||
|
edges = self._scroll(self.edges_col, f)
|
||||||
|
out = {"out": [], "in": []}
|
||||||
|
for e in edges:
|
||||||
|
out["out"].append(e.payload or {})
|
||||||
|
# Inverse Richtung (eingehend)
|
||||||
|
must_in = [rest.FieldCondition(key="target_id", match=rest.MatchValue(value=source_id))]
|
||||||
|
if kinds:
|
||||||
|
must_in.append(rest.FieldCondition(key="kind", match=rest.MatchAny(any=kinds)))
|
||||||
|
f_in = rest.Filter(must=must_in)
|
||||||
|
edges_in = self._scroll(self.edges_col, f_in)
|
||||||
|
for e in edges_in:
|
||||||
|
out["in"].append(e.payload or {})
|
||||||
|
return out
|
||||||
|
|
||||||
|
def walk_bfs(self, source_id: str, kinds: Iterable[str], max_depth: int = 2) -> Set[str]:
|
||||||
|
visited: Set[str] = {source_id}
|
||||||
|
frontier: Set[str] = {source_id}
|
||||||
|
kinds = list(kinds)
|
||||||
|
for _ in range(max_depth):
|
||||||
|
nxt: Set[str] = set()
|
||||||
|
for s in frontier:
|
||||||
|
neigh = self.neighbors(s, kinds=kinds)
|
||||||
|
for e in neigh["out"]:
|
||||||
|
t = e.get("target_id")
|
||||||
|
if isinstance(t, str) and t not in visited:
|
||||||
|
visited.add(t)
|
||||||
|
nxt.add(t)
|
||||||
|
frontier = nxt
|
||||||
|
if not frontier:
|
||||||
|
break
|
||||||
|
return visited
|
||||||
|
|
||||||
|
def context_for_note(self, note_id: str, kinds: Iterable[str] = ("references","backlink"), max_neighbors: int = 12) -> Dict[str, Any]:
|
||||||
|
"""
|
||||||
|
Heuristischer Kontext: eigene Chunks + Nachbarn nach Kantenarten, dedupliziert.
|
||||||
|
"""
|
||||||
|
note = self.get_note(note_id) or {}
|
||||||
|
chunks = self.get_chunks(note_id)
|
||||||
|
neigh = self.neighbors(note_id, kinds=list(kinds))
|
||||||
|
targets = []
|
||||||
|
for e in neigh["out"]:
|
||||||
|
t = e.get("target_id")
|
||||||
|
if isinstance(t, str):
|
||||||
|
targets.append(t)
|
||||||
|
for e in neigh["in"]:
|
||||||
|
s = e.get("source_id")
|
||||||
|
if isinstance(s, str):
|
||||||
|
targets.append(s)
|
||||||
|
# de-dupe
|
||||||
|
seen = set()
|
||||||
|
uniq = []
|
||||||
|
for t in targets:
|
||||||
|
if t not in seen:
|
||||||
|
seen.add(t)
|
||||||
|
uniq.append(t)
|
||||||
|
uniq = uniq[:max_neighbors]
|
||||||
|
neighbor_notes = [self.get_note(t) for t in uniq]
|
||||||
|
return {
|
||||||
|
"note": note,
|
||||||
|
"chunks": chunks,
|
||||||
|
"neighbors": [n for n in neighbor_notes if n],
|
||||||
|
"edges_out": neigh["out"],
|
||||||
|
"edges_in": neigh["in"],
|
||||||
|
}
|
||||||
|
|
||||||
|
# Optional: Mini-CLI
|
||||||
|
if __name__ == "__main__": # pragma: no cover
|
||||||
|
import argparse, json
|
||||||
|
ap = argparse.ArgumentParser()
|
||||||
|
ap.add_argument("--prefix", help="Collection-Prefix (überschreibt ENV)")
|
||||||
|
ap.add_argument("--note-id", required=True)
|
||||||
|
ap.add_argument("--neighbors", action="store_true", help="Nur Nachbarn anzeigen")
|
||||||
|
args = ap.parse_args()
|
||||||
|
svc = GraphService(prefix=args.prefix)
|
||||||
|
if args.neighbors:
|
||||||
|
out = svc.neighbors(args.note_id, kinds=["references","backlink","prev","next","belongs_to"])
|
||||||
|
else:
|
||||||
|
out = svc.context_for_note(args.note_id)
|
||||||
|
print(json.dumps(out, ensure_ascii=False, indent=2))
|
||||||
Loading…
Reference in New Issue
Block a user