löschen von Zombies

This commit is contained in:
Lars 2025-12-15 16:07:58 +01:00
parent 9025af62f0
commit 60092b378b
10 changed files with 4 additions and 919 deletions

View File

@ -1,296 +0,0 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Modul: app/core/edges.py
Version: 2.0.0 (V2-Superset, rückwärtskompatibel zu v1 vom 2025-09-09)
Zweck
-----
Bewahrt die bestehende Edge-Logik (belongs_to, prev/next, references, backlink)
und ergänzt V2-Felder + Typ-Default-Kanten gemäß config/types.yaml (edge_defaults).
Die Funktion ist **idempotent** und **rückwärtskompatibel** zur bisherigen Signatur.
Kompatibilitätsgarantien (gegenüber v1):
- **Input**: akzeptiert identische Chunk-Payloads wie v1:
* `id` (Chunk-ID), `note_id` (Owner), `neighbors.prev|next` (optional),
`references: [{target_id: ...}]` (optional),
alternativ: `chunk_id`, `chunk_index|ord`, `window|text`
- **Output (v1-Felder)**: `kind`, `source_id`, `target_id`, `scope`, `note_id`, `edge_id`
- **Neu (v2-Felder)**: `relation`, `src_note_id`, `src_chunk_id?`, `dst_note_id`, `dst_chunk_id?`,
`provenance` (`explicit|rule`), `rule_id?`, `confidence?`
Regeln
------
- Deduplizierungsschlüssel: (source_id, target_id, relation, rule_id)
- Strukturkanten:
* belongs_to: 1× pro Chunk
* next/prev: Sequenz der Chunks; nutzt bevorzugt neighbors; sonst ord/chunk_index
- Explizite Referenzen:
* aus Chunk: `references[].target_id` (falls vorhanden)
* Fallback: Wikilinks in `window|text`: [[Some Title|some-id]] oder [[some-id]]
- Note-Scope:
* backlink immer; references nur, wenn include_note_scope_refs=True
- Typ-Defaults (edge_defaults aus config/types.yaml des **Quell-Notiztyps**):
* Für jede explizite Referenz wird je Default-Relation eine Regel-Kante erzeugt
* rule_id: "type_default:{note_type}:{relation}:v1", provenance="rule"
Konfiguration
-------------
- ENV MINDNET_TYPES_FILE (Default: ./config/types.yaml)
Lizenz/Autor
------------
- Erstimplementierung v1 (2025-09-09) – Projekt Mindnet
- Erweiterung v2 (2025-11-11) – kompatible Superset-Implementierung
"""
from __future__ import annotations
import os
import re
from typing import Dict, Iterable, List, Optional, Tuple, Set
try:
import yaml # optional, nur für types.yaml
except Exception: # pragma: no cover
yaml = None
# ------------------------------------------------------------
# Hilfen: types.yaml laden (edge_defaults)
# ------------------------------------------------------------
def _types_path() -> str:
    """Return the location of the types configuration file.

    A non-empty ``MINDNET_TYPES_FILE`` environment variable takes precedence;
    otherwise the relative default ``./config/types.yaml`` is returned.
    """
    configured = os.getenv("MINDNET_TYPES_FILE")
    if configured:
        return configured
    return "./config/types.yaml"
def _load_types() -> Dict[str, dict]:
    """Read the per-type configuration from the types YAML file.

    Returns the mapping stored under the top-level ``types`` key when that key
    holds a dict, otherwise the top-level mapping itself.  An empty dict is
    returned when PyYAML is not importable, the file does not exist, the
    document is not a mapping, or reading/parsing fails for any reason.
    """
    path = _types_path()
    if not os.path.isfile(path) or yaml is None:
        return {}
    try:
        with open(path, "r", encoding="utf-8") as handle:
            document = yaml.safe_load(handle) or {}
    except Exception:
        # Best effort: an unreadable types file simply yields no configuration.
        return {}
    if not isinstance(document, dict):
        return {}
    nested = document.get("types")
    if isinstance(nested, dict):
        return nested
    return document
def _edge_defaults_for(note_type: Optional[str]) -> List[str]:
    """Return the ``edge_defaults`` relations configured for *note_type*.

    The type name is stripped and lower-cased before the lookup.  A single
    string value is treated as a one-element list; only ``str``/``int``/``float``
    entries are kept and each is converted to ``str``.  Unknown types or a
    missing configuration yield an empty list.
    """
    registry = _load_types()
    type_key = (note_type or "").strip().lower()
    raw = (registry.get(type_key) or {}).get("edge_defaults") or []
    candidates = [raw] if isinstance(raw, str) else raw
    return [str(item) for item in candidates if isinstance(item, (str, int, float))]
# ------------------------------------------------------------
# WikilinkParser (Fallback, wenn ch["references"] fehlt)
# ------------------------------------------------------------
# Matches [[target]] and [[label|target]]; group 1 captures the target id.
_WIKILINK_RE = re.compile(r"\[\[(?:[^\|\]]+\|)?([a-zA-Z0-9_\-#:. ]+)\]\]")


def _extract_wikilinks(text: str) -> List[str]:
    """Return the target ids of all wikilinks in *text*, in order of appearance.

    Surrounding whitespace is stripped from every id.  A falsy *text*
    (``None`` or ``""``) yields an empty list; duplicates are kept.
    """
    return [match.group(1).strip() for match in _WIKILINK_RE.finditer(text or "")]
# ------------------------------------------------------------
# Utility
# ------------------------------------------------------------
def _mk_edge_id(kind: str, s: str, t: str, scope: str, rule_id: Optional[str] = None) -> str:
    """Derive a deterministic edge id from the edge's identifying fields.

    The canonical key is ``"{kind}:{s}->{t}#{scope}"``, extended by
    ``"|{rule_id}"`` when a non-empty rule id is given.  The id is the
    hex-encoded 12-byte BLAKE2s digest of that key; should hashing fail, the
    canonical key itself is returned.
    """
    parts = [f"{kind}:{s}->{t}#{scope}"]
    if rule_id:
        parts.append(f"|{rule_id}")
    canonical = "".join(parts)
    try:
        import hashlib

        digest = hashlib.blake2s(canonical.encode("utf-8"), digest_size=12)
        return digest.hexdigest()
    except Exception:  # pragma: no cover
        return canonical
def _dedupe(edges: List[Dict]) -> List[Dict]:
    """Drop duplicate edges while keeping the first occurrence and the order.

    Two edges are duplicates when they agree on
    ``(source_id, target_id, relation, rule_id)``.  ``relation`` falls back to
    ``kind`` and then to ``"edge"``; missing or falsy values are compared as
    empty strings.
    """
    known: Set[Tuple[str, str, str, str]] = set()
    unique: List[Dict] = []
    for edge in edges:
        signature = (
            str(edge.get("source_id") or ""),
            str(edge.get("target_id") or ""),
            str(edge.get("relation") or edge.get("kind") or "edge"),
            str(edge.get("rule_id") or ""),
        )
        if signature not in known:
            known.add(signature)
            unique.append(edge)
    return unique
def _first(v: dict, *keys, default=None):
    """Return the value of the first key in *keys* that is present and not ``None``.

    Falsy values other than ``None`` (``0``, ``""``) count as present.
    *default* is returned when no key qualifies.
    """
    present = (v[key] for key in keys if key in v and v[key] is not None)
    return next(present, default)
# ------------------------------------------------------------
# Hauptfunktion
# ------------------------------------------------------------
def build_edges_for_note(
    note_id: str,
    chunk_payloads: List[Dict],
    note_level_refs: Optional[List[str]] = None,
    *,
    include_note_scope_refs: bool = False,
) -> List[Dict]:
    """Build the de-duplicated edge payloads for one note and its chunks.

    Edges are generated in the following order and finally passed through
    ``_dedupe`` (first occurrence wins):

    1. ``belongs_to`` (chunk -> note) for every chunk that has an id.
    2. ``prev``/``next`` pairs between neighbouring chunks.  The predecessor
       comes from ``neighbors.prev`` when set, otherwise from the chunk order
       by ``chunk_index``/``ord``.  Every ``next`` edge is emitted as the
       reciprocal of a ``prev`` edge; ``neighbors.next`` is not consulted.
    3. Chunk-scope ``references`` from ``references[].target_id``; when a
       chunk has none, wikilinks found in ``window``/``text`` are used.
    4. Note-scope edges for ``note_level_refs``: a ``backlink`` for every
       unique target, plus a ``references`` edge when
       ``include_note_scope_refs`` is true.
    5. For every chunk-scope reference, one rule edge per relation listed in
       the note type's ``edge_defaults`` (``references`` itself is skipped).

    Args:
        note_id: Id of the note the chunks belong to.
        chunk_payloads: Chunk dicts.  The chunk id is read from ``id`` or
            ``chunk_id``; chunks without an id are ignored.  The note type is
            taken from ``type`` (or ``note_type``) of the first chunk.
        note_level_refs: Target ids referenced by the note as a whole.
        include_note_scope_refs: Also emit note-scope ``references`` edges.

    Returns:
        A list of edge dicts carrying the v1 fields (``kind``, ``source_id``,
        ``target_id``, ``scope``, ``note_id``, ``edge_id``) and the v2 fields
        (``relation``, ``src_note_id``, ``src_chunk_id``, ``dst_note_id``,
        ``dst_chunk_id``, ``provenance``, ``rule_id``, ``confidence``) where
        applicable.
    """
    edges: List[Dict] = []
    chunks = list(chunk_payloads or [])

    # Note type from the first chunk; "type" takes precedence over "note_type".
    first_chunk = chunks[0] if chunks else {}
    note_type = first_chunk.get("type") or first_chunk.get("note_type")

    # --- Structural edges: belongs_to ----------------------------------------
    for ch in chunks:
        cid = _first(ch, "id", "chunk_id")
        if not cid:
            continue
        owner = ch.get("note_id") or note_id
        edges.append({
            "edge_id": _mk_edge_id("belongs_to", cid, note_id, "chunk", "structure:belongs_to:v1"),
            "kind": "belongs_to",
            "relation": "belongs_to",
            "scope": "chunk",
            "source_id": cid,
            "target_id": note_id,
            "note_id": owner,  # v1 compatibility
            # v2
            "src_note_id": owner,
            "src_chunk_id": cid,
            "dst_note_id": note_id,
            "provenance": "rule",
            "rule_id": "structure:belongs_to:v1",
            "confidence": 1.0,
        })

    # --- Structural edges: prev/next -----------------------------------------
    # Stable sort, so chunks without an index (treated as 0) keep input order.
    ordered = sorted(chunks, key=lambda c: _first(c, "chunk_index", "ord", default=0))
    for i, ch in enumerate(ordered):
        cid = _first(ch, "id", "chunk_id")
        if not cid:
            continue
        owner = ch.get("note_id") or note_id
        prev_id = (ch.get("neighbors") or {}).get("prev")
        if prev_id is None and i > 0:
            # No explicit neighbour: fall back to the preceding chunk in order.
            prev_id = _first(ordered[i - 1], "id", "chunk_id")
        if not prev_id:
            continue
        edges.append({
            "edge_id": _mk_edge_id("prev", cid, prev_id, "chunk", "structure:order:v1"),
            "kind": "prev", "relation": "prev", "scope": "chunk",
            "source_id": cid, "target_id": prev_id, "note_id": owner,
            "src_note_id": owner, "src_chunk_id": cid,
            "dst_note_id": owner, "dst_chunk_id": prev_id,
            "provenance": "rule", "rule_id": "structure:order:v1", "confidence": 0.95,
        })
        edges.append({
            "edge_id": _mk_edge_id("next", prev_id, cid, "chunk", "structure:order:v1"),
            "kind": "next", "relation": "next", "scope": "chunk",
            "source_id": prev_id, "target_id": cid, "note_id": owner,
            "src_note_id": owner, "src_chunk_id": prev_id,
            "dst_note_id": owner, "dst_chunk_id": cid,
            "provenance": "rule", "rule_id": "structure:order:v1", "confidence": 0.95,
        })

    # --- Explicit references (chunk scope) -----------------------------------
    explicit_refs: List[Dict] = []
    for ch in chunks:
        cid = _first(ch, "id", "chunk_id")
        if not cid:
            continue
        owner = ch.get("note_id") or note_id
        # 1) Prefer the references already attached to the chunk.
        refs = ch.get("references") or []
        targets = [r.get("target_id") for r in refs if isinstance(r, dict) and r.get("target_id")]
        # 2) Otherwise fall back to wikilinks in the chunk text.
        if not targets:
            text = _first(ch, "window", "text", default="") or ""
            targets = _extract_wikilinks(text)
        for tid in targets:
            if not isinstance(tid, str) or not tid.strip():
                continue
            ref_edge = {
                "edge_id": _mk_edge_id("references", cid, tid, "chunk"),
                "kind": "references",
                "relation": "references",
                "scope": "chunk",
                "source_id": cid,
                "target_id": tid,
                "note_id": owner,
                # v2
                "src_note_id": owner,
                "src_chunk_id": cid,
                "dst_note_id": tid,
                "provenance": "explicit",
                "rule_id": "",
                "confidence": 1.0,
            }
            edges.append(ref_edge)
            explicit_refs.append(ref_edge)

    # --- Note scope: references (optional) + backlink (always) ---------------
    unique_refs = list(dict.fromkeys(
        tid for tid in (note_level_refs or []) if isinstance(tid, str) and tid.strip()
    ))
    for tid in unique_refs:
        if include_note_scope_refs:
            edges.append({
                "edge_id": _mk_edge_id("references", note_id, tid, "note"),
                "kind": "references", "relation": "references", "scope": "note",
                "source_id": note_id, "target_id": tid, "note_id": note_id,
                "src_note_id": note_id, "dst_note_id": tid,
                "provenance": "explicit", "rule_id": "", "confidence": 1.0,
            })
        edges.append({
            "edge_id": _mk_edge_id("backlink", tid, note_id, "note", "derived:backlink:v1"),
            "kind": "backlink", "relation": "backlink", "scope": "note",
            "source_id": tid, "target_id": note_id, "note_id": note_id,
            "src_note_id": tid, "dst_note_id": note_id,
            "provenance": "rule", "rule_id": "derived:backlink:v1", "confidence": 0.9,
        })

    # --- Type defaults per explicit (chunk-scope) reference ------------------
    defaults = [d for d in _edge_defaults_for(note_type) if d and d != "references"]
    for ref_edge in explicit_refs:
        # The explicit edge serves as the template for the derived rule edges.
        src = ref_edge["source_id"]
        tgt = ref_edge["target_id"]
        scope = ref_edge.get("scope", "chunk")
        s_note = ref_edge.get("src_note_id") or note_id
        s_chunk = ref_edge.get("src_chunk_id")
        t_note = ref_edge.get("dst_note_id") or tgt
        for rel in defaults:
            rule_id = f"type_default:{(note_type or 'unknown')}:{rel}:v1"
            edges.append({
                "edge_id": _mk_edge_id(rel, src, tgt, scope, rule_id),
                "kind": rel, "relation": rel, "scope": scope,
                "source_id": src, "target_id": tgt, "note_id": s_note,
                "src_note_id": s_note, "src_chunk_id": s_chunk,
                "dst_note_id": t_note,
                "provenance": "rule", "rule_id": rule_id, "confidence": 0.7,
            })

    # --- Dedupe & return ------------------------------------------------------
    return _dedupe(edges)

View File

@ -1,94 +0,0 @@
# app/core/edges_writer.py
from __future__ import annotations
import hashlib
from typing import Dict, List, Iterable, Tuple
try:
# Dein Modul mit der Schemadefinition und der Builder-Funktion
from app.core.edges import build_edges_for_note # noqa: F401
except Exception as e:
raise RuntimeError("Konnte app.core.edges nicht importieren. "
"Bitte sicherstellen, dass app/core/edges.py vorhanden ist.") from e
def _edge_uid(kind: str, source_id: str, target_id: str, scope: str) -> str:
    """Return a short deterministic id for an edge.

    The id is the hex-encoded 12-byte BLAKE2s digest of the canonical key
    ``"{kind}|{source_id}|{target_id}|{scope}"``.
    """
    canonical = "|".join(f"{part}" for part in (kind, source_id, target_id, scope))
    return hashlib.blake2s(canonical.encode("utf-8"), digest_size=12).hexdigest()
def ensure_edges_collection(qdrant_client, collection: str) -> None:
    """Create the edge collection if it does not exist yet.

    The collection is created with a one-dimensional dummy vector (cosine
    distance) and on-disk payload storage.  An existing collection is left
    untouched.

    Args:
        qdrant_client: Qdrant client instance used for the lookup and creation.
        collection: Name of the edge collection.
    """
    from qdrant_client.http import models as qm

    existing = {c.name for c in qdrant_client.get_collections().collections}
    if collection in existing:
        return
    # Use create_collection rather than recreate_collection: the latter drops
    # an existing collection before creating it, so a collection that appears
    # between the check above and this call would be wiped.  With
    # create_collection that situation surfaces as an error instead.
    qdrant_client.create_collection(
        collection_name=collection,
        vectors_config=qm.VectorParams(size=1, distance=qm.Distance.COSINE),
        on_disk_payload=True,
    )
def edges_from_note(
    note_id: str,
    chunk_payloads: List[Dict],
    note_level_refs: Iterable[str] | None,
    *,
    include_note_scope_refs: bool = False,
) -> List[Dict]:
    """Delegate to ``build_edges_for_note`` and return its edge payloads.

    ``note_level_refs`` is materialised into a list first (an empty list when
    it is ``None`` or empty); all other arguments are passed through
    unchanged, so the payload schema is exactly the builder's.
    """
    refs = list(note_level_refs) if note_level_refs else []
    return build_edges_for_note(
        note_id=note_id,
        chunk_payloads=chunk_payloads,
        note_level_refs=refs,
        include_note_scope_refs=include_note_scope_refs,
    )
def upsert_edges(
    qdrant_client,
    collection: str,
    edge_payloads: List[Dict],
) -> Tuple[int, int]:
    """Write edge payloads to Qdrant as points.

    Each point gets a deterministic id derived from
    ``(kind, source_id, target_id, scope)``, the dummy vector ``[0.0]`` and
    the unmodified edge dict as payload.  Payloads repeating an already seen
    key are skipped (first occurrence wins).  The collection is created on
    demand before the upsert.

    Returns:
        ``(number_of_points, number_of_unique_keys)``; ``(0, 0)`` when there
        is nothing to write.
    """
    from qdrant_client.models import PointStruct

    if not edge_payloads:
        return 0, 0

    # NOTE(review): the point id is a 24-character hex string; Qdrant documents
    # point ids as unsigned integers or UUIDs - confirm this format is accepted.
    points_by_key = {}
    for edge in edge_payloads:
        key = tuple(edge.get(field) for field in ("kind", "source_id", "target_id", "scope"))
        if key in points_by_key:
            continue
        points_by_key[key] = PointStruct(id=_edge_uid(*key), vector=[0.0], payload=edge)

    if not points_by_key:
        return 0, 0

    ensure_edges_collection(qdrant_client, collection)
    qdrant_client.upsert(collection_name=collection, points=list(points_by_key.values()))
    return len(points_by_key), len(points_by_key)

View File

@ -1,103 +0,0 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Datei: app/core/env_vars.py
Version: 1.1.0 (2025-11-08)
Zweck
Einheitliche Auflösung von ENV-Variablen (Prefix, Qdrant, Embeddings, Hashing)
mit Abwärtskompatibilität.
Grundsatz
- Für Qdrant-Funktionen ist 'COLLECTION_PREFIX' der Primärschlüssel.
- 'MINDNET_PREFIX' bleibt für App-/UI-/Exporter-Kontexte nutzbar.
- Fallbacks sorgen dafür, dass ältere Umgebungen weiter funktionieren.
Wichtig
- Lädt optional eine .env (wenn python-dotenv verfügbar ist).
- Überschreibt keine bereits gesetzten OS-Variablen (override=False).
"""
from __future__ import annotations
import os
from typing import Optional, Dict
# Optional: .env automatisch laden (ohne Hard-Fail, falls nicht vorhanden)
try:
from dotenv import load_dotenv, find_dotenv # type: ignore
_p = find_dotenv()
if _p:
load_dotenv(_p, override=False)
except Exception:
pass
# -------- Prefix-Auflösung --------
def get_collection_prefix(cli_override: Optional[str] = None) -> str:
    """Resolve the collection prefix for Qdrant-related code.

    Precedence:
        1. *cli_override* (``--prefix``), stripped, when non-blank
        2. environment variable ``COLLECTION_PREFIX``
        3. environment variable ``MINDNET_PREFIX`` (fallback)
        4. the literal ``"mindnet"``
    """
    override = str(cli_override).strip() if cli_override else ""
    if override:
        return override
    for variable in ("COLLECTION_PREFIX", "MINDNET_PREFIX"):
        value = os.getenv(variable)
        if value:
            return value
    return "mindnet"
def get_mindnet_prefix(cli_override: Optional[str] = None) -> str:
    """Resolve the prefix for app/UI/exporter contexts.

    Precedence:
        1. *cli_override* (``--prefix``), stripped, when non-blank
        2. environment variable ``MINDNET_PREFIX``
        3. environment variable ``COLLECTION_PREFIX`` (fallback)
        4. the literal ``"mindnet"``
    """
    override = str(cli_override).strip() if cli_override else ""
    if override:
        return override
    for variable in ("MINDNET_PREFIX", "COLLECTION_PREFIX"):
        value = os.getenv(variable)
        if value:
            return value
    return "mindnet"
def get_prefix(cli_override: Optional[str] = None, target: str = "qdrant") -> str:
    """Backward-compatible wrapper around the two prefix resolvers.

    ``target="app"`` (case-insensitive) selects ``get_mindnet_prefix``; every
    other value, including the default ``"qdrant"``, selects
    ``get_collection_prefix``.
    """
    resolver = get_mindnet_prefix if target.lower() == "app" else get_collection_prefix
    return resolver(cli_override)
# -------- Qdrant / Embeddings / Hashing --------
def get_qdrant_url(default: str = "http://127.0.0.1:6333") -> str:
    """Return ``QDRANT_URL`` from the environment, or *default* when it is unset."""
    return os.environ.get("QDRANT_URL", default)
def get_qdrant_api_key(default: str = "") -> str:
    """Return ``QDRANT_API_KEY`` from the environment, or *default* when it is unset."""
    return os.environ.get("QDRANT_API_KEY", default)
def get_vector_dim(default: int = 384) -> int:
    """Return ``VECTOR_DIM`` from the environment as an integer.

    *default* is returned when the variable is unset or its value cannot be
    converted to ``int``.
    """
    try:
        raw = os.getenv("VECTOR_DIM", str(default))
        dimension = int(raw)
    except Exception:
        return default
    return dimension
def get_embed_url(default: Optional[str] = None) -> Optional[str]:
    """Return ``EMBED_URL`` from the environment, or *default* when it is unset."""
    return os.environ.get("EMBED_URL", default)
def get_hash_env() -> Dict[str, str]:
    """Collect the hash-related environment settings.

    Returns a dict with the keys ``MINDNET_HASH_COMPARE``,
    ``MINDNET_HASH_SOURCE`` and ``MINDNET_HASH_NORMALIZE``; unset variables
    map to ``""``.  The values are only gathered here, not interpreted.
    """
    names = ("MINDNET_HASH_COMPARE", "MINDNET_HASH_SOURCE", "MINDNET_HASH_NORMALIZE")
    return {name: os.getenv(name, "") for name in names}

View File

@ -1,56 +0,0 @@
"""
app/core/ranking.py — Kombiniertes Scoring (WP-04)
Zweck:
Zusammenführen von semantischem Score (normalisiert), Edge-Bonus und
Centrality-Bonus in einen Gesamtscore für die Ergebnisreihung.
Kompatibilität:
Python 3.12+
Version:
0.1.0 (Erstanlage)
Stand:
2025-10-07
Bezug:
WP-04 Ranking-Formel (w_sem, w_edge, w_cent)
Nutzung:
from app.core.ranking import combine_scores
Änderungsverlauf:
0.1.0 (2025-10-07) Erstanlage.
"""
from __future__ import annotations
from typing import List, Tuple, Dict
def normalize_scores(values: List[float]) -> List[float]:
    """Min-max normalise *values* over the candidate set.

    An empty input is returned as is.  When all values are (almost) equal,
    i.e. the spread is below ``1e-9``, every entry becomes ``0.5``.
    """
    if not values:
        return values
    lo = min(values)
    hi = max(values)
    if hi - lo < 1e-9:
        return [0.5 for _ in values]
    return [(value - lo) / (hi - lo) for value in values]
def combine_scores(
    hits: List[Tuple[str, float, dict]],  # (id, semantic_score, payload)
    edge_bonus_map: Dict[str, float],
    centrality_map: Dict[str, float],
    w_sem: float = 0.70,
    w_edge: float = 0.25,
    w_cent: float = 0.05,
) -> List[Tuple[str, float, float, float, float]]:
    """Combine semantic score, edge bonus and centrality bonus into one ranking.

    The semantic scores are min-max normalised over all hits; the total is
    ``w_sem * normalised + w_edge * edge_bonus + w_cent * centrality``.
    Bonuses missing from the maps count as ``0.0``.

    Returns:
        Tuples ``(point_id, total_score, edge_bonus, centrality_bonus,
        raw_semantic_score)`` sorted by ``total_score`` in descending order.
    """
    normalised = normalize_scores([hit[1] for hit in hits])
    ranked = []
    for (point_id, raw_score, _payload), norm_score in zip(hits, normalised):
        edge_bonus = edge_bonus_map.get(point_id, 0.0)
        centrality_bonus = centrality_map.get(point_id, 0.0)
        total = w_sem * norm_score + w_edge * edge_bonus + w_cent * centrality_bonus
        ranked.append((point_id, total, edge_bonus, centrality_bonus, raw_score))
    return sorted(ranked, key=lambda row: row[1], reverse=True)

View File

@ -1,116 +0,0 @@
"""app/core/retriever_config.py
---------------------------------
Zentrale Konfiguration für den mindnet-Retriever (WP-04).
Zweck:
- Lädt config/retriever.yaml (falls vorhanden) oder nutzt sinnvolle Defaults.
- Bietet einen gecachten Zugriff auf die Retriever-Config für
andere Module (z. B. graph_adapter, retriever).
Hinweis zur Weiterentwicklung (Selbstjustierung):
- Die hier definierten Parameter sind so gewählt, dass sie später
durch ein Feedback-/Learning-to-Rank-Modell überschrieben werden
können, ohne die restliche Architektur anzupassen.
"""
from __future__ import annotations
import os
from dataclasses import dataclass
from functools import lru_cache
from pathlib import Path
from typing import Dict
try:
import yaml # type: ignore
except Exception: # pragma: no cover - Fallback, falls PyYAML nicht installiert ist.
yaml = None # type: ignore
@dataclass(frozen=True)
class RetrieverConfig:
    """Immutable container for the retriever's scale factors and edge weights."""

    # Scale for the semantic score component.
    # NOTE(review): how the scales are applied is outside this file - confirm.
    semantic_scale: float
    # Scale for the edge-based score component.
    edge_scale: float
    # Scale for the centrality score component.
    centrality_scale: float
    # Weight per edge kind (e.g. "references", "belongs_to").  The dataclass is
    # frozen, but this dict itself remains mutable.
    edge_weights: Dict[str, float]
def _coerce_float(value, fallback: float) -> float:
    """Return ``float(value)``, or *fallback* when the value is not numeric."""
    try:
        return float(value)
    except (TypeError, ValueError, OverflowError):
        return fallback


@lru_cache
def get_retriever_config() -> RetrieverConfig:
    """Load the retriever configuration (defaults, optionally overridden by YAML).

    Order of precedence:
        1. Built-in defaults.
        2. Values from the YAML file named by ``MINDNET_RETRIEVER_CONFIG`` or,
           when that variable is unset, ``config/retriever.yaml`` two levels
           above this module's directory.  Only the ``retriever`` section is
           read (``semantic_scale``, ``edge_scale``, ``centrality_scale``,
           ``edge_weights``).

    The file is ignored when PyYAML is unavailable, the file is missing,
    parsing fails, or the document / its ``retriever`` section is not a
    mapping.  Individual values that are not numeric keep their default.

    The result is cached for the lifetime of the process; call
    ``get_retriever_config.cache_clear()`` to force a reload.
    """
    # 1) Defaults.
    semantic_scale = 1.0
    edge_scale = 1.0
    centrality_scale = 1.0
    edge_weights: Dict[str, float] = {
        # Knowledge edges
        "depends_on": 1.0,
        "related_to": 0.7,
        "similar_to": 0.7,
        "references": 0.5,
        # Structural edges
        "belongs_to": 0.2,
        "next": 0.1,
        "prev": 0.1,
        # Other / technical edges
        "backlink": 0.2,
        "references_at": 0.2,
    }

    # 2) Optional YAML overrides.
    cfg_path_env = os.getenv("MINDNET_RETRIEVER_CONFIG")
    if cfg_path_env:
        cfg_path = Path(cfg_path_env)
    else:
        # Project root = two levels above app/core/.
        cfg_path = Path(__file__).resolve().parents[2] / "config" / "retriever.yaml"

    if yaml is not None and cfg_path.exists():
        try:
            data = yaml.safe_load(cfg_path.read_text(encoding="utf-8")) or {}
        except Exception:
            data = {}

        # A parseable document whose root or "retriever" node is not a mapping
        # (e.g. a list or a plain string) is treated like a missing section
        # instead of raising AttributeError on .get().
        retr = data.get("retriever") if isinstance(data, dict) else None
        if not isinstance(retr, dict):
            retr = {}

        semantic_scale = _coerce_float(retr.get("semantic_scale", semantic_scale), semantic_scale)
        edge_scale = _coerce_float(retr.get("edge_scale", edge_scale), edge_scale)
        centrality_scale = _coerce_float(retr.get("centrality_scale", centrality_scale), centrality_scale)

        # Edge weights per edge kind; non-numeric entries are skipped.
        ew_cfg = retr.get("edge_weights") or {}
        if isinstance(ew_cfg, dict):
            for kind, weight in ew_cfg.items():
                try:
                    edge_weights[str(kind)] = float(weight)
                except (TypeError, ValueError, OverflowError):
                    continue

    return RetrieverConfig(
        semantic_scale=semantic_scale,
        edge_scale=edge_scale,
        centrality_scale=centrality_scale,
        edge_weights=edge_weights,
    )

View File

@ -1,22 +0,0 @@
from __future__ import annotations
import json
import os
from functools import lru_cache
from jsonschema import Draft202012Validator, RefResolver
# Directory holding the JSON schemas; overridable via the SCHEMAS_DIR env var.
SCHEMAS_DIR = os.getenv(
    "SCHEMAS_DIR",
    os.path.join(os.path.dirname(os.path.dirname(__file__)), "..", "schemas"),
)


@lru_cache(maxsize=16)
def load_schema(name: str) -> dict:
    """Load and parse the JSON schema file *name* from ``SCHEMAS_DIR``.

    *name* is a file name such as ``"note.schema.json"``,
    ``"chunk.schema.json"`` or ``"edge.schema.json"``.  Results are cached per
    name, so repeated calls return the same dict object.

    Raises:
        FileNotFoundError: if no regular file exists at the resolved path.
    """
    schema_path = os.path.join(SCHEMAS_DIR, name)
    if os.path.isfile(schema_path):
        with open(schema_path, "r", encoding="utf-8") as handle:
            return json.load(handle)
    raise FileNotFoundError(f"Schema not found: {schema_path}")
@lru_cache(maxsize=16)
def get_validator(name: str) -> Draft202012Validator:
    """Return a cached Draft 2020-12 validator for the schema file *name*.

    The schema is loaded via ``load_schema`` and paired with a resolver built
    from that same schema.
    """
    # NOTE(review): RefResolver appears to be deprecated in recent jsonschema
    # releases - confirm against the installed version.
    schema = load_schema(name)
    return Draft202012Validator(schema, resolver=RefResolver.from_schema(schema))

View File

@ -1,16 +0,0 @@
from __future__ import annotations
from typing import Dict
from jsonschema import ValidationError
from .schema_loader import get_validator
NOTE_SCHEMA_NAME = "note.schema.json"
def validate_note_payload(payload: Dict) -> None:
    """Validate *payload* against the note schema.

    Returns ``None`` when the payload is valid.

    Raises:
        ValidationError: if at least one violation is found.  The message
            lists every violation as ``"<location>: <message>"`` joined by
            ``" | "``, where the location is the dotted path into the payload
            or ``"<root>"`` for top-level errors.
    """
    problems = sorted(
        get_validator(NOTE_SCHEMA_NAME).iter_errors(payload),
        key=lambda err: err.path,
    )
    if not problems:
        return

    def describe(err) -> str:
        location = ".".join(str(part) for part in err.path) or "<root>"
        return f"{location}: {err.message}"

    raise ValidationError(" | ".join(describe(err) for err in problems))

View File

@ -1,40 +0,0 @@
"""
Version 1
"""
from __future__ import annotations
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from typing import List, Optional
from sentence_transformers import SentenceTransformer
app = FastAPI(title="mindnet-embed", version="1.0")
MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2" # 384-dim
_model: SentenceTransformer | None = None
class EmbedIn(BaseModel):
    """Request body of the ``/embed`` endpoint."""

    # Optional model name; the /embed handler in this module does not read it.
    model: Optional[str] = None
    # Texts to embed; handed to the model's encode() call as a batch.
    inputs: List[str]
class EmbedOut(BaseModel):
    """Response body of the ``/embed`` endpoint."""

    # Embedding vectors as lists of floats; the handler rejects any vector
    # whose length is not 384.
    embeddings: List[List[float]]
@app.on_event("startup")
def _load_model():
    """Load the sentence-transformer ``MODEL_NAME`` into the module-level ``_model``.

    Registered as a FastAPI startup handler, so the model is instantiated once
    when the application starts rather than on the first request.
    """
    global _model
    _model = SentenceTransformer(MODEL_NAME)
@app.get("/health")
def health():
    """Report service liveness together with the model name and vector size."""
    status = {"ok": True}
    status["model"] = MODEL_NAME
    status["dim"] = 384
    return status
@app.post("/embed", response_model=EmbedOut)
def embed(payload: EmbedIn) -> EmbedOut:
    """Embed the given texts with the loaded sentence-transformer model.

    An empty ``inputs`` list yields an empty result without touching the
    encoder.  Embeddings are returned unnormalised.

    Raises:
        HTTPException: 503 when the model has not been loaded yet; 500 when
            any produced vector does not have 384 dimensions.
    """
    if _model is None:
        raise HTTPException(status_code=503, detail="Model not loaded")

    texts = payload.inputs
    if not texts:
        return EmbedOut(embeddings=[])

    vectors = _model.encode(texts, normalize_embeddings=False).tolist()
    if not all(len(vector) == 384 for vector in vectors):
        raise HTTPException(status_code=500, detail="Embedding size mismatch (expected 384)")
    return EmbedOut(embeddings=vectors)

View File

@ -1,172 +0,0 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Modul: app/graph/service.py
Version: 0.1.0
Datum: 2025-09-10
Zweck
-----
Leichtgewichtiger Graph-Layer über Qdrant:
- get_note(note_id)
- get_chunks(note_id)
- neighbors(source_id, kinds=[...], scope=['note','chunk'], depth=1)
- walk_bfs(source_id, kinds, max_depth)
- context_for_note(note_id, max_neighbors): heuristische Kontextsammlung
Hinweise
--------
- Nutzt die bestehenden Collections <prefix>_notes/_chunks/_edges.
- Edges werden über Payload-Felder (`kind`, `source_id`, `target_id`) abgefragt.
"""
from __future__ import annotations
from typing import List, Dict, Any, Optional, Iterable, Set, Tuple
from qdrant_client.http import models as rest
from app.core.qdrant import QdrantConfig, get_client
def _cols(prefix: str):
    """Return the ``(notes, chunks, edges)`` collection names for *prefix*."""
    return tuple(f"{prefix}_{suffix}" for suffix in ("notes", "chunks", "edges"))
class GraphService:
    """Lightweight graph layer over the ``<prefix>_notes/_chunks/_edges`` collections.

    All access goes through Qdrant ``scroll`` requests; edges are looked up via
    their payload fields ``kind``, ``source_id`` and ``target_id``.
    """

    def __init__(self, cfg: Optional[QdrantConfig] = None, prefix: Optional[str] = None):
        """Create the service.

        Args:
            cfg: Qdrant configuration; read from the environment when omitted.
            prefix: Collection prefix.  When given it is written onto the
                config object itself, i.e. a caller-supplied *cfg* is modified.
        """
        self.cfg = cfg or QdrantConfig.from_env()
        if prefix:
            self.cfg.prefix = prefix
        self.client = get_client(self.cfg)
        self.notes_col, self.chunks_col, self.edges_col = _cols(self.cfg.prefix)

    # ------------------------ fetch helpers ------------------------
    def _scroll(self, col: str, flt: Optional[rest.Filter] = None, limit: int = 256):
        """Return all points of *col* matching *flt*, paging *limit* at a time."""
        out = []
        nextp = None
        while True:
            pts, nextp = self.client.scroll(
                collection_name=col,
                with_payload=True,
                with_vectors=False,
                limit=limit,
                offset=nextp,
                scroll_filter=flt,
            )
            if not pts:
                break
            out.extend(pts)
            if nextp is None:
                break
        return out

    def _edge_payloads(self, endpoint_field: str, node_id: str,
                       kinds: Optional[List[str]]) -> List[Dict[str, Any]]:
        """Return payloads of edges whose *endpoint_field* equals *node_id*.

        *endpoint_field* is ``"source_id"`` for outgoing and ``"target_id"``
        for incoming edges.  A non-empty *kinds* restricts the edge ``kind``.
        """
        must = [rest.FieldCondition(key=endpoint_field, match=rest.MatchValue(value=node_id))]
        if kinds:
            must.append(rest.FieldCondition(key="kind", match=rest.MatchAny(any=kinds)))
        points = self._scroll(self.edges_col, rest.Filter(must=must))
        return [pt.payload or {} for pt in points]

    # ------------------------ public API ---------------------------
    def get_note(self, note_id: str) -> Optional[Dict[str, Any]]:
        """Return the payload of the note with *note_id*, or ``None`` if absent or empty."""
        f = rest.Filter(must=[rest.FieldCondition(key="note_id", match=rest.MatchValue(value=note_id))])
        pts, _ = self.client.scroll(self.notes_col, with_payload=True, with_vectors=False, limit=1, scroll_filter=f)
        return (pts[0].payload or None) if pts else None

    def get_chunks(self, note_id: str) -> List[Dict[str, Any]]:
        """Return the chunk payloads of a note in reading order.

        Chunks are sorted by ``seq``, then ``chunk_index``, then the integer
        after the last ``#`` of ``chunk_id`` (0 when absent or not numeric).
        """
        f = rest.Filter(must=[rest.FieldCondition(key="note_id", match=rest.MatchValue(value=note_id))])
        pts = self._scroll(self.chunks_col, f)

        def sort_key(point):
            payload = point.payload or {}
            seq = payload.get("seq") or 0
            chunk_index = payload.get("chunk_index") or 0
            suffix = 0
            chunk_id = payload.get("chunk_id") or ""
            if isinstance(chunk_id, str) and "#" in chunk_id:
                try:
                    suffix = int(chunk_id.rsplit("#", 1)[-1])
                except Exception:
                    suffix = 0
            return (int(seq), int(chunk_index), suffix)

        return [p.payload or {} for p in sorted(pts, key=sort_key)]

    def neighbors(self, source_id: str, kinds: Optional[Iterable[str]] = None,
                  scope: Optional[Iterable[str]] = None, depth: int = 1) -> Dict[str, List[Dict[str, Any]]]:
        """Return the direct outgoing and incoming edges of *source_id*.

        Only *kinds* is used for filtering; *scope* and *depth* are accepted
        for interface compatibility but currently have no effect.

        Returns:
            ``{"out": [...], "in": [...]}`` with the edge payloads whose
            ``source_id`` / ``target_id`` equals *source_id*.
        """
        kinds = list(kinds) if kinds else None
        return {
            "out": self._edge_payloads("source_id", source_id, kinds),
            "in": self._edge_payloads("target_id", source_id, kinds),
        }

    def walk_bfs(self, source_id: str, kinds: Iterable[str], max_depth: int = 2) -> Set[str]:
        """Breadth-first walk along outgoing edges, at most *max_depth* levels deep.

        Returns the set of reached ids including *source_id*.
        """
        visited: Set[str] = {source_id}
        frontier: Set[str] = {source_id}
        kind_list = list(kinds)
        for _ in range(max_depth):
            nxt: Set[str] = set()
            for node in frontier:
                # Only outgoing edges are followed, so only those are fetched
                # (neighbors() would additionally scroll the incoming edges).
                for edge in self._edge_payloads("source_id", node, kind_list):
                    target = edge.get("target_id")
                    if isinstance(target, str) and target not in visited:
                        visited.add(target)
                        nxt.add(target)
            frontier = nxt
            if not frontier:
                break
        return visited

    def context_for_note(self, note_id: str, kinds: Iterable[str] = ("references","backlink"), max_neighbors: int = 12) -> Dict[str, Any]:
        """Collect a heuristic context for a note.

        Returns the note payload, its ordered chunks, the payloads of up to
        *max_neighbors* distinct neighbour notes (targets of outgoing edges
        first, then sources of incoming edges) and the raw edge payloads.
        Neighbour ids for which no note is found are dropped.
        """
        note = self.get_note(note_id) or {}
        chunks = self.get_chunks(note_id)
        neigh = self.neighbors(note_id, kinds=list(kinds))

        candidates = [e.get("target_id") for e in neigh["out"]]
        candidates += [e.get("source_id") for e in neigh["in"]]
        # De-duplicate while preserving order; non-string ids are ignored.
        uniq = list(dict.fromkeys(c for c in candidates if isinstance(c, str)))[:max_neighbors]
        neighbor_notes = [self.get_note(t) for t in uniq]
        return {
            "note": note,
            "chunks": chunks,
            "neighbors": [n for n in neighbor_notes if n],
            "edges_out": neigh["out"],
            "edges_in": neigh["in"],
        }
# Optional: Mini-CLI
# Optional mini CLI: print the context (or only the neighbours) of a note as JSON.
if __name__ == "__main__":  # pragma: no cover
    import argparse
    import json

    parser = argparse.ArgumentParser()
    parser.add_argument("--prefix", help="Collection-Prefix (überschreibt ENV)")
    parser.add_argument("--note-id", required=True)
    parser.add_argument("--neighbors", action="store_true", help="Nur Nachbarn anzeigen")
    cli = parser.parse_args()

    service = GraphService(prefix=cli.prefix)
    if cli.neighbors:
        all_kinds = ["references", "backlink", "prev", "next", "belongs_to"]
        result = service.neighbors(cli.note_id, kinds=all_kinds)
    else:
        result = service.context_for_note(cli.note_id)
    print(json.dumps(result, ensure_ascii=False, indent=2))

View File

@ -10,8 +10,8 @@ LAST_ANALYSIS: 2025-12-15
from __future__ import annotations
from fastapi import FastAPI
from .config import get_settings
from .routers.embed_router import router as embed_router
from .routers.qdrant_router import router as qdrant_router
#from .routers.embed_router import router as embed_router
#from .routers.qdrant_router import router as qdrant_router
from .routers.query import router as query_router
from .routers.graph import router as graph_router
@ -35,8 +35,8 @@ def create_app() -> FastAPI:
def healthz():
return {"status": "ok", "qdrant": s.QDRANT_URL, "prefix": s.COLLECTION_PREFIX}
app.include_router(embed_router)
app.include_router(qdrant_router)
# app.include_router(embed_router)
# app.include_router(qdrant_router)
app.include_router(query_router, prefix="/query", tags=["query"])
app.include_router(graph_router, prefix="/graph", tags=["graph"])