prune
This commit is contained in:
parent
4204c2c974
commit
7263fee4c7
|
|
@ -1,82 +0,0 @@
|
|||
from __future__ import annotations
|
||||
import os, time, json
|
||||
import urllib.request
|
||||
from typing import List, Dict, Any
|
||||
|
||||
# Backend selection:
# - EMBED_BACKEND=ollama -> EMBED_URL=/api/embeddings (Ollama), EMBED_MODEL=e.g. nomic-embed-text
# - EMBED_BACKEND=mini   -> EMBED_URL=/embed (our MiniLM server), EMBED_MODEL=minilm-384
EMBED_BACKEND = os.getenv("EMBED_BACKEND", "mini").lower()
EMBED_URL = os.getenv("EMBED_URL", "http://127.0.0.1:8990/embed")
EMBED_MODEL = os.getenv("EMBED_MODEL", "minilm-384")
EMBED_BATCH = int(os.getenv("EMBED_BATCH", "64"))  # texts per HTTP request (mini backend)
TIMEOUT = 60  # seconds; timeout for every embedding HTTP call
|
||||
|
||||
class EmbedError(RuntimeError):
    """Raised when an embedding backend returns an unusable response."""
|
||||
|
||||
def _post_json(url: str, payload: Dict[str, Any]) -> Dict[str, Any]:
    """POST *payload* as JSON to *url* and return the decoded JSON response."""
    body = json.dumps(payload).encode("utf-8")
    request = urllib.request.Request(
        url,
        data=body,
        headers={"Content-Type": "application/json"},
    )
    with urllib.request.urlopen(request, timeout=TIMEOUT) as response:
        raw = response.read()
    return json.loads(raw.decode("utf-8"))
|
||||
|
||||
def _embed_mini(inputs: List[str], model: str, batch: int) -> List[List[float]]:
    """Embed *inputs* via the MiniLM server, *batch* texts per request.

    Each batch is retried up to 5 times with a linearly growing backoff;
    the last failure is re-raised.
    """
    vectors: List[List[float]] = []
    pos = 0
    total = len(inputs)
    while pos < total:
        chunk = inputs[pos:pos + batch]
        # simple retries
        for attempt in range(5):
            try:
                resp = _post_json(EMBED_URL, {"model": model, "inputs": chunk})
                # servers differ in the key they use for the vectors
                embs = resp.get("embeddings") or resp.get("vectors") or resp.get("data")
                if not isinstance(embs, list):
                    raise EmbedError(f"Bad embed response keys: {list(resp.keys())}")
                vectors.extend(embs)
                break
            except Exception:
                if attempt == 4:
                    raise
                time.sleep(1.5 * (attempt + 1))
        pos += batch
    return vectors
|
||||
|
||||
def _embed_ollama(inputs: List[str], model: str, batch: int) -> List[List[float]]:
    """Embed each text individually via Ollama's /api/embeddings endpoint.

    Ollama accepts "input" as a string OR an array. Depending on the server
    version the response carries either {"embedding": [...]} (single input)
    or {"embeddings": [[...], ...]} (array input). For maximum compatibility
    we issue one request per text; *batch* is accepted for interface parity
    but unused here.
    """
    vectors: List[List[float]] = []
    for text in inputs:
        # retries with linearly growing backoff
        for attempt in range(5):
            try:
                resp = _post_json(EMBED_URL, {"model": model, "input": text})
                single = resp.get("embedding") if isinstance(resp, dict) else None
                if isinstance(single, list):
                    vectors.append(single)
                else:
                    many = resp.get("embeddings")
                    if isinstance(many, list):
                        # server returned an array: take the first element
                        vectors.append(many[0] if many else [])
                    else:
                        raise EmbedError(f"Ollama response unexpected keys: {list(resp.keys())}")
                break
            except Exception:
                if attempt == 4:
                    raise
                time.sleep(1.5 * (attempt + 1))
    return vectors
|
||||
|
||||
def embed_texts(texts: List[str], model: str | None = None, batch_size: int | None = None) -> List[List[float]]:
    """Embed *texts* using the configured backend; returns one vector per text.

    Falls back to the module-level EMBED_MODEL / EMBED_BATCH defaults when
    *model* / *batch_size* are not given. An empty input yields an empty list.
    """
    chosen_model = model or EMBED_MODEL
    chosen_batch = batch_size or EMBED_BATCH
    if not texts:
        return []
    if EMBED_BACKEND == "ollama":
        return _embed_ollama(texts, chosen_model, chosen_batch)
    # anything else falls back to the MiniLM backend
    return _embed_mini(texts, chosen_model, chosen_batch)
|
||||
|
||||
def embed_one(text: str, model: str | None = None) -> List[float]:
    """Convenience wrapper: embed a single *text* and return its vector."""
    vectors = embed_texts([text], model=model, batch_size=1)
    return vectors[0]
|
||||
|
|
@ -1,160 +0,0 @@
|
|||
"""
|
||||
Version 0.1
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Optional, List
|
||||
import uuid
|
||||
|
||||
from fastapi import APIRouter
|
||||
from pydantic import BaseModel, Field
|
||||
from qdrant_client import QdrantClient
|
||||
from qdrant_client.http.models import (
|
||||
Distance,
|
||||
VectorParams,
|
||||
PointStruct,
|
||||
Filter,
|
||||
FieldCondition,
|
||||
MatchValue,
|
||||
)
|
||||
|
||||
from ..config import get_settings
|
||||
from ..embeddings import embed_texts
|
||||
|
||||
# FastAPI sub-router; every endpoint below is mounted under the /qdrant prefix.
router = APIRouter(prefix="/qdrant", tags=["qdrant"])
|
||||
|
||||
def _client() -> QdrantClient:
    """Build a Qdrant client from the application settings."""
    settings = get_settings()
    return QdrantClient(url=settings.QDRANT_URL, api_key=settings.QDRANT_API_KEY)
|
||||
|
||||
def _col(name: str) -> str:
    """Return the fully-qualified collection name: '<prefix>_<name>'."""
    prefix = get_settings().COLLECTION_PREFIX
    return f"{prefix}_{name}"
|
||||
|
||||
def _uuid5(s: str) -> str:
|
||||
"""Deterministic UUIDv5 from arbitrary string (server-side point id)."""
|
||||
return str(uuid.uuid5(uuid.NAMESPACE_URL, s))
|
||||
|
||||
# --- Models ---
class BaseMeta(BaseModel):
    # Shared metadata for chunk/note upsert payloads.
    note_id: str = Field(..., description="Stable ID of the note (e.g., hash of vault-relative path)")
    title: Optional[str] = Field(None, description="Note or chunk title")
    path: Optional[str] = Field(None, description="Vault-relative path to the .md file")
    # NOTE(review): the capitalized German field names (Typ/Status/Rolle) presumably
    # mirror the vault's frontmatter keys — renaming them would change the API payload.
    Typ: Optional[str] = None
    Status: Optional[str] = None
    tags: Optional[List[str]] = None
    Rolle: Optional[List[str]] = None  # allow list
|
||||
|
||||
class UpsertChunkRequest(BaseMeta):
    # Request body for /qdrant/upsert_chunk: one chunk of a note plus its text.
    chunk_id: str = Field(..., description="Stable ID of the chunk within the note")
    text: str = Field(..., description="Chunk text content")
    links: Optional[List[str]] = Field(default=None, description="Outbound links detected in the chunk")
|
||||
|
||||
class UpsertNoteRequest(BaseMeta):
    # Request body for /qdrant/upsert_note; when text is omitted, the endpoint
    # embeds the title (or the note_id) instead.
    text: Optional[str] = Field(None, description="Full note text (optional)")
|
||||
|
||||
class UpsertEdgeRequest(BaseModel):
    # Request body for /qdrant/upsert_edge: a directed edge between notes/chunks.
    src_note_id: str
    dst_note_id: Optional[str] = None
    src_chunk_id: Optional[str] = None
    dst_chunk_id: Optional[str] = None
    relation: str = Field(default="links_to")  # edge label
    link_text: Optional[str] = None  # anchor/link text, if any
|
||||
|
||||
class QueryRequest(BaseModel):
    # Request body for /qdrant/query: free-text query plus optional exact-match filters.
    query: str
    limit: int = 5  # max number of hits returned
    note_id: Optional[str] = None
    path: Optional[str] = None
    tags: Optional[List[str]] = None  # every given tag must match (AND semantics)
|
||||
|
||||
# --- Helpers ---
def _ensure_collections():
    """Create the chunks/notes/edges collections if they do not exist yet.

    Chunks and notes use the configured vector size; edges carry no real
    embedding and get a dummy 1-dimensional vector instead.
    """
    settings = get_settings()
    cli = _client()
    specs = [
        ("chunks", settings.VECTOR_SIZE),
        ("notes", settings.VECTOR_SIZE),
        ("edges", 1),  # dummy vector of size 1
    ]
    for name, size in specs:
        try:
            cli.get_collection(_col(name))
        except Exception:
            cli.recreate_collection(
                _col(name),
                vectors_config=VectorParams(size=size, distance=Distance.COSINE),
            )
|
||||
|
||||
@router.post("/upsert_chunk", summary="Upsert a chunk into mindnet_chunks")
def upsert_chunk(req: UpsertChunkRequest) -> dict:
    """Embed the chunk text and upsert it into the chunks collection."""
    _ensure_collections()
    cli = _client()
    vector = embed_texts([req.text])[0]
    payload: dict[str, Any] = req.model_dump()
    # The full text is not stored; keep only a short preview in the payload.
    payload.pop("text", None)
    if len(req.text) > 240:
        payload["preview"] = req.text[:240] + "…"
    else:
        payload["preview"] = req.text
    point_id = _uuid5(f"chunk:{req.chunk_id}")
    point = PointStruct(id=point_id, vector=vector, payload=payload)
    cli.upsert(collection_name=_col("chunks"), points=[point])
    return {"status": "ok", "id": point_id}
|
||||
|
||||
@router.post("/upsert_note", summary="Upsert a note into mindnet_notes")
def upsert_note(req: UpsertNoteRequest) -> dict:
    """Embed the note text (or a fallback) and upsert it into the notes collection."""
    _ensure_collections()
    cli = _client()
    # Fall back to the title or the note id when no full text was supplied.
    source_text = req.text if req.text else (req.title or req.note_id)
    vector = embed_texts([source_text])[0]
    payload: dict[str, Any] = req.model_dump()
    payload.pop("text", None)  # the raw text itself is not stored
    point_id = _uuid5(f"note:{req.note_id}")
    point = PointStruct(id=point_id, vector=vector, payload=payload)
    cli.upsert(collection_name=_col("notes"), points=[point])
    return {"status": "ok", "id": point_id}
|
||||
|
||||
@router.post("/upsert_edge", summary="Upsert a graph edge into mindnet_edges")
def upsert_edge(req: UpsertEdgeRequest) -> dict:
    """Store a graph edge as a point with a dummy 1-d vector."""
    _ensure_collections()
    cli = _client()
    payload = req.model_dump()
    # Edge identity is derived from all endpoint ids plus the relation, so the
    # same logical edge always maps to the same point id.
    raw_edge_id = (
        f"{req.src_note_id}|{req.src_chunk_id or ''}"
        f"->{req.dst_note_id or ''}|{req.dst_chunk_id or ''}|{req.relation}"
    )
    point_id = _uuid5(f"edge:{raw_edge_id}")
    point = PointStruct(id=point_id, vector=[0.0], payload=payload)
    cli.upsert(collection_name=_col("edges"), points=[point])
    return {"status": "ok", "id": point_id}
|
||||
|
||||
@router.post("/query", summary="Vector query over mindnet_chunks with optional filters")
def query(req: QueryRequest) -> dict:
    """Embed the query text and search the chunks collection, optionally filtered."""
    _ensure_collections()
    cli = _client()
    vector = embed_texts([req.query])[0]

    # Build an AND filter from whichever optional criteria are present.
    conditions = []
    if req.note_id:
        conditions.append(FieldCondition(key="note_id", match=MatchValue(value=req.note_id)))
    if req.path:
        conditions.append(FieldCondition(key="path", match=MatchValue(value=req.path)))
    for tag in req.tags or []:
        conditions.append(FieldCondition(key="tags", match=MatchValue(value=tag)))
    query_filter: Optional[Filter] = Filter(must=conditions) if conditions else None

    results = cli.search(
        collection_name=_col("chunks"),
        query_vector=vector,
        limit=req.limit,
        with_payload=True,
        with_vectors=False,
        query_filter=query_filter,
    )
    hits = []
    for point in results:
        payload = point.payload or {}
        hits.append({
            "chunk_id": point.id,
            "score": point.score,
            "note_id": payload.get("note_id"),
            "title": payload.get("title"),
            "path": payload.get("path"),
            "preview": payload.get("preview"),
            "tags": payload.get("tags"),
        })
    return {"results": hits}
|
||||
|
|
@ -1,88 +0,0 @@
|
|||
"""
|
||||
app/services/llm_ollama.py — Ollama-Integration & Prompt-Bau (WP-04)
|
||||
|
||||
Zweck:
|
||||
Prompt-Template & (optionaler) lokaler Aufruf von Ollama. Der Aufruf ist
|
||||
bewusst gekapselt und kann gefahrlos deaktiviert bleiben, bis ihr ein
|
||||
konkretes Modell konfigurieren wollt.
|
||||
Kompatibilität:
|
||||
Python 3.12+
|
||||
Version:
|
||||
0.1.0 (Erstanlage)
|
||||
Stand:
|
||||
2025-10-07
|
||||
Bezug:
|
||||
WP-04/05 Kontextbereitstellung für LLM
|
||||
Nutzung:
|
||||
from app.services.llm_ollama import build_prompt, call_ollama
|
||||
Änderungsverlauf:
|
||||
0.1.0 (2025-10-07) – Erstanlage.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
from typing import List, Dict, Optional
|
||||
import subprocess
|
||||
import json
|
||||
|
||||
PROMPT_TEMPLATE = """System: You are a helpful expert.
|
||||
User: {question}
|
||||
|
||||
Context (ranked):
|
||||
{contexts}
|
||||
|
||||
Task: Answer precisely. At the end, list sources (note title + section) and important edge paths.
|
||||
"""
|
||||
|
||||
|
||||
def build_context_block(items: List[Dict]) -> str:
    """Format the top-k context chunks into a numbered block for the prompt."""

    def _entry(rank: int, item: Dict) -> str:
        # Tolerate both old and new payload key names.
        note = item.get("note_title", "") or item.get("note_id", "")
        section = item.get("section", "") or item.get("section_title", "")
        score = item.get("score", 0)
        text = item.get("text", "") or item.get("body", "") or ""
        return f"{rank}) {note} — {section} [score={score:.2f}]\n{text}\n"

    return "\n".join(_entry(rank, item) for rank, item in enumerate(items, 1))
|
||||
|
||||
|
||||
def build_prompt(question: str, contexts: List[Dict]) -> str:
    """Render question + ranked contexts into the shared prompt template."""
    context_block = build_context_block(contexts)
    return PROMPT_TEMPLATE.format(question=question, contexts=context_block)
|
||||
|
||||
|
||||
def call_ollama(prompt: str, model: str = "llama3.1:8b", timeout_s: int = 120) -> Optional[str]:
    """
    Optional local invocation of `ollama run`.

    Returns the generated text, or None on any error/timeout. Only use this
    when Ollama is installed and configured locally.
    """
    try:
        proc = subprocess.run(
            ["ollama", "run", model],
            input=prompt.encode("utf-8"),
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            timeout=timeout_s,
            check=False,
        )
        raw = proc.stdout.decode("utf-8", errors="replace")
        # Many ollama builds stream JSON lines; extract the "response" fields
        # robustly, keeping non-JSON lines verbatim and falling back to the
        # raw output on anything unexpected.
        try:
            pieces = []
            for line in raw.splitlines():
                stripped = line.strip()
                if not stripped:
                    continue
                try:
                    obj = json.loads(stripped)
                    if "response" in obj:
                        pieces.append(obj["response"])
                except Exception:
                    pieces.append(stripped)
            return "".join(pieces).strip()
        except Exception:
            return raw.strip()
    except Exception:
        return None
|
||||
|
|
@ -1,90 +0,0 @@
|
|||
{
|
||||
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
||||
"title": "mindnet_chunk",
|
||||
"type": "object",
|
||||
"description": "Chunk-Payload (Qdrant). Kompatibel mit Alt-Feldern und neuen Feldern für Export/Roundtrip.",
|
||||
"required": ["id", "note_id", "chunk_index", "path"],
|
||||
"properties": {
|
||||
"id": { "type": "string" },
|
||||
"scope": { "type": "string", "enum": ["chunk"] },
|
||||
"note_id": { "type": "string" },
|
||||
"note_title": { "type": "string" },
|
||||
"note_type": { "type": "string" },
|
||||
"note_status": { "type": "string" },
|
||||
"type": { "type": "string", "description": "Legacy: früherer Chunk-Typ; kann dem Note-Typ entsprechen" },
|
||||
"area": { "type": "string" },
|
||||
"project": { "type": "string" },
|
||||
"tags": { "type": "array", "items": { "type": "string" } },
|
||||
|
||||
"note_path": { "type": "string" },
|
||||
"path": { "type": "string" },
|
||||
|
||||
"chunk_index": { "type": "integer" },
|
||||
"section_title":{ "type": ["string","null"] },
|
||||
"section_path": { "type": ["string","null"] },
|
||||
|
||||
"char_start": { "type": ["integer","null"] },
|
||||
"char_end": { "type": ["integer","null"] },
|
||||
"char_len": { "type": "integer" },
|
||||
|
||||
"token_count": { "type": "integer", "description": "Legacy: frühere Token-Zahl" },
|
||||
"token_est": { "type": "integer", "description": "Neue grobe Token-Schätzung (≈ len(text)/4)" },
|
||||
|
||||
"neighbors": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"prev": { "type": ["string","null"] },
|
||||
"next": { "type": ["string","null"] }
|
||||
},
|
||||
"additionalProperties": false
|
||||
},
|
||||
|
||||
"text": { "type": "string" },
|
||||
"text_sha256": { "type": "string", "pattern": "^sha256:[0-9a-fA-F]{64}$" },
|
||||
"lang": { "type": "string" },
|
||||
|
||||
"wikilinks": { "type": "array", "items": { "type": "string" } },
|
||||
"external_links": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"anyOf": [
|
||||
{ "type": "string" },
|
||||
{
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"href": { "type": "string" },
|
||||
"label": { "type": ["string","null"] }
|
||||
},
|
||||
"required": ["href"],
|
||||
"additionalProperties": false
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"references": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"target_id": { "type": "string" },
|
||||
"kind": { "type": "string" }
|
||||
},
|
||||
"required": ["target_id","kind"],
|
||||
"additionalProperties": true
|
||||
}
|
||||
},
|
||||
|
||||
"embed_model": { "type": "string" },
|
||||
"embed_dim": { "type": "integer" },
|
||||
"embed_version": { "type": "integer" },
|
||||
|
||||
"created_at": { "type": "string" }
|
||||
},
|
||||
|
||||
"allOf": [
|
||||
{ "anyOf": [ { "required": ["token_count"] }, { "required": ["token_est"] } ] },
|
||||
{ "anyOf": [ { "required": ["type"] }, { "required": ["note_type"] } ] }
|
||||
],
|
||||
|
||||
"additionalProperties": true
|
||||
}
|
||||
|
|
@ -1,31 +0,0 @@
|
|||
{
|
||||
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
||||
"title": "mindnet_edge",
|
||||
"type": "object",
|
||||
"description": "Edge-Payload (Qdrant). Unterstützt Legacy (edge_type/src_id/dst_id) und neues Schema (kind/source_id/target_id/note_id/status).",
|
||||
|
||||
"properties": {
|
||||
"scope": { "type": "string", "enum": ["note","chunk"] },
|
||||
|
||||
"edge_type": { "type": "string", "description": "Legacy: z. B. references/backlink/belongs_to/prev/next" },
|
||||
"src_id": { "type": "string", "description": "Legacy: source_id" },
|
||||
"dst_id": { "type": "string", "description": "Legacy: target_id" },
|
||||
|
||||
"kind": { "type": "string", "description": "Neu: z. B. references/backlink/belongs_to/prev/next" },
|
||||
"source_id": { "type": "string" },
|
||||
"target_id": { "type": "string" },
|
||||
"note_id": { "type": "string", "description": "Owner-Note für diesen Edge (Filter/Purge)" },
|
||||
"status": { "type": "string", "description": "optional, z. B. 'unresolved'" },
|
||||
|
||||
"weight": { "type": "number" },
|
||||
"meta": { "type": "object" },
|
||||
"created_at":{ "type": "string" }
|
||||
},
|
||||
|
||||
"anyOf": [
|
||||
{ "required": ["src_id", "dst_id", "edge_type", "scope"] },
|
||||
{ "required": ["source_id", "target_id", "kind", "scope"] }
|
||||
],
|
||||
|
||||
"additionalProperties": true
|
||||
}
|
||||
|
|
@ -1,45 +0,0 @@
|
|||
{
|
||||
"$schema": "http://json-schema.org/draft-07/schema#",
|
||||
"title": "mindnet note payload",
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"note_id": { "type": "string" },
|
||||
"title": { "type": ["string","null"] },
|
||||
"type": { "type": ["string","null"] },
|
||||
"status": { "type": ["string","null"] },
|
||||
"created": { "type": ["string","null"] },
|
||||
"updated": { "type": ["string","null"] },
|
||||
"path": { "type": ["string","null"] },
|
||||
"tags": { "type": ["array","null"], "items": { "type": "string" } },
|
||||
"area": { "type": ["string","null"] },
|
||||
"project": { "type": ["string","null"] },
|
||||
"source": { "type": ["string","null"] },
|
||||
"lang": { "type": ["string","null"] },
|
||||
"slug": { "type": ["string","null"] },
|
||||
"aliases": { "type": ["array","null"], "items": { "type": "string" } },
|
||||
|
||||
"fulltext": { "type": ["string","null"] },
|
||||
"references": { "type": ["array","null"], "items": { "type": "string" } },
|
||||
|
||||
"hash_fulltext": { "type": ["string","null"], "pattern": "^[a-f0-9]{64}$" },
|
||||
"hash_signature": { "type": ["string","null"] },
|
||||
|
||||
"hash_body": { "type": ["string","null"], "pattern": "^[a-f0-9]{64}$" },
|
||||
"hash_frontmatter": { "type": ["string","null"], "pattern": "^[a-f0-9]{64}$" },
|
||||
"hash_full": { "type": ["string","null"], "pattern": "^[a-f0-9]{64}$" },
|
||||
|
||||
"hashes": {
|
||||
"type": ["object","null"],
|
||||
"description": "Mapping: <mode>:<source>:<normalize> -> sha256 hex",
|
||||
"patternProperties": {
|
||||
"^(body|frontmatter|full):(parsed|raw):(canonical|none)$": {
|
||||
"type": "string",
|
||||
"pattern": "^[a-f0-9]{64}$"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false
|
||||
}
|
||||
},
|
||||
"required": ["note_id"],
|
||||
"additionalProperties": true
|
||||
}
|
||||
Loading…
Reference in New Issue
Block a user