erster Stand

This commit is contained in:
Lars 2025-12-10 22:05:35 +01:00
parent 865b261294
commit 0170d9291f
4 changed files with 286 additions and 5 deletions

View File

@ -261,4 +261,42 @@ class IngestionService:
try:
self.client.delete(collection_name=f"{self.prefix}_{suffix}", points_selector=selector)
except Exception:
pass
pass
def create_from_text(
    self,
    markdown_content: str,
    filename: str,
    vault_root: str,
    folder: str = "Inbox"  # default folder for newly created files
) -> Dict[str, Any]:
    """
    WP-11: Write text to a physical file and index it immediately.

    The markdown is persisted to ``vault_root/folder`` first (the file on
    disk is the single source of truth), then handed to the existing
    ingestion pipeline via ``process_file``.
    """
    # Make sure the destination folder exists.
    destination = os.path.join(vault_root, folder)
    os.makedirs(destination, exist_ok=True)

    # Strip any directory components from the caller-supplied name (safety)
    # and guarantee a .md extension.
    safe_name = os.path.basename(filename)
    if not safe_name.endswith(".md"):
        safe_name = safe_name + ".md"
    target_path = os.path.join(destination, safe_name)

    # Persist to disk; report failures as a structured error result.
    try:
        with open(target_path, "w", encoding="utf-8") as handle:
            handle.write(markdown_content)
    except Exception as exc:
        return {"status": "error", "error": f"Disk write failed: {str(exc)}"}

    # Delegate indexing to the already existing pipeline.
    return self.process_file(
        file_path=target_path,
        vault_root=vault_root,
        apply=True,           # write immediately
        force_replace=True,   # new file, force an update
        purge_before=True     # clean start
    )

View File

@ -11,16 +11,18 @@ from .routers.query import router as query_router
from .routers.graph import router as graph_router
from .routers.tools import router as tools_router
from .routers.feedback import router as feedback_router
# NEU: Chat Router (WP-05)
from .routers.chat import router as chat_router
# NEU: Ingest Router (WP-11)
from .routers.ingest import router as ingest_router
try:
from .routers.admin import router as admin_router
except Exception:
admin_router = None
def create_app() -> FastAPI:
app = FastAPI(title="mindnet API", version="0.5.0") # Version bump WP-05
app = FastAPI(title="mindnet API", version="0.6.0") # Version bump WP-11
s = get_settings()
@app.get("/healthz")
@ -34,9 +36,10 @@ def create_app() -> FastAPI:
app.include_router(graph_router, prefix="/graph", tags=["graph"])
app.include_router(tools_router, prefix="/tools", tags=["tools"])
app.include_router(feedback_router, prefix="/feedback", tags=["feedback"])
# NEU: Chat Endpoint
app.include_router(chat_router, prefix="/chat", tags=["chat"])
# NEU: Registrierung des Ingest-Routers
app.include_router(ingest_router, prefix="/ingest", tags=["ingest"])
if admin_router:
app.include_router(admin_router, prefix="/admin", tags=["admin"])

89
app/routers/ingest.py Normal file
View File

@ -0,0 +1,89 @@
"""
app/routers/ingest.py
API-Endpunkte für WP-11 (Discovery & Persistence).
"""
import os
import time
from fastapi import APIRouter, HTTPException
from pydantic import BaseModel
from typing import Optional, List, Dict, Any
from app.core.ingestion import IngestionService
from app.services.discovery import DiscoveryService
router = APIRouter()
# --- DTOs ---
class AnalyzeRequest(BaseModel):
    """Request body for /analyze: a draft text plus its note type."""
    text: str
    # NOTE(review): field name shadows the builtin `type`; kept for API compatibility.
    type: str = "concept"
class SaveRequest(BaseModel):
    """Request body for /save: markdown payload plus target location."""
    markdown_content: str
    filename: Optional[str] = None  # optional, falls back to timestamp/title
    folder: str = "00_Inbox"  # target folder inside the vault
class SaveResponse(BaseModel):
    """Response body for /save: resulting file location and indexing stats."""
    status: str
    file_path: str
    note_id: str
    stats: Dict[str, Any]
# --- Services ---
# Instantiated here; could also be provided via dependency injection.
discovery_service = DiscoveryService()
@router.post("/analyze")
async def analyze_draft(req: AnalyzeRequest):
    """
    WP-11 intelligence: analyze a draft and return link suggestions.

    Delegates to the DiscoveryService; any failure is surfaced as HTTP 500.
    """
    try:
        return await discovery_service.analyze_draft(req.text, req.type)
    except Exception as exc:
        raise HTTPException(status_code=500, detail=f"Analysis failed: {str(exc)}")
@router.post("/save", response_model=SaveResponse)
async def save_note(req: SaveRequest):
    """
    WP-11 persistence: write markdown to disk and index it into Qdrant.

    Resolves the vault root, picks a filename if none was given, and hands
    off to IngestionService.create_from_text.
    """
    # 1. Resolve the vault root (env var first, then a dev-environment fallback).
    root = os.getenv("MINDNET_VAULT_ROOT", "./vault")
    if not os.path.exists(root):
        if not os.path.exists("../vault"):
            raise HTTPException(status_code=500, detail="Vault root not configured or missing")
        root = "../vault"

    # 2. Fall back to a timestamp-based filename when none was supplied.
    final_name = req.filename or f"draft_{int(time.time())}.md"

    # 3. Delegate write + indexing to the ingestion pipeline.
    service = IngestionService()  # uses default prefix or env configuration
    outcome = service.create_from_text(
        markdown_content=req.markdown_content,
        filename=final_name,
        vault_root=os.path.abspath(root),
        folder=req.folder
    )

    if outcome.get("status") == "error":
        raise HTTPException(status_code=500, detail=outcome.get("error"))

    return SaveResponse(
        status="success",
        file_path=outcome["path"],
        note_id=outcome.get("note_id", "unknown"),
        stats={
            "chunks": outcome.get("chunks_count", 0),
            "edges": outcome.get("edges_count", 0)
        }
    )

151
app/services/discovery.py Normal file
View File

@ -0,0 +1,151 @@
"""
app/services/discovery.py
Service für Link-Vorschläge und Knowledge-Discovery (WP-11).
Analysiert Drafts auf Keywords und semantische Ähnlichkeiten.
"""
import logging
from typing import List, Dict, Any, Set
from qdrant_client.http import models as rest
from app.core.qdrant import QdrantConfig, get_client
from app.models.dto import QueryRequest
from app.core.retriever import hybrid_retrieve
logger = logging.getLogger(__name__)
class DiscoveryService:
    """
    Link suggestion and knowledge discovery for draft notes (WP-11).

    Combines exact matches (existing note titles/aliases found verbatim in
    the draft) with semantic matches from the hybrid vector retriever.
    """

    def __init__(self, collection_prefix: str = "mindnet"):
        self.prefix = collection_prefix
        self.cfg = QdrantConfig.from_env()
        self.cfg.prefix = collection_prefix
        self.client = get_client(self.cfg)

    async def analyze_draft(self, text: str, current_type: str) -> Dict[str, Any]:
        """
        Analyze a draft text and propose links.

        Exact title/alias matches come first; semantic hits that duplicate
        an exact match are dropped, and the rest are filtered by score.
        """
        # Pass 1: exact matches against all known titles and aliases.
        # (For very large vaults >10k this catalog should be cached.)
        catalog = self._fetch_all_titles_and_aliases()
        matches = self._find_entities_in_text(text, catalog)

        seen_ids = {m["id"] for m in matches}
        proposals = [
            {
                "type": "exact_match",
                "text_found": m["match"],
                "target_title": m["title"],
                "target_id": m["id"],
                "confidence": 1.0,
                "reason": "Existierender Notiz-Titel"
            }
            for m in matches
        ]

        # Pass 2: semantically similar notes via vector search; skip anything
        # already covered by an exact match.
        for hit in self._get_semantic_suggestions(text):
            if hit.node_id in seen_ids:
                continue
            # Threshold: only surface relevant suggestions (e.g. > 0.65).
            # total_score already includes the per-type weights.
            if hit.total_score > 0.65:
                proposals.append({
                    "type": "semantic_match",
                    "text_found": (hit.source.get("text") or "")[:50] + "...",  # snippet
                    "target_title": hit.payload.get("title", "Unbekannt"),
                    "target_id": hit.node_id,
                    "confidence": round(hit.total_score, 2),
                    "reason": f"Inhaltliche Ähnlichkeit (Score: {round(hit.total_score, 2)})"
                })

        return {
            "draft_length": len(text),
            "suggestions_count": len(proposals),
            "suggestions": proposals
        }

    def _fetch_all_titles_and_aliases(self) -> List[Dict]:
        """Load every title and alias from the notes collection (scroll API)."""
        collection = f"{self.prefix}_notes"
        records: List[Dict] = []
        cursor = None
        try:
            while True:
                # Scroll through the collection in pages of 1000 points,
                # payload only (vectors are not needed here).
                batch, cursor = self.client.scroll(
                    collection_name=collection,
                    limit=1000,
                    offset=cursor,
                    with_payload=True,
                    with_vectors=False
                )
                for point in batch:
                    payload = point.payload or {}
                    records.append({
                        "id": payload.get("note_id"),
                        "title": payload.get("title"),
                        "aliases": payload.get("aliases", [])
                    })
                if cursor is None:
                    break
        except Exception as e:
            logger.error(f"Error fetching titles: {e}")
            return []
        return records

    def _find_entities_in_text(self, text: str, entities: List[Dict]) -> List[Dict]:
        """
        Case-insensitive substring search for known titles/aliases in *text*.

        The title has priority: when it matches, aliases are not checked.
        """
        haystack = text.lower()
        hits = []
        for entity in entities:
            # 1. Check the title.
            title = entity.get("title")
            if title and title.lower() in haystack:
                hits.append({"match": title, "title": title, "id": entity["id"]})
                continue  # title matched -> skip aliases (priority)
            # 2. Check the aliases.
            aliases = entity.get("aliases")
            if aliases and isinstance(aliases, list):
                for alias in aliases:
                    if alias and str(alias).lower() in haystack:
                        # The link target is always the primary title.
                        hits.append({"match": alias, "title": title, "id": entity["id"]})
                        break
        return hits

    def _get_semantic_suggestions(self, text: str):
        """Thin wrapper around the hybrid retriever using a simplified query."""
        request = QueryRequest(query=text, top_k=5, explain=False)
        try:
            # hybrid_retrieve is sync, but fast enough for this context.
            return hybrid_retrieve(request).results
        except Exception as e:
            logger.error(f"Semantic suggestion failed: {e}")
            return []