erster Stand
This commit is contained in:
parent
865b261294
commit
0170d9291f
|
|
@ -261,4 +261,42 @@ class IngestionService:
|
||||||
try:
|
try:
|
||||||
self.client.delete(collection_name=f"{self.prefix}_{suffix}", points_selector=selector)
|
self.client.delete(collection_name=f"{self.prefix}_{suffix}", points_selector=selector)
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
def create_from_text(
|
||||||
|
self,
|
||||||
|
markdown_content: str,
|
||||||
|
filename: str,
|
||||||
|
vault_root: str,
|
||||||
|
folder: str = "Inbox" # Standard-Ordner für neue Files
|
||||||
|
) -> Dict[str, Any]:
|
||||||
|
"""
|
||||||
|
WP-11: Schreibt Text in eine physische Datei und indiziert sie sofort.
|
||||||
|
"""
|
||||||
|
# 1. Pfad vorbereiten
|
||||||
|
target_dir = os.path.join(vault_root, folder)
|
||||||
|
os.makedirs(target_dir, exist_ok=True)
|
||||||
|
|
||||||
|
# Dateiname bereinigen (Sicherheit)
|
||||||
|
safe_filename = os.path.basename(filename)
|
||||||
|
if not safe_filename.endswith(".md"):
|
||||||
|
safe_filename += ".md"
|
||||||
|
|
||||||
|
file_path = os.path.join(target_dir, safe_filename)
|
||||||
|
|
||||||
|
# 2. Schreiben (Write to Disk - Single Source of Truth)
|
||||||
|
try:
|
||||||
|
with open(file_path, "w", encoding="utf-8") as f:
|
||||||
|
f.write(markdown_content)
|
||||||
|
except Exception as e:
|
||||||
|
return {"status": "error", "error": f"Disk write failed: {str(e)}"}
|
||||||
|
|
||||||
|
# 3. Indizieren (Ingest)
|
||||||
|
# Wir rufen einfach die existierende Logik auf!
|
||||||
|
return self.process_file(
|
||||||
|
file_path=file_path,
|
||||||
|
vault_root=vault_root,
|
||||||
|
apply=True, # Sofort schreiben
|
||||||
|
force_replace=True, # Da neu, erzwingen wir Update
|
||||||
|
purge_before=True # Sauberer Start
|
||||||
|
)
|
||||||
11
app/main.py
11
app/main.py
|
|
@ -11,16 +11,18 @@ from .routers.query import router as query_router
|
||||||
from .routers.graph import router as graph_router
|
from .routers.graph import router as graph_router
|
||||||
from .routers.tools import router as tools_router
|
from .routers.tools import router as tools_router
|
||||||
from .routers.feedback import router as feedback_router
|
from .routers.feedback import router as feedback_router
|
||||||
# NEU: Chat Router (WP-05)
|
|
||||||
from .routers.chat import router as chat_router
|
from .routers.chat import router as chat_router
|
||||||
|
|
||||||
|
# NEU: Ingest Router (WP-11)
|
||||||
|
from .routers.ingest import router as ingest_router
|
||||||
|
|
||||||
# The admin router is optional; deployments without it simply get None and
# create_app() skips its registration.
admin_router = None
try:
    from .routers.admin import router as admin_router
except Exception:
    pass
|
||||||
|
|
||||||
def create_app() -> FastAPI:
|
def create_app() -> FastAPI:
|
||||||
app = FastAPI(title="mindnet API", version="0.5.0") # Version bump WP-05
|
app = FastAPI(title="mindnet API", version="0.6.0") # Version bump WP-11
|
||||||
s = get_settings()
|
s = get_settings()
|
||||||
|
|
||||||
@app.get("/healthz")
|
@app.get("/healthz")
|
||||||
|
|
@ -34,9 +36,10 @@ def create_app() -> FastAPI:
|
||||||
app.include_router(graph_router, prefix="/graph", tags=["graph"])
|
app.include_router(graph_router, prefix="/graph", tags=["graph"])
|
||||||
app.include_router(tools_router, prefix="/tools", tags=["tools"])
|
app.include_router(tools_router, prefix="/tools", tags=["tools"])
|
||||||
app.include_router(feedback_router, prefix="/feedback", tags=["feedback"])
|
app.include_router(feedback_router, prefix="/feedback", tags=["feedback"])
|
||||||
|
|
||||||
# NEU: Chat Endpoint
|
|
||||||
app.include_router(chat_router, prefix="/chat", tags=["chat"])
|
app.include_router(chat_router, prefix="/chat", tags=["chat"])
|
||||||
|
|
||||||
|
# NEU: Registrierung des Ingest-Routers
|
||||||
|
app.include_router(ingest_router, prefix="/ingest", tags=["ingest"])
|
||||||
|
|
||||||
if admin_router:
|
if admin_router:
|
||||||
app.include_router(admin_router, prefix="/admin", tags=["admin"])
|
app.include_router(admin_router, prefix="/admin", tags=["admin"])
|
||||||
|
|
|
||||||
89
app/routers/ingest.py
Normal file
89
app/routers/ingest.py
Normal file
|
|
@ -0,0 +1,89 @@
|
||||||
|
"""
|
||||||
|
app/routers/ingest.py
|
||||||
|
API-Endpunkte für WP-11 (Discovery & Persistence).
|
||||||
|
"""
|
||||||
|
import os
|
||||||
|
import time
|
||||||
|
from fastapi import APIRouter, HTTPException
|
||||||
|
from pydantic import BaseModel
|
||||||
|
from typing import Optional, List, Dict, Any
|
||||||
|
|
||||||
|
from app.core.ingestion import IngestionService
|
||||||
|
from app.services.discovery import DiscoveryService
|
||||||
|
|
||||||
|
router = APIRouter()
|
||||||
|
|
||||||
|
# --- DTOs ---
|
||||||
|
|
||||||
|
class AnalyzeRequest(BaseModel):
    """Request body for draft analysis (WP-11)."""
    text: str
    # Note-type hint for the analyzer; defaults to a generic concept note.
    type: str = "concept"
||||||
|
|
||||||
|
class SaveRequest(BaseModel):
    """Request body for persisting a markdown note into the vault."""
    markdown_content: str
    # Optional; when omitted the endpoint falls back to a timestamp-based name.
    filename: Optional[str] = None
    # Target folder inside the vault.
    folder: str = "00_Inbox"
||||||
|
|
||||||
|
class SaveResponse(BaseModel):
    """Response describing the persisted and indexed note."""
    status: str
    file_path: str
    note_id: str
    # Ingestion statistics, e.g. chunk and edge counts.
    stats: Dict[str, Any]
||||||
|
|
||||||
|
# --- Services ---
# Module-level singleton; could alternatively be supplied through FastAPI
# dependency injection.
discovery_service = DiscoveryService()
|
@router.post("/analyze")
async def analyze_draft(req: AnalyzeRequest):
    """
    WP-11 intelligence: analyze a draft and return link suggestions.

    Delegates to ``DiscoveryService.analyze_draft`` and returns its payload
    (suggestion list plus counts). Raises HTTP 500 on any analysis failure.
    """
    try:
        result = await discovery_service.analyze_draft(req.text, req.type)
    except Exception as e:
        # Fix: chain the original cause (`from e`) so the underlying
        # traceback stays visible in server logs.
        raise HTTPException(status_code=500, detail=f"Analysis failed: {str(e)}") from e
    return result
||||||
|
|
||||||
|
@router.post("/save", response_model=SaveResponse)
async def save_note(req: SaveRequest):
    """
    WP-11 persistence: write markdown to a physical file and index it in Qdrant.
    """
    # 1. Resolve the vault root (env var, then dev fallback).
    vault_root = os.getenv("MINDNET_VAULT_ROOT", "./vault")
    if not os.path.exists(vault_root):
        if not os.path.exists("../vault"):
            raise HTTPException(status_code=500, detail="Vault root not configured or missing")
        # Fallback for dev environments.
        vault_root = "../vault"

    # 2. Pick a filename; simple timestamp fallback when none was supplied.
    final_filename = req.filename or f"draft_{int(time.time())}.md"

    # 3. Delegate writing and indexing to the ingestion service.
    ingest_service = IngestionService()  # uses default prefix or env
    result = ingest_service.create_from_text(
        markdown_content=req.markdown_content,
        filename=final_filename,
        vault_root=os.path.abspath(vault_root),
        folder=req.folder,
    )

    if result.get("status") == "error":
        raise HTTPException(status_code=500, detail=result.get("error"))

    return SaveResponse(
        status="success",
        file_path=result["path"],
        note_id=result.get("note_id", "unknown"),
        stats={
            "chunks": result.get("chunks_count", 0),
            "edges": result.get("edges_count", 0),
        },
    )
||||||
151
app/services/discovery.py
Normal file
151
app/services/discovery.py
Normal file
|
|
@ -0,0 +1,151 @@
|
||||||
|
"""
|
||||||
|
app/services/discovery.py
|
||||||
|
Service für Link-Vorschläge und Knowledge-Discovery (WP-11).
|
||||||
|
Analysiert Drafts auf Keywords und semantische Ähnlichkeiten.
|
||||||
|
"""
|
||||||
|
import logging
|
||||||
|
from typing import List, Dict, Any, Set
|
||||||
|
from qdrant_client.http import models as rest
|
||||||
|
|
||||||
|
from app.core.qdrant import QdrantConfig, get_client
|
||||||
|
from app.models.dto import QueryRequest
|
||||||
|
from app.core.retriever import hybrid_retrieve
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
class DiscoveryService:
    """Link suggestions and knowledge discovery for WP-11.

    Combines exact title/alias matching with semantic vector search over
    the Qdrant collections.
    """

    def __init__(self, collection_prefix: str = "mindnet"):
        self.prefix = collection_prefix
        self.cfg = QdrantConfig.from_env()
        self.cfg.prefix = collection_prefix
        self.client = get_client(self.cfg)

    async def analyze_draft(self, text: str, current_type: str) -> Dict[str, Any]:
        """
        Analyze a draft text and propose links to existing notes.

        Exact matches (known titles/aliases occurring in the text) come
        first with confidence 1.0; semantic hits above a score threshold
        are appended, skipping notes already matched exactly.
        """
        suggestions: List[Dict[str, Any]] = []

        # 1. Exact match: terms in the text that exist as note titles.
        # (For very large vaults >10k this catalog should be cached.)
        catalog = self._fetch_all_titles_and_aliases()
        matched = self._find_entities_in_text(text, catalog)

        seen_ids = set()
        for ent in matched:
            seen_ids.add(ent["id"])
            suggestions.append({
                "type": "exact_match",
                "text_found": ent["match"],
                "target_title": ent["title"],
                "target_id": ent["id"],
                "confidence": 1.0,
                "reason": "Existierender Notiz-Titel",
            })

        # 2. Semantic match via vector search; exact-match targets are
        # filtered out so each note is suggested at most once.
        for hit in self._get_semantic_suggestions(text):
            if hit.node_id in seen_ids:
                continue
            # Threshold: only surface relevant suggestions (> 0.65).
            # total_score already carries the per-type weights.
            if hit.total_score > 0.65:
                score = round(hit.total_score, 2)
                suggestions.append({
                    "type": "semantic_match",
                    "text_found": (hit.source.get("text") or "")[:50] + "...",  # snippet
                    "target_title": hit.payload.get("title", "Unbekannt"),
                    "target_id": hit.node_id,
                    "confidence": score,
                    "reason": f"Inhaltliche Ähnlichkeit (Score: {score})",
                })

        return {
            "draft_length": len(text),
            "suggestions_count": len(suggestions),
            "suggestions": suggestions,
        }

    def _fetch_all_titles_and_aliases(self) -> List[Dict]:
        """Load every title and alias from the notes collection.

        Uses the Qdrant scroll API in pages of 1000; returns [] on error.
        """
        col_name = f"{self.prefix}_notes"
        collected: List[Dict] = []
        cursor = None

        try:
            while True:
                # Scroll API: efficient metadata-only pagination.
                points, cursor = self.client.scroll(
                    collection_name=col_name,
                    limit=1000,
                    offset=cursor,
                    with_payload=True,
                    with_vectors=False,
                )
                for point in points:
                    pl = point.payload or {}
                    collected.append({
                        "id": pl.get("note_id"),
                        "title": pl.get("title"),
                        "aliases": pl.get("aliases", []),
                    })
                if cursor is None:
                    break
        except Exception as e:
            logger.error(f"Error fetching titles: {e}")
            return []

        return collected

    def _find_entities_in_text(self, text: str, entities: List[Dict]) -> List[Dict]:
        """Find case-insensitive occurrences of known titles/aliases in *text*."""
        haystack = text.lower()
        hits: List[Dict] = []

        for entity in entities:
            # 1. Title has priority: when it matches, aliases are skipped.
            title = entity.get("title")
            if title and title.lower() in haystack:
                hits.append({"match": title, "title": title, "id": entity["id"]})
                continue

            # 2. Otherwise check the aliases (first hit wins).
            aliases = entity.get("aliases")
            if isinstance(aliases, list) and aliases:
                for alias in aliases:
                    if alias and str(alias).lower() in haystack:
                        # The link target is always the main title.
                        hits.append({"match": alias, "title": title, "id": entity["id"]})
                        break

        return hits

    def _get_semantic_suggestions(self, text: str):
        """Thin wrapper around the hybrid retriever; returns [] on failure."""
        # Simplified query; hybrid_retrieve is sync but fast enough here.
        req = QueryRequest(query=text, top_k=5, explain=False)
        try:
            return hybrid_retrieve(req).results
        except Exception as e:
            logger.error(f"Semantic suggestion failed: {e}")
            return []
|
||||||
Loading…
Reference in New Issue
Block a user