erster Stand
This commit is contained in:
parent
865b261294
commit
0170d9291f
|
|
@ -262,3 +262,41 @@ class IngestionService:
|
|||
self.client.delete(collection_name=f"{self.prefix}_{suffix}", points_selector=selector)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
def create_from_text(
    self,
    markdown_content: str,
    filename: str,
    vault_root: str,
    folder: str = "Inbox"  # default folder for new files
) -> Dict[str, Any]:
    """
    WP-11: Write text to a physical file and index it immediately.

    Parameters:
        markdown_content: raw markdown to persist.
        filename: desired file name; only its basename is used and a
            ``.md`` extension is appended when missing.
        vault_root: path of the vault directory.
        folder: sub-folder of the vault to write into (created if absent).

    Returns:
        The result dict of ``process_file`` on success, or
        ``{"status": "error", "error": ...}`` on failure.
    """
    # 1. Prepare the target path.
    target_dir = os.path.join(vault_root, folder)
    os.makedirs(target_dir, exist_ok=True)

    # Sanitize the file name (security: strip any directory components).
    safe_filename = os.path.basename(filename)
    # FIX: reject degenerate names ("", "/", ".", "..") which would
    # otherwise silently produce a hidden ".md" or "..md" file.
    if safe_filename in ("", ".", ".."):
        return {"status": "error", "error": f"Invalid filename: {filename!r}"}
    if not safe_filename.endswith(".md"):
        safe_filename += ".md"

    file_path = os.path.join(target_dir, safe_filename)

    # 2. Write to disk (the file on disk is the single source of truth).
    try:
        with open(file_path, "w", encoding="utf-8") as f:
            f.write(markdown_content)
    except Exception as e:
        # Broad on purpose: callers expect an error dict, never an exception.
        return {"status": "error", "error": f"Disk write failed: {str(e)}"}

    # 3. Index it by simply reusing the existing ingestion pipeline.
    return self.process_file(
        file_path=file_path,
        vault_root=vault_root,
        apply=True,            # write immediately
        force_replace=True,    # new file, so force the update
        purge_before=True      # clean start
    )
|
||||
11
app/main.py
11
app/main.py
|
|
@ -11,16 +11,18 @@ from .routers.query import router as query_router
|
|||
from .routers.graph import router as graph_router
|
||||
from .routers.tools import router as tools_router
|
||||
from .routers.feedback import router as feedback_router
|
||||
# NEU: Chat Router (WP-05)
|
||||
from .routers.chat import router as chat_router
|
||||
|
||||
# NEU: Ingest Router (WP-11)
|
||||
from .routers.ingest import router as ingest_router
|
||||
|
||||
try:
|
||||
from .routers.admin import router as admin_router
|
||||
except Exception:
|
||||
admin_router = None
|
||||
|
||||
def create_app() -> FastAPI:
|
||||
app = FastAPI(title="mindnet API", version="0.5.0") # Version bump WP-05
|
||||
app = FastAPI(title="mindnet API", version="0.6.0") # Version bump WP-11
|
||||
s = get_settings()
|
||||
|
||||
@app.get("/healthz")
|
||||
|
|
@ -34,10 +36,11 @@ def create_app() -> FastAPI:
|
|||
app.include_router(graph_router, prefix="/graph", tags=["graph"])
|
||||
app.include_router(tools_router, prefix="/tools", tags=["tools"])
|
||||
app.include_router(feedback_router, prefix="/feedback", tags=["feedback"])
|
||||
|
||||
# NEU: Chat Endpoint
|
||||
app.include_router(chat_router, prefix="/chat", tags=["chat"])
|
||||
|
||||
# NEU: Registrierung des Ingest-Routers
|
||||
app.include_router(ingest_router, prefix="/ingest", tags=["ingest"])
|
||||
|
||||
if admin_router:
|
||||
app.include_router(admin_router, prefix="/admin", tags=["admin"])
|
||||
|
||||
|
|
|
|||
89
app/routers/ingest.py
Normal file
89
app/routers/ingest.py
Normal file
|
|
@ -0,0 +1,89 @@
|
|||
"""
|
||||
app/routers/ingest.py
|
||||
API-Endpunkte für WP-11 (Discovery & Persistence).
|
||||
"""
|
||||
import os
|
||||
import time
|
||||
from fastapi import APIRouter, HTTPException
|
||||
from pydantic import BaseModel
|
||||
from typing import Optional, List, Dict, Any
|
||||
|
||||
from app.core.ingestion import IngestionService
|
||||
from app.services.discovery import DiscoveryService
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
# --- DTOs ---
|
||||
|
||||
class AnalyzeRequest(BaseModel):
    """Request body for POST /ingest/analyze: a draft text to scan for link suggestions."""
    text: str
    # NOTE: shadows the builtin `type`; kept because the field name is part of the API.
    type: str = "concept"
|
||||
|
||||
class SaveRequest(BaseModel):
    """Request body for POST /ingest/save: markdown to persist and index."""
    markdown_content: str
    filename: Optional[str] = None  # optional; falls back to a timestamp-based name
    folder: str = "00_Inbox"  # target folder inside the vault
|
||||
|
||||
class SaveResponse(BaseModel):
    """Response for POST /ingest/save: where the note landed plus indexing stats."""
    status: str
    file_path: str
    note_id: str
    # chunk/edge counts reported by the ingestion pipeline
    stats: Dict[str, Any]
|
||||
|
||||
# --- Services ---
# Instantiated here at module import time; could alternatively be wired up
# via dependency injection.
# NOTE(review): DiscoveryService() presumably opens a Qdrant client connection
# as a module-import side effect — confirm this is intended.
discovery_service = DiscoveryService()
|
||||
|
||||
@router.post("/analyze")
async def analyze_draft(req: AnalyzeRequest):
    """
    WP-11 Intelligence: analyze a draft and return link suggestions.

    Delegates to the shared DiscoveryService; any failure is surfaced
    as an HTTP 500 with the underlying error message.
    """
    try:
        return await discovery_service.analyze_draft(req.text, req.type)
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Analysis failed: {str(e)}")
|
||||
|
||||
@router.post("/save", response_model=SaveResponse)
async def save_note(req: SaveRequest):
    """
    WP-11 Persistence: persist markdown to a physical file and index it in Qdrant.

    Resolves the vault root from the environment (with a dev fallback),
    generates a timestamp-based filename when none is given, then delegates
    writing + indexing to the ingestion service.
    """
    # 1. Resolve the vault root, with a fallback for dev environments.
    vault_root = os.getenv("MINDNET_VAULT_ROOT", "./vault")
    if not os.path.exists(vault_root):
        if not os.path.exists("../vault"):
            raise HTTPException(status_code=500, detail="Vault root not configured or missing")
        vault_root = "../vault"

    # 2. Fall back to a timestamp-based filename when none was supplied.
    target_name = req.filename or f"draft_{int(time.time())}.md"

    # 3. Delegate to the ingestion service (default prefix or from env).
    service = IngestionService()
    outcome = service.create_from_text(
        markdown_content=req.markdown_content,
        filename=target_name,
        vault_root=os.path.abspath(vault_root),
        folder=req.folder,
    )

    if outcome.get("status") == "error":
        raise HTTPException(status_code=500, detail=outcome.get("error"))

    return SaveResponse(
        status="success",
        file_path=outcome["path"],
        note_id=outcome.get("note_id", "unknown"),
        stats={
            "chunks": outcome.get("chunks_count", 0),
            "edges": outcome.get("edges_count", 0),
        },
    )
|
||||
151
app/services/discovery.py
Normal file
151
app/services/discovery.py
Normal file
|
|
@ -0,0 +1,151 @@
|
|||
"""
|
||||
app/services/discovery.py
|
||||
Service für Link-Vorschläge und Knowledge-Discovery (WP-11).
|
||||
Analysiert Drafts auf Keywords und semantische Ähnlichkeiten.
|
||||
"""
|
||||
import logging
|
||||
from typing import List, Dict, Any, Set
|
||||
from qdrant_client.http import models as rest
|
||||
|
||||
from app.core.qdrant import QdrantConfig, get_client
|
||||
from app.models.dto import QueryRequest
|
||||
from app.core.retriever import hybrid_retrieve
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class DiscoveryService:
    """Suggests wiki-links for draft notes (WP-11).

    Combines exact title/alias matching with semantic vector search
    against the Qdrant-backed note index.
    """

    def __init__(self, collection_prefix: str = "mindnet"):
        # One Qdrant client per service instance, configured from the environment.
        self.prefix = collection_prefix
        self.cfg = QdrantConfig.from_env()
        self.cfg.prefix = collection_prefix
        self.client = get_client(self.cfg)

    async def analyze_draft(self, text: str, current_type: str) -> Dict[str, Any]:
        """Analyze a draft text and propose links.

        Combines exact matches (titles/aliases) with semantic matches
        (vector search); exact hits take precedence over semantic ones.
        """
        # Pass 1: exact matches against note titles that exist in the vault.
        # (For very large vaults >10k notes this lookup should be cached.)
        catalog = self._fetch_all_titles_and_aliases()
        exact_hits = self._find_entities_in_text(text, catalog)

        suggestions: List[Dict[str, Any]] = []
        linked_ids = set()
        for ent in exact_hits:
            linked_ids.add(ent["id"])
            suggestions.append({
                "type": "exact_match",
                "text_found": ent["match"],
                "target_title": ent["title"],
                "target_id": ent["id"],
                "confidence": 1.0,
                "reason": "Existierender Notiz-Titel",
            })

        # Pass 2: semantically similar notes via vector search, skipping
        # anything already covered by an exact match.
        for hit in self._get_semantic_suggestions(text):
            if hit.node_id in linked_ids:
                continue
            # Threshold: only surface relevant suggestions (> 0.65);
            # total_score already includes the type weights.
            if hit.total_score > 0.65:
                suggestions.append({
                    "type": "semantic_match",
                    "text_found": (hit.source.get("text") or "")[:50] + "...",  # snippet
                    "target_title": hit.payload.get("title", "Unbekannt"),
                    "target_id": hit.node_id,
                    "confidence": round(hit.total_score, 2),
                    "reason": f"Inhaltliche Ähnlichkeit (Score: {round(hit.total_score, 2)})",
                })

        return {
            "draft_length": len(text),
            "suggestions_count": len(suggestions),
            "suggestions": suggestions,
        }

    def _fetch_all_titles_and_aliases(self) -> List[Dict]:
        """Load every note's id, title and aliases from the notes collection."""
        col_name = f"{self.prefix}_notes"
        collected: List[Dict] = []
        cursor = None

        try:
            while True:
                # Scroll API: page through all payloads without vectors.
                points, cursor = self.client.scroll(
                    collection_name=col_name,
                    limit=1000,
                    offset=cursor,
                    with_payload=True,
                    with_vectors=False,
                )
                for point in points:
                    pl = point.payload or {}
                    collected.append({
                        "id": pl.get("note_id"),
                        "title": pl.get("title"),
                        "aliases": pl.get("aliases", []),
                    })
                if cursor is None:
                    break
        except Exception as e:
            logger.error(f"Error fetching titles: {e}")
            return []

        return collected

    def _find_entities_in_text(self, text: str, entities: List[Dict]) -> List[Dict]:
        """Case-insensitive substring search for entity titles/aliases in *text*."""
        haystack = text.lower()
        matches: List[Dict] = []

        for entity in entities:
            # Title takes priority; aliases are not checked once it matched.
            title = entity.get("title")
            if title and title.lower() in haystack:
                matches.append({
                    "match": title,
                    "title": title,
                    "id": entity["id"],
                })
                continue

            aliases = entity.get("aliases")
            if aliases and isinstance(aliases, list):
                for alias in aliases:
                    if alias and str(alias).lower() in haystack:
                        matches.append({
                            "match": alias,
                            "title": title,  # link target is always the main title
                            "id": entity["id"],
                        })
                        break
        return matches

    def _get_semantic_suggestions(self, text: str):
        """Thin wrapper around the hybrid retriever (top-5, no explain)."""
        req = QueryRequest(
            query=text,
            top_k=5,
            explain=False,
        )
        try:
            # hybrid_retrieve is sync, but fast enough for this context.
            return hybrid_retrieve(req).results
        except Exception as e:
            logger.error(f"Semantic suggestion failed: {e}")
            return []
|
||||
Loading…
Reference in New Issue
Block a user