From 565d34fb7b50be80ade7f30664dc80ed015a9ba5 Mon Sep 17 00:00:00 2001 From: Lars Date: Wed, 10 Dec 2025 22:17:40 +0100 Subject: [PATCH] =?UTF-8?q?Erg=C3=A4nzung=20und=20Korrektur?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app/core/note_payload.py | 14 ++++++++++++-- app/main.py | 8 +++++--- app/routers/ingest.py | 14 +++++++------- app/services/discovery.py | 36 +++++++++++++++++++----------------- 4 files changed, 43 insertions(+), 29 deletions(-) diff --git a/app/core/note_payload.py b/app/core/note_payload.py index 1c5e6bc..285012f 100644 --- a/app/core/note_payload.py +++ b/app/core/note_payload.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- """ Modul: app/core/note_payload.py -Version: 2.0.0 +Version: 2.1.0 (WP-11 Update: Aliases support) Zweck ----- @@ -145,6 +145,7 @@ def make_note_payload(note: Any, *args, **kwargs) -> Dict[str, Any]: - retriever_weight: effektives Gewicht für den Retriever - chunk_profile: Profil für Chunking (short|medium|long|default|...) - edge_defaults: Liste von Kanten-Typen, die als Defaults gelten + - aliases: Liste von Synonymen (WP-11) """ n = _as_dict(note) path_arg, types_cfg_explicit = _pick_args(*args, **kwargs) @@ -214,13 +215,22 @@ def make_note_payload(note: Any, *args, **kwargs) -> Dict[str, Any]: if tags: payload["tags"] = _ensure_list(tags) + # WP-11: Aliases übernehmen (für Discovery Service) + aliases = fm.get("aliases") + if aliases: + payload["aliases"] = _ensure_list(aliases) + # Zeitliche Metadaten (sofern vorhanden) for k in ("created", "modified", "date"): v = fm.get(k) or n.get(k) if v: payload[k] = str(v) + + # Fulltext (Fallback, falls body im Input) + if "body" in n and n["body"]: + payload["fulltext"] = str(n["body"]) # JSON-Roundtrip zur harten Validierung (ASCII beibehalten) json.loads(json.dumps(payload, ensure_ascii=False)) - return payload + return payload \ No newline at end of file diff --git a/app/main.py b/app/main.py index 98e7ea5..fa23b73 100644 --- a/app/main.py +++ b/app/main.py @@ -11,8 +11,8 @@ from .routers.query import router as query_router from .routers.graph import router as graph_router from .routers.tools import router as tools_router from .routers.feedback import router as feedback_router +# NEU: Chat Router (WP-05) from .routers.chat import router as chat_router - # NEU: Ingest Router (WP-11) from .routers.ingest import router as ingest_router @@ -36,9 +36,11 @@ def create_app() -> FastAPI: app.include_router(graph_router, prefix="/graph", tags=["graph"]) app.include_router(tools_router, prefix="/tools", tags=["tools"]) app.include_router(feedback_router, prefix="/feedback", tags=["feedback"]) - app.include_router(chat_router, prefix="/chat", tags=["chat"]) - # NEU: Registrierung des Ingest-Routers + # NEU: Chat Endpoint + app.include_router(chat_router, prefix="/chat", tags=["chat"]) + + # NEU: Ingest Endpoint app.include_router(ingest_router, prefix="/ingest", tags=["ingest"]) if admin_router: diff --git a/app/routers/ingest.py b/app/routers/ingest.py index 4e9a7a0..aa97f49 100644 --- a/app/routers/ingest.py +++ b/app/routers/ingest.py @@ -21,8 +21,8 @@ class AnalyzeRequest(BaseModel): class SaveRequest(BaseModel): markdown_content: str - filename: Optional[str] = None # Optional, fallback auf Timestamp/Titel - folder: str = "00_Inbox" # Zielordner im Vault + filename: Optional[str] = None # Optional, fallback auf Timestamp + folder: str = "00_Inbox" # Zielordner class SaveResponse(BaseModel): status: str @@ -31,7 +31,6 @@ class SaveResponse(BaseModel): stats: Dict[str, Any] # --- Services --- -# Instanzierung hier oder via Dependency Injection discovery_service = DiscoveryService() @router.post("/analyze") @@ -53,8 +52,10 @@ async def save_note(req: SaveRequest): # 1. Vault Root ermitteln vault_root = os.getenv("MINDNET_VAULT_ROOT", "./vault") if not os.path.exists(vault_root): - # Fallback für Dev-Umgebungen - if os.path.exists("../vault"): + # Fallback relative paths + if os.path.exists("vault"): + vault_root = "vault" + elif os.path.exists("../vault"): vault_root = "../vault" else: raise HTTPException(status_code=500, detail="Vault root not configured or missing") @@ -62,11 +63,10 @@ async def save_note(req: SaveRequest): # 2. Filename generieren falls fehlend final_filename = req.filename if not final_filename: - # Einfacher Fallback: Timestamp final_filename = f"draft_{int(time.time())}.md" # 3. Ingestion Service nutzen - ingest_service = IngestionService() # nutzt Default-Prefix oder aus Env + ingest_service = IngestionService() result = ingest_service.create_from_text( markdown_content=req.markdown_content, diff --git a/app/services/discovery.py b/app/services/discovery.py index 359a13a..6612e64 100644 --- a/app/services/discovery.py +++ b/app/services/discovery.py @@ -1,7 +1,6 @@ """ app/services/discovery.py Service für Link-Vorschläge und Knowledge-Discovery (WP-11). -Analysiert Drafts auf Keywords und semantische Ähnlichkeiten. """ import logging from typing import List, Dict, Any, Set @@ -14,21 +13,21 @@ from app.core.retriever import hybrid_retrieve logger = logging.getLogger(__name__) class DiscoveryService: - def __init__(self, collection_prefix: str = "mindnet"): - self.prefix = collection_prefix + def __init__(self, collection_prefix: str = None): self.cfg = QdrantConfig.from_env() - self.cfg.prefix = collection_prefix + # Prefix Priorität: Argument > Env > Default + self.prefix = collection_prefix or self.cfg.prefix or "mindnet" self.client = get_client(self.cfg) async def analyze_draft(self, text: str, current_type: str) -> Dict[str, Any]: """ Analysiert einen Draft-Text und schlägt Verlinkungen vor. - Kombiniert Exact Match (Titel) und Semantic Match (Vektor). + Kombiniert Exact Match (Titel/Alias) und Semantic Match (Vektor). """ suggestions = [] # 1. Exact Match: Finde Begriffe im Text, die als Notiz-Titel existieren - # (Bei sehr großen Vaults >10k sollte dies gecached werden) + # (Holt alle Titel aus Qdrant - bei riesigen Vaults später cachen) known_entities = self._fetch_all_titles_and_aliases() found_entities = self._find_entities_in_text(text, known_entities) @@ -42,23 +41,23 @@ class DiscoveryService: "target_title": entity["title"], "target_id": entity["id"], "confidence": 1.0, - "reason": "Existierender Notiz-Titel" + "reason": "Existierender Notiz-Titel/Alias" }) # 2. Semantic Match: Finde inhaltlich ähnliche Notizen via Vektor-Suche - # Wir filtern Ergebnisse heraus, die wir schon per Exact Match gefunden haben. semantic_hits = self._get_semantic_suggestions(text) for hit in semantic_hits: + # Duplikate vermeiden (wenn wir es schon per Titel gefunden haben) if hit.node_id in existing_target_ids: continue - # Schwellwert: Nur relevante Vorschläge anzeigen (z.B. > 0.65) - # Wir nutzen den total_score, der bereits Typ-Gewichte enthält. + # Schwellwert: Nur relevante Vorschläge + # total_score beinhaltet bereits Typ-Gewichte aus dem Retriever if hit.total_score > 0.65: suggestions.append({ "type": "semantic_match", - "text_found": (hit.source.get("text") or "")[:50] + "...", # Snippet + "text_found": (hit.source.get("text") or "")[:50] + "...", "target_title": hit.payload.get("title", "Unbekannt"), "target_id": hit.node_id, "confidence": round(hit.total_score, 2), @@ -79,7 +78,6 @@ class DiscoveryService: try: while True: - # Scroll API nutzen, um effizient alle Metadaten zu laden res, next_page = self.client.scroll( collection_name=col_name, limit=1000, @@ -89,10 +87,15 @@ class DiscoveryService: ) for point in res: pl = point.payload or {} + + # Aliases robust lesen + aliases = pl.get("aliases") or [] + if isinstance(aliases, str): aliases = [aliases] + notes.append({ "id": pl.get("note_id"), "title": pl.get("title"), - "aliases": pl.get("aliases", []) + "aliases": aliases }) if next_page is None: @@ -105,7 +108,7 @@ class DiscoveryService: def _find_entities_in_text(self, text: str, entities: List[Dict]) -> List[Dict]: """ - Sucht Vorkommen von Titeln im Text (Case-Insensitive). + Sucht Vorkommen von Titeln/Alias im Text (Case-Insensitive). """ found = [] text_lower = text.lower() @@ -119,7 +122,7 @@ class DiscoveryService: "title": title, "id": entity["id"] }) - continue # Wenn Titel gefunden, Aliases nicht mehr prüfen (Prio) + continue # Wenn Titel gefunden, Aliases nicht mehr prüfen # 2. Aliases prüfen aliases = entity.get("aliases") @@ -136,14 +139,13 @@ class DiscoveryService: def _get_semantic_suggestions(self, text: str): """Wrapper um den Hybrid Retriever.""" - # Wir nutzen eine vereinfachte Query req = QueryRequest( query=text, top_k=5, explain=False ) try: - # hybrid_retrieve ist sync, wird aber schnell genug sein für diesen Kontext + # hybrid_retrieve nutzen (sync Wrapper) res = hybrid_retrieve(req) return res.results except Exception as e: