WP11 #8
|
|
@ -23,7 +23,7 @@ timeout_setting = os.getenv("MINDNET_API_TIMEOUT") or os.getenv("MINDNET_LLM_TIM
|
|||
# Request timeout in seconds for backend calls; falls back to 300s when neither
# MINDNET_API_TIMEOUT nor MINDNET_LLM_TIMEOUT is set (see env lookup above).
API_TIMEOUT: float = float(timeout_setting) if timeout_setting else 300.0
|
||||
|
||||
# --- PAGE SETUP ---
# NOTE: Streamlit allows exactly one st.set_page_config() call per script run,
# and it must precede every other st.* command. The merged diff left both the
# old (v2.3.4) and new (v2.3.6) calls in place; only the new one may remain.
st.set_page_config(page_title="mindnet v2.3.6", page_icon="🧠", layout="wide")
|
||||
|
||||
# --- CSS STYLING ---
|
||||
st.markdown("""
|
||||
|
|
@ -225,7 +225,7 @@ def submit_feedback(query_id, node_id, score, comment=None):
|
|||
def render_sidebar():
|
||||
with st.sidebar:
|
||||
st.title("🧠 mindnet")
|
||||
st.caption("v2.3.4 | WP-10b (Intelligence)")
|
||||
st.caption("v2.3.6 | WP-10b (Full)")
|
||||
mode = st.radio("Modus", ["💬 Chat", "📝 Manueller Editor"], index=0)
|
||||
st.divider()
|
||||
st.subheader("⚙️ Settings")
|
||||
|
|
@ -463,22 +463,13 @@ def render_chat_interface(top_k, explain):
|
|||
st.rerun()
|
||||
|
||||
def render_manual_editor():
    """Render the manual editor mode.

    Instead of a bespoke save form, this reuses render_draft_editor() by
    feeding it a synthetic message with an empty draft document, so manual
    mode and chat-draft mode share one editing/saving code path.

    NOTE(review): the merged diff contained both the removed form-based body
    (selectbox/tags/text_area + save_draft_to_vault) and this added body; the
    hunk header (-22/+13 lines) indicates the reuse variant is the intended
    replacement, so the dead form code is dropped here.
    """
    st.header("📝 Manueller Editor")

    # Synthetic message so render_draft_editor() can run with empty defaults.
    mock_msg = {
        "content": "---\ntype: default\nstatus: draft\ntitle: Neue Notiz\ntags: []\n---\n# Titel\n",
        "query_id": "manual_mode_v2"  # fixed ID for manual mode
    }
    render_draft_editor(mock_msg)
|
||||
|
||||
# --- MAIN ---
|
||||
mode, top_k, explain = render_sidebar()
|
||||
|
|
|
|||
|
|
@ -1,67 +1,100 @@
|
|||
#!/usr/bin/env python3
"""
scripts/import_markdown.py

CLI tool for importing Markdown files from a vault into Qdrant.
Refactored CLI wrapper around the async IngestionService.
Updated for Mindnet v2.3.6 (async ingestion support).
"""
import argparse
import asyncio
import json
import logging
import os
import sys
from pathlib import Path

from dotenv import load_dotenv

# Make the project root importable so `app.core.ingestion` resolves when the
# script is run from the repository root (python scripts/import_markdown.py).
sys.path.append(os.getcwd())

from app.core.ingestion import IngestionService
|
||||
def iter_md(root: str):
    """Recursively collect all Markdown files under *root*.

    Returns a sorted list of forward-slash paths, skipping anything inside an
    ``.obsidian`` directory (Obsidian's config folder).

    Fixes over the previous version:
      * the exclusion check ran on the raw ``os.walk()`` dirpath before
        separator normalization, so on Windows (backslashes) it never matched;
      * ``"/.obsidian/" not in dp`` missed files directly inside a top-level
        ``.obsidian`` directory, because that dirpath has no trailing slash.
    """
    out = []
    for dp, _, fns in os.walk(root):
        # Normalize separators first so the exclusion check is OS-independent.
        norm_dp = dp.replace("\\", "/")
        if ".obsidian" in norm_dp.split("/"):
            continue
        for fn in fns:
            if fn.endswith(".md"):
                out.append(os.path.join(dp, fn).replace("\\", "/"))
    return sorted(out)
|
||||
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
|
||||
logger = logging.getLogger("importer")
|
||||
|
||||
async def main_async(args, *, batch_size: int = 20, max_concurrency: int = 5):
    """Asynchronously (re-)index every Markdown file in the vault.

    Args:
        args: argparse.Namespace with ``vault`` (path), ``prefix`` (collection
            prefix) and the boolean flags ``force`` and ``apply`` (see main()).
        batch_size: Keyword-only. Files scheduled per asyncio.gather() round;
            controls progress-log granularity. Defaults to the previous
            hard-coded 20.
        max_concurrency: Keyword-only. Maximum files processed at once, to
            avoid OOM / embedding rate limits. Defaults to the previous
            hard-coded 5.
    """
    vault_path = Path(args.vault).resolve()
    if not vault_path.exists():
        logger.error("Vault path does not exist: %s", vault_path)
        return

    # Service construction initializes its async clients.
    logger.info("Initializing IngestionService (Prefix: %s)", args.prefix)
    service = IngestionService(collection_prefix=args.prefix)

    logger.info("Scanning %s...", vault_path)
    files = list(vault_path.rglob("*.md"))
    # Exclude Obsidian's config folder if present.
    files = [f for f in files if ".obsidian" not in str(f)]
    files.sort()

    logger.info("Found %d markdown files.", len(files))

    stats = {"processed": 0, "skipped": 0, "errors": 0}

    # Semaphore bounds how many files are opened/embedded concurrently.
    sem = asyncio.Semaphore(max_concurrency)

    async def process_with_limit(f_path):
        """Process one file under the concurrency limit; never raises."""
        async with sem:
            try:
                return await service.process_file(
                    file_path=str(f_path),
                    vault_root=str(vault_path),
                    force_replace=args.force,
                    apply=args.apply,
                    purge_before=True
                )
            except Exception as e:
                # One bad file must not abort the whole import run.
                return {"status": "error", "error": str(e), "path": str(f_path)}

    # Batch processing: gather in chunks so progress is visible between rounds.
    for i in range(0, len(files), batch_size):
        batch = files[i:i + batch_size]
        logger.info("Processing batch %d to %d...", i, i + len(batch))

        results = await asyncio.gather(*(process_with_limit(f) for f in batch))

        for res in results:
            status = res.get("status")
            if status == "success":
                stats["processed"] += 1
            elif status == "error":
                stats["errors"] += 1
                logger.error("Error in %s: %s", res.get("path"), res.get("error"))
            else:
                stats["skipped"] += 1

    logger.info("Done. Stats: %s", stats)
    if not args.apply:
        logger.info("DRY RUN. Use --apply to write to DB.")
|
||||
|
||||
def main():
    """CLI entry point: parse arguments and run the async importer.

    NOTE(review): the merged diff left two conflicting ArgumentParser setups
    (`ap` with required --vault plus the removed --purge-before-upsert /
    --force-replace flags, and `parser` with defaults), parsed `args` twice,
    and kept the old synchronous ingestion loop — which calls the now-async
    process_file() without awaiting it — next to asyncio.run(main_async(args)).
    This reconciles to the intended async (+) side of the diff.
    """
    load_dotenv()

    # Default collection prefix from the environment, falling back to "mindnet".
    default_prefix = os.getenv("COLLECTION_PREFIX", "mindnet")

    parser = argparse.ArgumentParser(description="Import Vault to Qdrant (Async)")
    parser.add_argument("--vault", default="./vault", help="Path to vault root")
    parser.add_argument("--prefix", default=default_prefix, help="Collection prefix")
    parser.add_argument("--force", action="store_true", help="Force re-index all files")
    parser.add_argument("--apply", action="store_true", help="Perform writes to Qdrant")
    args = parser.parse_args()

    # Hand off to the asyncio event loop; all ingestion work is async.
    asyncio.run(main_async(args))


if __name__ == "__main__":
    main()
|
||||
Loading…
Reference in New Issue
Block a user