WP11 #8
|
|
@ -23,7 +23,7 @@ timeout_setting = os.getenv("MINDNET_API_TIMEOUT") or os.getenv("MINDNET_LLM_TIM
|
|||
# Request timeout in seconds for backend calls; falls back to 300s when neither
# MINDNET_API_TIMEOUT nor MINDNET_LLM_TIMEOUT is set (see env lookup above).
API_TIMEOUT: float = float(timeout_setting) if timeout_setting else 300.0
|
||||
|
||||
# --- PAGE SETUP ---
# NOTE: Streamlit allows exactly one st.set_page_config() call per script run,
# and it must precede every other st.* command. The merged diff left both the
# old (v2.3.4) and new (v2.3.6) calls in place; only the new one may remain.
st.set_page_config(page_title="mindnet v2.3.6", page_icon="🧠", layout="wide")
|
||||
|
||||
# --- CSS STYLING ---
|
||||
st.markdown("""
|
||||
|
|
@ -225,7 +225,7 @@ def submit_feedback(query_id, node_id, score, comment=None):
|
|||
def render_sidebar():
|
||||
with st.sidebar:
|
||||
st.title("🧠 mindnet")
|
||||
st.caption("v2.3.4 | WP-10b (Intelligence)")
|
||||
st.caption("v2.3.6 | WP-10b (Full)")
|
||||
mode = st.radio("Modus", ["💬 Chat", "📝 Manueller Editor"], index=0)
|
||||
st.divider()
|
||||
st.subheader("⚙️ Settings")
|
||||
|
|
@ -463,22 +463,13 @@ def render_chat_interface(top_k, explain):
|
|||
st.rerun()
|
||||
|
||||
def render_manual_editor():
    """Render the manual editor mode.

    Instead of a bespoke save form, this reuses render_draft_editor() by
    feeding it a synthetic message with an empty draft document, so manual
    mode and chat-draft mode share one editing/saving code path.

    NOTE(review): the merged diff contained both the removed form-based body
    (selectbox/tags/text_area + save_draft_to_vault) and this added body; the
    hunk header (-22/+13 lines) indicates the reuse variant is the intended
    replacement, so the dead form code is dropped here.
    """
    st.header("📝 Manueller Editor")

    # Synthetic message so render_draft_editor() can run with empty defaults.
    mock_msg = {
        "content": "---\ntype: default\nstatus: draft\ntitle: Neue Notiz\ntags: []\n---\n# Titel\n",
        "query_id": "manual_mode_v2"  # fixed ID for manual mode
    }
    render_draft_editor(mock_msg)
|
||||
|
||||
# --- MAIN ---
|
||||
mode, top_k, explain = render_sidebar()
|
||||
|
|
|
|||
|
|
@ -1,67 +1,100 @@
|
|||
#!/usr/bin/env python3
"""
scripts/import_markdown.py

CLI tool for importing Markdown files from a vault into Qdrant.
Refactored CLI wrapper around the async IngestionService.
Updated for Mindnet v2.3.6 (async ingestion support).
"""
import argparse
import asyncio
import json
import logging
import os
import sys
from pathlib import Path

from dotenv import load_dotenv

# Make the project root importable so `app.core.ingestion` resolves when the
# script is run from the repository root (python scripts/import_markdown.py).
sys.path.append(os.getcwd())

from app.core.ingestion import IngestionService
|
||||
def iter_md(root: str):
    """Recursively collect all Markdown files under *root*.

    Returns a sorted list of forward-slash paths, skipping anything inside an
    ``.obsidian`` directory (Obsidian's config folder).

    Fixes over the previous version:
      * the exclusion check ran on the raw ``os.walk()`` dirpath before
        separator normalization, so on Windows (backslashes) it never matched;
      * ``"/.obsidian/" not in dp`` missed files directly inside a top-level
        ``.obsidian`` directory, because that dirpath has no trailing slash.
    """
    out = []
    for dp, _, fns in os.walk(root):
        # Normalize separators first so the exclusion check is OS-independent.
        norm_dp = dp.replace("\\", "/")
        if ".obsidian" in norm_dp.split("/"):
            continue
        for fn in fns:
            if fn.endswith(".md"):
                out.append(os.path.join(dp, fn).replace("\\", "/"))
    return sorted(out)
|
||||
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
|
||||
logger = logging.getLogger("importer")
|
||||
|
||||
async def main_async(args, *, batch_size: int = 20, max_concurrency: int = 5):
    """Asynchronously (re-)index every Markdown file in the vault.

    Args:
        args: argparse.Namespace with ``vault`` (path), ``prefix`` (collection
            prefix) and the boolean flags ``force`` and ``apply`` (see main()).
        batch_size: Keyword-only. Files scheduled per asyncio.gather() round;
            controls progress-log granularity. Defaults to the previous
            hard-coded 20.
        max_concurrency: Keyword-only. Maximum files processed at once, to
            avoid OOM / embedding rate limits. Defaults to the previous
            hard-coded 5.
    """
    vault_path = Path(args.vault).resolve()
    if not vault_path.exists():
        logger.error("Vault path does not exist: %s", vault_path)
        return

    # Service construction initializes its async clients.
    logger.info("Initializing IngestionService (Prefix: %s)", args.prefix)
    service = IngestionService(collection_prefix=args.prefix)

    logger.info("Scanning %s...", vault_path)
    files = list(vault_path.rglob("*.md"))
    # Exclude Obsidian's config folder if present.
    files = [f for f in files if ".obsidian" not in str(f)]
    files.sort()

    logger.info("Found %d markdown files.", len(files))

    stats = {"processed": 0, "skipped": 0, "errors": 0}

    # Semaphore bounds how many files are opened/embedded concurrently.
    sem = asyncio.Semaphore(max_concurrency)

    async def process_with_limit(f_path):
        """Process one file under the concurrency limit; never raises."""
        async with sem:
            try:
                return await service.process_file(
                    file_path=str(f_path),
                    vault_root=str(vault_path),
                    force_replace=args.force,
                    apply=args.apply,
                    purge_before=True
                )
            except Exception as e:
                # One bad file must not abort the whole import run.
                return {"status": "error", "error": str(e), "path": str(f_path)}

    # Batch processing: gather in chunks so progress is visible between rounds.
    for i in range(0, len(files), batch_size):
        batch = files[i:i + batch_size]
        logger.info("Processing batch %d to %d...", i, i + len(batch))

        results = await asyncio.gather(*(process_with_limit(f) for f in batch))

        for res in results:
            status = res.get("status")
            if status == "success":
                stats["processed"] += 1
            elif status == "error":
                stats["errors"] += 1
                logger.error("Error in %s: %s", res.get("path"), res.get("error"))
            else:
                stats["skipped"] += 1

    logger.info("Done. Stats: %s", stats)
    if not args.apply:
        logger.info("DRY RUN. Use --apply to write to DB.")
|
||||
|
||||
def main():
    """CLI entry point: parse arguments and run the async importer.

    NOTE(review): the merged diff left two conflicting ArgumentParser setups
    (`ap` with required --vault plus the removed --purge-before-upsert /
    --force-replace flags, and `parser` with defaults), parsed `args` twice,
    and kept the old synchronous ingestion loop — which calls the now-async
    process_file() without awaiting it — next to asyncio.run(main_async(args)).
    This reconciles to the intended async (+) side of the diff.
    """
    load_dotenv()

    # Default collection prefix from the environment, falling back to "mindnet".
    default_prefix = os.getenv("COLLECTION_PREFIX", "mindnet")

    parser = argparse.ArgumentParser(description="Import Vault to Qdrant (Async)")
    parser.add_argument("--vault", default="./vault", help="Path to vault root")
    parser.add_argument("--prefix", default=default_prefix, help="Collection prefix")
    parser.add_argument("--force", action="store_true", help="Force re-index all files")
    parser.add_argument("--apply", action="store_true", help="Perform writes to Qdrant")
    args = parser.parse_args()

    # Hand off to the asyncio event loop; all ingestion work is async.
    asyncio.run(main_async(args))


if __name__ == "__main__":
    main()
|
||||
Loading…
Reference in New Issue
Block a user