WP11 #8
|
|
@ -23,7 +23,7 @@ timeout_setting = os.getenv("MINDNET_API_TIMEOUT") or os.getenv("MINDNET_LLM_TIM
|
||||||
API_TIMEOUT = float(timeout_setting) if timeout_setting else 300.0
|
API_TIMEOUT = float(timeout_setting) if timeout_setting else 300.0
|
||||||
|
|
||||||
# --- PAGE SETUP ---
|
# --- PAGE SETUP ---
|
||||||
st.set_page_config(page_title="mindnet v2.3.4", page_icon="🧠", layout="wide")
|
st.set_page_config(page_title="mindnet v2.3.6", page_icon="🧠", layout="wide")
|
||||||
|
|
||||||
# --- CSS STYLING ---
|
# --- CSS STYLING ---
|
||||||
st.markdown("""
|
st.markdown("""
|
||||||
|
|
@ -225,7 +225,7 @@ def submit_feedback(query_id, node_id, score, comment=None):
|
||||||
def render_sidebar():
|
def render_sidebar():
|
||||||
with st.sidebar:
|
with st.sidebar:
|
||||||
st.title("🧠 mindnet")
|
st.title("🧠 mindnet")
|
||||||
st.caption("v2.3.4 | WP-10b (Intelligence)")
|
st.caption("v2.3.6 | WP-10b (Full)")
|
||||||
mode = st.radio("Modus", ["💬 Chat", "📝 Manueller Editor"], index=0)
|
mode = st.radio("Modus", ["💬 Chat", "📝 Manueller Editor"], index=0)
|
||||||
st.divider()
|
st.divider()
|
||||||
st.subheader("⚙️ Settings")
|
st.subheader("⚙️ Settings")
|
||||||
|
|
@ -463,22 +463,13 @@ def render_chat_interface(top_k, explain):
|
||||||
st.rerun()
|
st.rerun()
|
||||||
|
|
||||||
def render_manual_editor():
|
def render_manual_editor():
|
||||||
st.header("📝 Manueller Editor")
|
# Wir nutzen eine Fake-Message, um die render_draft_editor Logik wiederzuverwenden
|
||||||
c1, c2 = st.columns([1, 2])
|
# Aber mit leeren Defaults
|
||||||
n_type = c1.selectbox("Typ", ["concept", "project", "decision", "experience", "value", "goal"])
|
mock_msg = {
|
||||||
tags = c2.text_input("Tags")
|
"content": "---\ntype: default\nstatus: draft\ntitle: Neue Notiz\ntags: []\n---\n# Titel\n",
|
||||||
body = st.text_area("Inhalt", height=400, placeholder="# Titel\n\nText...")
|
"query_id": "manual_mode_v2" # Feste ID für manuellen Modus
|
||||||
|
}
|
||||||
if st.button("Speichern (Via API)"):
|
render_draft_editor(mock_msg)
|
||||||
meta = {"type": n_type, "status": "draft", "tags": [t.strip() for t in tags.split(",")]}
|
|
||||||
doc = build_markdown_doc(meta, body)
|
|
||||||
|
|
||||||
# Test Call
|
|
||||||
res = save_draft_to_vault(doc, filename=f"manual-{uuid.uuid4().hex[:6]}.md")
|
|
||||||
if "error" in res:
|
|
||||||
st.error(res["error"])
|
|
||||||
else:
|
|
||||||
st.success(f"Gespeichert: {res.get('file_path')}")
|
|
||||||
|
|
||||||
# --- MAIN ---
|
# --- MAIN ---
|
||||||
mode, top_k, explain = render_sidebar()
|
mode, top_k, explain = render_sidebar()
|
||||||
|
|
|
||||||
|
|
@ -1,67 +1,100 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
"""
|
"""
|
||||||
scripts/import_markdown.py
|
scripts/import_markdown.py
|
||||||
Refactored CLI-Wrapper für den IngestionService.
|
CLI-Tool zum Importieren von Markdown-Dateien in Qdrant.
|
||||||
|
Updated for Mindnet v2.3.6 (Async Ingestion Support).
|
||||||
"""
|
"""
|
||||||
import argparse
|
import asyncio
|
||||||
import os
|
import os
|
||||||
import json
|
import argparse
|
||||||
import sys
|
import logging
|
||||||
|
from pathlib import Path
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
|
|
||||||
|
# Importiere den neuen Async Service
|
||||||
|
# Stellen wir sicher, dass der Pfad stimmt (Pythonpath)
|
||||||
|
import sys
|
||||||
|
sys.path.append(os.getcwd())
|
||||||
|
|
||||||
from app.core.ingestion import IngestionService
|
from app.core.ingestion import IngestionService
|
||||||
|
|
||||||
def iter_md(root: str):
|
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
|
||||||
out = []
|
logger = logging.getLogger("importer")
|
||||||
for dp, _, fns in os.walk(root):
|
|
||||||
for fn in fns:
|
async def main_async(args):
|
||||||
if fn.endswith(".md") and "/.obsidian/" not in dp:
|
vault_path = Path(args.vault).resolve()
|
||||||
out.append(os.path.join(dp, fn).replace("\\", "/"))
|
if not vault_path.exists():
|
||||||
return sorted(out)
|
logger.error(f"Vault path does not exist: {vault_path}")
|
||||||
|
return
|
||||||
|
|
||||||
|
# Service initialisieren (startet Async Clients)
|
||||||
|
logger.info(f"Initializing IngestionService (Prefix: {args.prefix})")
|
||||||
|
service = IngestionService(collection_prefix=args.prefix)
|
||||||
|
|
||||||
|
logger.info(f"Scanning {vault_path}...")
|
||||||
|
files = list(vault_path.rglob("*.md"))
|
||||||
|
# Exclude .obsidian folder if present
|
||||||
|
files = [f for f in files if ".obsidian" not in str(f)]
|
||||||
|
files.sort()
|
||||||
|
|
||||||
|
logger.info(f"Found {len(files)} markdown files.")
|
||||||
|
|
||||||
|
stats = {"processed": 0, "skipped": 0, "errors": 0}
|
||||||
|
|
||||||
|
# Wir nutzen eine Semaphore, um nicht zu viele Files gleichzeitig zu öffnen/embedden
|
||||||
|
sem = asyncio.Semaphore(5) # Max 5 concurrent files to avoid OOM or Rate Limit
|
||||||
|
|
||||||
|
async def process_with_limit(f_path):
|
||||||
|
async with sem:
|
||||||
|
try:
|
||||||
|
res = await service.process_file(
|
||||||
|
file_path=str(f_path),
|
||||||
|
vault_root=str(vault_path),
|
||||||
|
force_replace=args.force,
|
||||||
|
apply=args.apply,
|
||||||
|
purge_before=True
|
||||||
|
)
|
||||||
|
return res
|
||||||
|
except Exception as e:
|
||||||
|
return {"status": "error", "error": str(e), "path": str(f_path)}
|
||||||
|
|
||||||
|
# Batch Processing
|
||||||
|
# Wir verarbeiten in Chunks, um den Progress zu sehen
|
||||||
|
batch_size = 20
|
||||||
|
for i in range(0, len(files), batch_size):
|
||||||
|
batch = files[i:i+batch_size]
|
||||||
|
logger.info(f"Processing batch {i} to {i+len(batch)}...")
|
||||||
|
|
||||||
|
tasks = [process_with_limit(f) for f in batch]
|
||||||
|
results = await asyncio.gather(*tasks)
|
||||||
|
|
||||||
|
for res in results:
|
||||||
|
if res.get("status") == "success":
|
||||||
|
stats["processed"] += 1
|
||||||
|
elif res.get("status") == "error":
|
||||||
|
stats["errors"] += 1
|
||||||
|
logger.error(f"Error in {res.get('path')}: {res.get('error')}")
|
||||||
|
else:
|
||||||
|
stats["skipped"] += 1
|
||||||
|
|
||||||
|
logger.info(f"Done. Stats: {stats}")
|
||||||
|
if not args.apply:
|
||||||
|
logger.info("DRY RUN. Use --apply to write to DB.")
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
load_dotenv()
|
load_dotenv()
|
||||||
|
|
||||||
# FIX: Default Prefix aus Environment holen, sonst Fallback auf "mindnet"
|
|
||||||
default_prefix = os.getenv("COLLECTION_PREFIX", "mindnet")
|
default_prefix = os.getenv("COLLECTION_PREFIX", "mindnet")
|
||||||
|
|
||||||
ap = argparse.ArgumentParser()
|
parser = argparse.ArgumentParser(description="Import Vault to Qdrant (Async)")
|
||||||
ap.add_argument("--vault", required=True)
|
parser.add_argument("--vault", default="./vault", help="Path to vault root")
|
||||||
ap.add_argument("--apply", action="store_true")
|
parser.add_argument("--prefix", default=default_prefix, help="Collection prefix")
|
||||||
ap.add_argument("--purge-before-upsert", action="store_true")
|
parser.add_argument("--force", action="store_true", help="Force re-index all files")
|
||||||
ap.add_argument("--force-replace", action="store_true")
|
parser.add_argument("--apply", action="store_true", help="Perform writes to Qdrant")
|
||||||
|
|
||||||
# Hier nutzen wir jetzt die Variable
|
args = parser.parse_args()
|
||||||
ap.add_argument("--prefix", default=default_prefix)
|
|
||||||
|
|
||||||
args = ap.parse_args()
|
# Starte den Async Loop
|
||||||
|
asyncio.run(main_async(args))
|
||||||
print(f"Init IngestionService (Prefix: {args.prefix})...")
|
|
||||||
service = IngestionService(collection_prefix=args.prefix)
|
|
||||||
|
|
||||||
files = iter_md(os.path.abspath(args.vault))
|
|
||||||
print(f"Found {len(files)} files in vault.")
|
|
||||||
|
|
||||||
processed = 0
|
|
||||||
errors = 0
|
|
||||||
|
|
||||||
for f in files:
|
|
||||||
res = service.process_file(
|
|
||||||
file_path=f,
|
|
||||||
vault_root=os.path.abspath(args.vault),
|
|
||||||
apply=args.apply,
|
|
||||||
force_replace=args.force_replace,
|
|
||||||
purge_before=args.purge_before_upsert
|
|
||||||
)
|
|
||||||
|
|
||||||
if res.get("status") not in ["skipped", "unchanged"]:
|
|
||||||
print(json.dumps(res, ensure_ascii=False))
|
|
||||||
processed += 1
|
|
||||||
|
|
||||||
if res.get("error"):
|
|
||||||
print(json.dumps(res, ensure_ascii=False), file=sys.stderr)
|
|
||||||
errors += 1
|
|
||||||
|
|
||||||
print(f"Done. Processed/Changed: {processed}. Errors: {errors}")
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
main()
|
main()
|
||||||
Loading…
Reference in New Issue
Block a user