#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
FILE: scripts/import_markdown.py
VERSION: 2.6.0 (2026-01-10)
STATUS: Active (Core)
COMPATIBILITY: IngestionProcessor v3.3.7+

Purpose: Main tool for importing markdown files.
Implements the global two-phase write strategy:
  Pass 1  - pre-scan all files to build a global context cache,
  Phase 1 - batch-import notes and explicit edges,
  Phase 2 - inject symmetric edges across the whole vault.
"""
import asyncio
import os
import argparse
import logging
import sys
from pathlib import Path

from dotenv import load_dotenv

# Root logger setup (before project imports, which may log on import).
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s",
)
sys.path.append(os.getcwd())

from app.core.ingestion import IngestionService
from app.core.parser import pre_scan_markdown

logger = logging.getLogger("importer")

# --- GLOBAL FOLDER FILTER ---
# Folders whose contents are never imported (trash/system/template dirs).
IGNORE_FOLDERS = (".trash", ".obsidian", ".sync", "templates", "_system")


def _collect_files(vault_path):
    """Return all importable ``*.md`` files under *vault_path*, sorted.

    Skips any path containing one of IGNORE_FOLDERS and any hidden path
    component (a ``/.`` segment anywhere in the POSIX-style path string).
    """
    files = []
    for f in vault_path.rglob("*.md"):
        f_str = str(f)
        if any(folder in f_str for folder in IGNORE_FOLDERS):
            continue
        # FIX: idiomatic membership test ("/." not in ...) replacing the
        # original `not "/." in f_str`; behavior is identical.
        if "/." in f_str:
            continue
        files.append(f)
    files.sort()
    return files


async def main_async(args):
    """Run the full two-phase import for the vault given in *args*.

    Args:
        args: Parsed argparse namespace with ``vault``, ``prefix``,
              ``force`` and ``apply`` attributes.
    """
    vault_path = Path(args.vault).resolve()
    if not vault_path.exists():
        logger.error("Vault path does not exist: %s", vault_path)
        return

    logger.info("Initializing IngestionService (Prefix: %s)", args.prefix)
    service = IngestionService(collection_prefix=args.prefix)

    logger.info("Scanning %s...", vault_path)
    files = _collect_files(vault_path)
    logger.info("Found %d relevant markdown files.", len(files))

    # =========================================================================
    # PASS 1: Global Pre-Scan
    # =========================================================================
    logger.info("🔍 [Pass 1] Pre-scanning files for global context cache...")
    for f_path in files:
        try:
            ctx = pre_scan_markdown(str(f_path))
            if ctx:
                # Register the context under every alias it can be linked by:
                # its note id, its title, and its bare filename (no extension).
                service.batch_cache[ctx.note_id] = ctx
                service.batch_cache[ctx.title] = ctx
                fname = os.path.splitext(f_path.name)[0]
                service.batch_cache[fname] = ctx
        except Exception as e:
            # Best effort: one broken file must not abort the pre-scan.
            # FIX: the failure is logged instead of swallowed silently.
            logger.warning("Pre-scan failed for %s: %s", f_path, e)

    # =========================================================================
    # PHASE 1: Batch-Import (Notes & Explicit Edges)
    # =========================================================================
    stats = {"processed": 0, "skipped": 0, "errors": 0}
    sem = asyncio.Semaphore(5)  # cap on concurrent file imports

    async def process_with_limit(f_path):
        """Import one file under the semaphore; returns a status dict, never raises."""
        async with sem:
            try:
                # Uses process_file (v3.3.7)
                return await service.process_file(
                    file_path=str(f_path),
                    vault_root=str(vault_path),
                    force_replace=args.force,
                    apply=args.apply,
                    purge_before=True,
                )
            except Exception as e:
                return {"status": "error", "error": str(e), "path": str(f_path)}

    batch_size = 20
    for i in range(0, len(files), batch_size):
        batch = files[i:i + batch_size]
        logger.info("--- Processing Batch %d ---", i // batch_size + 1)
        results = await asyncio.gather(*(process_with_limit(f) for f in batch))
        for res in results:
            if res.get("status") == "success":
                stats["processed"] += 1
            elif res.get("status") == "error":
                stats["errors"] += 1
            else:
                stats["skipped"] += 1

    # =========================================================================
    # PHASE 2: Global Symmetry Injection (after all batches complete)
    # =========================================================================
    if args.apply:
        logger.info("🔄 [Phase 2] Starting global symmetry injection for the entire vault...")
        sym_res = await service.commit_vault_symmetries()
        if sym_res.get("status") == "success":
            logger.info(
                "✅ Finished global symmetry injection. Added: %s",
                sym_res.get("added", 0),
            )

    logger.info("Final Stats: %s", stats)


def main():
    """CLI entry point: load env, parse arguments, run the async importer."""
    load_dotenv()
    default_prefix = os.getenv("COLLECTION_PREFIX", "mindnet")

    parser = argparse.ArgumentParser(
        description="Import markdown files using the global two-phase write strategy."
    )
    parser.add_argument("--vault", default="./vault",
                        help="Path to the vault root directory.")
    parser.add_argument("--prefix", default=default_prefix,
                        help="Collection name prefix (env: COLLECTION_PREFIX).")
    parser.add_argument("--force", action="store_true",
                        help="Force replace of already-imported files.")
    parser.add_argument("--apply", action="store_true",
                        help="Actually write changes (including Phase 2 symmetry injection).")
    args = parser.parse_args()

    try:
        asyncio.run(main_async(args))
    except Exception as e:
        logger.critical("FATAL ERROR: %s", e)
        # FIX: propagate failure to the shell; the original exited with 0
        # even after a fatal error, so callers could not detect it.
        sys.exit(1)


if __name__ == "__main__":
    main()