mindnet/scripts/import_markdown.py
2025-12-10 22:22:56 +01:00

67 lines
1.9 KiB
Python

#!/usr/bin/env python3
"""
scripts/import_markdown.py
Refactored CLI-Wrapper für den IngestionService.
"""
import argparse
import os
import json
import sys
from dotenv import load_dotenv
from app.core.ingestion import IngestionService
def iter_md(root: str):
    """Return a sorted list of all Markdown files below *root*.

    Any directory named ``.obsidian`` is skipped together with everything
    beneath it. Returned paths use forward slashes regardless of platform.

    Args:
        root: Directory to walk recursively.

    Returns:
        Sorted list of paths (``root`` joined with the relative location)
        for every file whose name ends in ``.md``.
    """
    out = []
    for dp, dirs, fns in os.walk(root):
        # Compare path components rather than a substring so that files
        # located directly in ".obsidian" (no trailing slash in ``dp``) and
        # Windows-style backslash paths are excluded as well.
        if ".obsidian" in dp.replace("\\", "/").split("/"):
            dirs[:] = []  # nothing below an excluded directory is wanted
            continue
        out.extend(
            os.path.join(dp, fn).replace("\\", "/")
            for fn in fns
            if fn.endswith(".md")
        )
    return sorted(out)
def main():
    """Command-line entry point: run every Markdown file of a vault through
    the IngestionService.

    Loads environment variables via ``load_dotenv``, parses the CLI
    arguments, collects the files with ``iter_md`` and calls
    ``service.process_file`` once per file. Results whose status is neither
    ``"skipped"`` nor ``"unchanged"`` are printed as JSON on stdout; results
    carrying an ``"error"`` entry are additionally printed on stderr. A
    summary line is printed at the end.

    Exits with an argparse usage error (status 2) if ``--vault`` does not
    point to an existing directory.
    """
    load_dotenv()
    # Default prefix comes from the environment (read after load_dotenv),
    # falling back to "mindnet".
    default_prefix = os.getenv("COLLECTION_PREFIX", "mindnet")

    ap = argparse.ArgumentParser()
    ap.add_argument("--vault", required=True)
    ap.add_argument("--apply", action="store_true")
    ap.add_argument("--purge-before-upsert", action="store_true")
    ap.add_argument("--force-replace", action="store_true")
    ap.add_argument("--prefix", default=default_prefix)
    args = ap.parse_args()

    # Resolve the vault once and fail fast: os.walk silently yields nothing
    # for a missing path, which would otherwise look like an empty vault.
    vault_root = os.path.abspath(args.vault)
    if not os.path.isdir(vault_root):
        ap.error(f"--vault is not a directory: {vault_root}")

    print(f"Init IngestionService (Prefix: {args.prefix})...")
    service = IngestionService(collection_prefix=args.prefix)

    files = iter_md(vault_root)
    print(f"Found {len(files)} files in vault.")

    processed = 0
    errors = 0
    for f in files:
        res = service.process_file(
            file_path=f,
            vault_root=vault_root,
            apply=args.apply,
            force_replace=args.force_replace,
            purge_before=args.purge_before_upsert,
        )
        # Only report files that were not skipped or left unchanged.
        if res.get("status") not in ["skipped", "unchanged"]:
            print(json.dumps(res, ensure_ascii=False))
            processed += 1
        # Errors are echoed to stderr and counted separately.
        if res.get("error"):
            print(json.dumps(res, ensure_ascii=False), file=sys.stderr)
            errors += 1

    print(f"Done. Processed/Changed: {processed}. Errors: {errors}")
# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()