From aa7d0190c8b43ee32d316fde5f546c151e4d3545 Mon Sep 17 00:00:00 2001
From: Lars <Lars@stommer.de>
Date: Mon, 8 Sep 2025 18:04:11 +0200
Subject: [PATCH] export_markdown: Prefix aus ENV statt --prefix lesen, collection_names() als Tupel nutzen

---
 scripts/export_markdown.py | 59 ++++++++++++++++++--------------------
 1 file changed, 28 insertions(+), 31 deletions(-)

diff --git a/scripts/export_markdown.py b/scripts/export_markdown.py
index 6773512..1cb5e30 100644
--- a/scripts/export_markdown.py
+++ b/scripts/export_markdown.py
@@ -1,7 +1,7 @@
 # scripts/export_markdown.py
 # -----------------------------------------------------------------------------
 # Name:        export_markdown.py
-# Version:     1.0.0 (2025-09-08)
+# Version:     1.0.1 (2025-09-08)
 # Zweck:       Exportiert Notes + Chunks aus Qdrant zurück in Markdown-Dateien.
 #
 # Was es macht:
@@ -11,26 +11,36 @@
 #   - Schreibt Dateien unter --out (Verzeichnis wird angelegt).
 #   - Verwendet, falls vorhanden, den Pfad aus payload.path; sonst Titel-basiert.
 #
-# Aufruf:
-#   # alle Notes exportieren:
-#   python3 -m scripts.export_markdown --prefix mindnet --out ./_export
+# Aufruf (im venv):
+#   # alle Notes exportieren (Prefix wird aus ENV COLLECTION_PREFIX gelesen):
+#   python3 -m scripts.export_markdown --out ./_export
+#
+#   # Prefix explizit per ENV überschreiben:
+#   COLLECTION_PREFIX=mindnet python3 -m scripts.export_markdown --out ./_export
 #
 #   # nur bestimmte Note-IDs exportieren:
-#   python3 -m scripts.export_markdown --prefix mindnet --out ./_export \
+#   python3 -m scripts.export_markdown --out ./_export \
 #       --note-id 20250821-architektur-ki-trainerassistent-761cfe \
 #       --note-id 20250821-personal-mind-ki-projekt-7b0d79
 #
 # Parameter:
-#   --prefix     : Collections-Präfix (Default: mindnet)
-#   --out        : Zielverzeichnis (wird erstellt)
+#   --out        : Zielverzeichnis (wird erstellt, Pflicht)
 #   --note-id    : Kann mehrfach angegeben werden; dann nur diese Notes
 #   --overwrite  : Existierende Dateien überschreiben (sonst überspringen)
 #
+# Umgebung:
+#   QDRANT_URL         (z. B. http://127.0.0.1:6333)
+#   QDRANT_API_KEY     (optional)
+#   COLLECTION_PREFIX  (Default in app/core/qdrant.py: "mindnet")
+#   VECTOR_DIM         (Default in app/core/qdrant.py: 384)
+#
 # Voraussetzungen:
 #   - Ausführung im aktivierten venv empfohlen: source .venv/bin/activate
-#   - Qdrant läuft lokal (oder URL/API-Key in ENV), siehe app/core/qdrant.py
+#   - Qdrant ist erreichbar (lokal oder über QDRANT_URL/QDRANT_API_KEY in ENV), siehe app/core/qdrant.py
 #
 # Änderungen:
+#   - 1.0.1: CLI-Option --prefix entfernt; Prefix kommt via QdrantConfig.from_env() aus ENV.
+#            collection_names() liefert ein Tupel (notes, chunks, edges); Aufruf angepasst.
 #   - 1.0.0: Erster Release.
 # -----------------------------------------------------------------------------
 
@@ -39,7 +49,7 @@ import json
 import os
 import re
 from pathlib import Path
-from typing import Dict, List, Optional, Tuple
+from typing import Dict, List, Optional
 
 from qdrant_client import QdrantClient
 from qdrant_client.http import models as rest
@@ -49,14 +59,12 @@ from app.core.qdrant import QdrantConfig, get_client, collection_names
 
 def to_yaml_frontmatter(fm: Dict) -> str:
     """Serialisiert ein Python-Dict als YAML-Frontmatter (einfach, stabil)."""
-    # Nur bekannte Felder in definierter Reihenfolge
     ordered_keys = [
         "id", "note_id", "title", "type", "status",
         "created", "updated", "path", "tags",
         "area", "project", "source", "lang", "slug",
     ]
     lines: List[str] = ["---"]
-    # normiere: id-Feld (falls nur note_id existiert)
     m = dict(fm)
     if "id" not in m and "note_id" in m:
         m["id"] = m["note_id"]
@@ -89,17 +97,13 @@ def choose_output_path(out_dir: Path, fm: Dict) -> Path:
 
 
 def fetch_all_notes(client: QdrantClient, notes_col: str, only_ids: Optional[List[str]]) -> List[Dict]:
-    """scrollt alle Notes (optional gefiltert). Rückgabe: List[Payload-Dicts]."""
+    """Scrollt alle Notes (optional gefiltert). Rückgabe: List[Payload-Dicts]."""
     results: List[Dict] = []
     offset = None
     flt = None
     if only_ids:
-        # Filter: note_id IN [...]
         flt = rest.Filter(
-            should=[
-                rest.FieldCondition(key="note_id", match=rest.MatchValue(value=nid))
-                for nid in only_ids
-            ]
+            should=[rest.FieldCondition(key="note_id", match=rest.MatchValue(value=nid)) for nid in only_ids]
         )
 
     while True:
@@ -123,9 +127,7 @@ def fetch_all_notes(client: QdrantClient, notes_col: str, only_ids: Optional[Lis
 def fetch_chunks_for_note(client: QdrantClient, chunks_col: str, note_id: str) -> List[Dict]:
     res: List[Dict] = []
     offset = None
-    flt = rest.Filter(
-        must=[rest.FieldCondition(key="note_id", match=rest.MatchValue(value=note_id))]
-    )
+    flt = rest.Filter(must=[rest.FieldCondition(key="note_id", match=rest.MatchValue(value=note_id))])
     while True:
         pts, next_offset = client.scroll(
             collection_name=chunks_col,
@@ -141,8 +143,6 @@ def fetch_chunks_for_note(client: QdrantClient, chunks_col: str, note_id: str) -
         if next_offset is None:
             break
         offset = next_offset
-
-    # sortiere nach seq, falls vorhanden
     res.sort(key=lambda x: x.get("seq", 0))
     return res
 
@@ -164,7 +164,6 @@ def write_note_as_markdown(out_dir: Path, note_payload: Dict, chunks: List[Dict]
 
     frontmatter = to_yaml_frontmatter(note_payload)
     body = assemble_body_from_chunks(chunks)
-
     content = f"{frontmatter}\n{body}"
     out_path.write_text(content, encoding="utf-8")
     return out_path
@@ -172,7 +171,6 @@ def write_note_as_markdown(out_dir: Path, note_payload: Dict, chunks: List[Dict]
 
 def main():
     ap = argparse.ArgumentParser(description="Exportiert Notes+Chunks aus Qdrant in Markdown-Dateien.")
-    ap.add_argument("--prefix", default="mindnet", help="Collections-Präfix (Default: mindnet)")
     ap.add_argument("--out", required=True, help="Zielverzeichnis für exportierte .md-Dateien")
     ap.add_argument("--note-id", action="append", help="Spezifische Note-ID exportieren (mehrfach möglich)")
     ap.add_argument("--overwrite", action="store_true", help="Existierende Dateien überschreiben")
@@ -181,21 +179,20 @@ def main():
     out_dir = Path(args.out).resolve()
     out_dir.mkdir(parents=True, exist_ok=True)
 
-        # korrekt
-    cfg = QdrantConfig.from_env(prefix=args.prefix)
+    # Prefix und Verbindungsdaten kommen aus ENV (siehe Kopfkommentar "Umgebung"), nicht mehr von der CLI
+    cfg = QdrantConfig.from_env()
     client = get_client(cfg)
-    cols = collection_names(args.prefix)
+    notes_col, chunks_col, _edges_col = collection_names(cfg.prefix)
 
-
-    notes = fetch_all_notes(client, cols["notes"], args.note_id)
+    notes = fetch_all_notes(client, notes_col, args.note_id)
     if not notes:
-        print("Keine Notes in Qdrant gefunden (Filter zu streng?).")
+        print("Keine Notes in Qdrant gefunden (oder Filter zu streng).")
         return
 
     exported = []
     for np in notes:
         nid = np.get("note_id") or np.get("id")
-        chunks = fetch_chunks_for_note(client, cols["chunks"], note_id=str(nid))
+        chunks = fetch_chunks_for_note(client, chunks_col, note_id=str(nid))
         path = write_note_as_markdown(out_dir, np, chunks, overwrite=args.overwrite)
         exported.append({"note_id": nid, "path": str(path)})