scripts/import_markdown.py aktualisiert
Some checks failed
Deploy mindnet to llm-node / deploy (push) Failing after 2s

This commit is contained in:
Lars 2025-09-09 16:39:48 +02:00
parent f5e6fcc097
commit 466ea64e63

View File

@ -2,7 +2,7 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
""" """
Script: scripts/import_markdown.py — Markdown → Qdrant (Notes, Chunks, Edges) Script: scripts/import_markdown.py — Markdown → Qdrant (Notes, Chunks, Edges)
Version: 3.5.1 Version: 3.5.2
Datum: 2025-09-09 Datum: 2025-09-09
Kurzbeschreibung Kurzbeschreibung
@ -20,6 +20,12 @@ Kurzbeschreibung
- Wenn sich die Signatur (z. B. bodyfull, parsedraw, canonicalnone) zwischen Alt/Neu unterscheidet, - Wenn sich die Signatur (z. B. bodyfull, parsedraw, canonicalnone) zwischen Alt/Neu unterscheidet,
gilt die Note als **geändert** (Einmal-Update, um die neue Signatur zu persistieren). gilt die Note als **geändert** (Einmal-Update, um die neue Signatur zu persistieren).
Robustheit
----------
- Rückgaben aus ``make_note_payload`` werden **koerziert** (Tuple, Mapping, Pydantic v1/v2, Objekt) → ``dict``.
- Bei Nicht-Erfolg präzise Debug-Ausgabe (Typname + kurzer Preview).
- Defensive Fehlerbehandlung in allen Schritten (Parsing, Chunks, Edges, Upserts).
ENV / Qdrant ENV / Qdrant
------------ ------------
- QDRANT_URL | QDRANT_HOST/QDRANT_PORT | QDRANT_API_KEY - QDRANT_URL | QDRANT_HOST/QDRANT_PORT | QDRANT_API_KEY
@ -43,12 +49,14 @@ Aufruf-Beispiele
python3 -m scripts.import_markdown --vault ./vault --apply --compare-text python3 -m scripts.import_markdown --vault ./vault --apply --compare-text
""" """
from __future__ import annotations from __future__ import annotations
import argparse import argparse
import difflib import difflib
import json import json
import os import os
import sys import sys
from typing import Dict, List, Optional, Tuple from typing import Dict, List, Optional, Tuple, Any
from collections.abc import Mapping
from dotenv import load_dotenv from dotenv import load_dotenv
from qdrant_client.http import models as rest from qdrant_client.http import models as rest
@ -129,6 +137,52 @@ def _normalize_rel_path(abs_path: str, vault_root: str) -> str:
rel = abs_path rel = abs_path
return rel.replace("\\", "/").lstrip("/") return rel.replace("\\", "/").lstrip("/")
def _coerce_to_dict(obj: Any) -> Optional[Dict[str, Any]]:
    """Coerce a heterogeneous return value into a plain ``dict``.

    Strategies are tried in this order; the first one that yields a
    mapping wins:

    - ``None``                     -> ``None``
    - ``dict``                     -> returned unchanged (same object, no copy)
    - any other ``Mapping``        -> ``dict(obj)``
    - ``list`` / ``tuple``         -> first element that can itself be coerced
    - Pydantic v2 style object     -> ``obj.model_dump()``
    - Pydantic v1 style object     -> ``obj.dict()``
    - object with a ``__dict__``   -> shallow copy of ``obj.__dict__``
    - anything else                -> ``None``

    Args:
        obj: The value to convert.

    Returns:
        A ``dict``, or ``None`` when no strategy produced a mapping.
        The result is never a non-dict value: whatever ``model_dump()`` /
        ``dict()`` return is validated before it is handed back.
    """
    if obj is None:
        return None
    if isinstance(obj, dict):
        return obj
    if isinstance(obj, Mapping):
        try:
            return dict(obj)
        except Exception:
            # Best effort: a broken Mapping implementation must not abort the
            # import; fall through to the remaining strategies.
            pass
    if isinstance(obj, (list, tuple)):
        # e.g. a ``(payload, extra)`` tuple: take the first dict-like entry.
        for item in obj:
            candidate = _coerce_to_dict(item)
            if isinstance(candidate, dict):
                return candidate
        return None
    # Pydantic v2 exposes ``model_dump``, v1 exposes ``dict``; try v2 first.
    for method_name in ("model_dump", "dict"):
        method = getattr(obj, method_name, None)
        if not callable(method):
            continue
        try:
            dumped = method()
        except Exception:
            # Best effort: try the next strategy instead of failing.
            continue
        if isinstance(dumped, dict):
            return dumped
        if isinstance(dumped, Mapping):
            try:
                return dict(dumped)
            except Exception:
                continue
        # A non-mapping result would violate the declared return type, so it
        # is ignored and the next strategy is tried.
    # Generic fallback: plain objects that keep their state in ``__dict__``.
    attrs = getattr(obj, "__dict__", None)
    if isinstance(attrs, dict):
        return dict(attrs)
    return None
# --------------------------------------------------------------------- # ---------------------------------------------------------------------
# Main # Main
@ -203,9 +257,9 @@ def main() -> None:
processed += 1 processed += 1
# -------------- Note-Payload (defensiv) -------------- # -------------- Note-Payload (defensiv + Koerzierung) --------------
try: try:
note_pl = make_note_payload( note_pl_raw = make_note_payload(
parsed, parsed,
vault_root=root, vault_root=root,
hash_mode=args.hash_mode, hash_mode=args.hash_mode,
@ -217,8 +271,18 @@ def main() -> None:
print(json.dumps({"path": path, "note_id": fm.get("id"), "error": f"make_note_payload failed: {e}"})) print(json.dumps({"path": path, "note_id": fm.get("id"), "error": f"make_note_payload failed: {e}"}))
continue continue
note_pl = _coerce_to_dict(note_pl_raw)
if not isinstance(note_pl, dict): if not isinstance(note_pl, dict):
print(json.dumps({"path": path, "note_id": fm.get("id"), "error": "make_note_payload returned non-dict"})) preview = repr(note_pl_raw)
if len(preview) > 240:
preview = preview[:240] + ""
print(json.dumps({
"path": path,
"note_id": fm.get("id"),
"error": "make_note_payload returned non-dict",
"returned_type": type(note_pl_raw).__name__,
"preview": preview
}))
continue continue
# fulltext sicherstellen + Pfad normalisieren # fulltext sicherstellen + Pfad normalisieren