scripts/import_markdown.py aktualisiert
Some checks failed
Deploy mindnet to llm-node / deploy (push) Failing after 2s
Some checks failed
Deploy mindnet to llm-node / deploy (push) Failing after 2s
This commit is contained in:
parent
f5e6fcc097
commit
466ea64e63
|
|
@ -2,7 +2,7 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Script: scripts/import_markdown.py — Markdown → Qdrant (Notes, Chunks, Edges)
|
||||
Version: 3.5.1
|
||||
Version: 3.5.2
|
||||
Datum: 2025-09-09
|
||||
|
||||
Kurzbeschreibung
|
||||
|
|
@ -20,6 +20,12 @@ Kurzbeschreibung
|
|||
- Wenn sich die Signatur (z. B. body→full, parsed→raw, canonical→none) zwischen Alt/Neu unterscheidet,
|
||||
gilt die Note als **geändert** (Einmal-Update, um die neue Signatur zu persistieren).
|
||||
|
||||
Robustheit
|
||||
----------
|
||||
- Rückgaben aus ``make_note_payload`` werden **koerziert** (Tuple, Mapping, Pydantic v1/v2, Objekt) → ``dict``.
|
||||
- Bei Nicht-Erfolg präzise Debug-Ausgabe (Typname + kurzer Preview).
|
||||
- Defensive Fehlerbehandlung in allen Schritten (Parsing, Chunks, Edges, Upserts).
|
||||
|
||||
ENV / Qdrant
|
||||
------------
|
||||
- QDRANT_URL | QDRANT_HOST/QDRANT_PORT | QDRANT_API_KEY
|
||||
|
|
@ -43,12 +49,14 @@ Aufruf-Beispiele
|
|||
python3 -m scripts.import_markdown --vault ./vault --apply --compare-text
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import difflib
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
from typing import Dict, List, Optional, Tuple
|
||||
from typing import Dict, List, Optional, Tuple, Any
|
||||
from collections.abc import Mapping
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from qdrant_client.http import models as rest
|
||||
|
|
@ -129,6 +137,52 @@ def _normalize_rel_path(abs_path: str, vault_root: str) -> str:
|
|||
rel = abs_path
|
||||
return rel.replace("\\", "/").lstrip("/")
|
||||
|
||||
def _coerce_to_dict(obj: Any) -> Optional[Dict[str, Any]]:
    """Convert a heterogeneous return value into a plain ``dict``.

    Strategies are tried in this order; the first one that yields a mapping
    wins:

    - ``None``                        -> ``None``
    - ``dict``                        -> returned as-is (same object, no copy)
    - other ``Mapping``               -> ``dict(obj)``
    - ``list`` / ``tuple``            -> first element that can itself be
      coerced (checked recursively); ``None`` if no element qualifies
    - callable ``model_dump`` attribute (Pydantic v2 style) -> its result
    - callable ``dict`` attribute (Pydantic v1 style)       -> its result
    - ``__dict__`` that is a dict     -> shallow copy of it
    - anything else                   -> ``None``

    A dump method that raises, or that returns something other than a
    mapping, is skipped so the remaining strategies still get a chance.
    This guarantees the result is always a ``dict`` or ``None``.

    Args:
        obj: The value to convert.

    Returns:
        A ``dict`` derived from ``obj``, or ``None`` if no strategy applies.
    """
    if obj is None:
        return None
    if isinstance(obj, dict):
        return obj
    if isinstance(obj, Mapping):
        try:
            return dict(obj)
        except Exception:
            # Best effort: a misbehaving Mapping falls through to the
            # attribute-based strategies below instead of aborting.
            pass
    if isinstance(obj, (list, tuple)):
        for item in obj:
            coerced = _coerce_to_dict(item)
            if isinstance(coerced, dict):
                return coerced
        return None

    # Dump-style methods: "model_dump" first, then "dict".
    for method_name in ("model_dump", "dict"):
        dumper = getattr(obj, method_name, None)
        if not callable(dumper):
            continue
        try:
            dumped = dumper()
            if isinstance(dumped, dict):
                return dumped
            if isinstance(dumped, Mapping):
                return dict(dumped)
        except Exception:
            # Deliberately tolerant: try the next strategy instead.
            pass

    # Generic fallback: copy the instance attribute dictionary.
    attrs = getattr(obj, "__dict__", None)
    if isinstance(attrs, dict):
        return dict(attrs)
    return None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# Main
|
||||
|
|
@ -203,9 +257,9 @@ def main() -> None:
|
|||
|
||||
processed += 1
|
||||
|
||||
# -------------- Note-Payload (defensiv) --------------
|
||||
# -------------- Note-Payload (defensiv + Koerzierung) --------------
|
||||
try:
|
||||
note_pl = make_note_payload(
|
||||
note_pl_raw = make_note_payload(
|
||||
parsed,
|
||||
vault_root=root,
|
||||
hash_mode=args.hash_mode,
|
||||
|
|
@ -217,8 +271,18 @@ def main() -> None:
|
|||
print(json.dumps({"path": path, "note_id": fm.get("id"), "error": f"make_note_payload failed: {e}"}))
|
||||
continue
|
||||
|
||||
note_pl = _coerce_to_dict(note_pl_raw)
|
||||
if not isinstance(note_pl, dict):
|
||||
print(json.dumps({"path": path, "note_id": fm.get("id"), "error": "make_note_payload returned non-dict"}))
|
||||
preview = repr(note_pl_raw)
|
||||
if len(preview) > 240:
|
||||
preview = preview[:240] + "…"
|
||||
print(json.dumps({
|
||||
"path": path,
|
||||
"note_id": fm.get("id"),
|
||||
"error": "make_note_payload returned non-dict",
|
||||
"returned_type": type(note_pl_raw).__name__,
|
||||
"preview": preview
|
||||
}))
|
||||
continue
|
||||
|
||||
# fulltext sicherstellen + Pfad normalisieren
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user