scripts/import_markdown.py aktualisiert
Some checks failed
Deploy mindnet to llm-node / deploy (push) Failing after 2s
Some checks failed
Deploy mindnet to llm-node / deploy (push) Failing after 2s
This commit is contained in:
parent
f5e6fcc097
commit
466ea64e63
|
|
@ -2,7 +2,7 @@
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
"""
|
"""
|
||||||
Script: scripts/import_markdown.py — Markdown → Qdrant (Notes, Chunks, Edges)
|
Script: scripts/import_markdown.py — Markdown → Qdrant (Notes, Chunks, Edges)
|
||||||
Version: 3.5.1
|
Version: 3.5.2
|
||||||
Datum: 2025-09-09
|
Datum: 2025-09-09
|
||||||
|
|
||||||
Kurzbeschreibung
|
Kurzbeschreibung
|
||||||
|
|
@ -20,6 +20,12 @@ Kurzbeschreibung
|
||||||
- Wenn sich die Signatur (z. B. body→full, parsed→raw, canonical→none) zwischen Alt/Neu unterscheidet,
|
- Wenn sich die Signatur (z. B. body→full, parsed→raw, canonical→none) zwischen Alt/Neu unterscheidet,
|
||||||
gilt die Note als **geändert** (Einmal-Update, um die neue Signatur zu persistieren).
|
gilt die Note als **geändert** (Einmal-Update, um die neue Signatur zu persistieren).
|
||||||
|
|
||||||
|
Robustheit
|
||||||
|
----------
|
||||||
|
- Rückgaben aus ``make_note_payload`` werden **koerziert** (Tuple, Mapping, Pydantic v1/v2, Objekt) → ``dict``.
|
||||||
|
- Bei Nicht-Erfolg präzise Debug-Ausgabe (Typname + kurzer Preview).
|
||||||
|
- Defensive Fehlerbehandlung in allen Schritten (Parsing, Chunks, Edges, Upserts).
|
||||||
|
|
||||||
ENV / Qdrant
|
ENV / Qdrant
|
||||||
------------
|
------------
|
||||||
- QDRANT_URL | QDRANT_HOST/QDRANT_PORT | QDRANT_API_KEY
|
- QDRANT_URL | QDRANT_HOST/QDRANT_PORT | QDRANT_API_KEY
|
||||||
|
|
@ -43,12 +49,14 @@ Aufruf-Beispiele
|
||||||
python3 -m scripts.import_markdown --vault ./vault --apply --compare-text
|
python3 -m scripts.import_markdown --vault ./vault --apply --compare-text
|
||||||
"""
|
"""
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
import difflib
|
import difflib
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
from typing import Dict, List, Optional, Tuple
|
from typing import Dict, List, Optional, Tuple, Any
|
||||||
|
from collections.abc import Mapping
|
||||||
|
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
from qdrant_client.http import models as rest
|
from qdrant_client.http import models as rest
|
||||||
|
|
@ -129,6 +137,52 @@ def _normalize_rel_path(abs_path: str, vault_root: str) -> str:
|
||||||
rel = abs_path
|
rel = abs_path
|
||||||
return rel.replace("\\", "/").lstrip("/")
|
return rel.replace("\\", "/").lstrip("/")
|
||||||
|
|
||||||
|
def _coerce_to_dict(obj: Any) -> Optional[Dict[str, Any]]:
    """
    Try to convert heterogeneous return values into a plain ``dict``.

    Conversion rules, applied in this order:
      - ``None``                     -> ``None``
      - ``dict``                     -> returned unchanged (same object)
      - ``Mapping``                  -> ``dict(obj)``
      - ``list`` / ``tuple``         -> first element that can itself be coerced
      - Pydantic v2 style object     -> ``obj.model_dump()``
      - Pydantic v1 style object     -> ``obj.dict()``
      - object with ``__dict__``     -> shallow copy of ``__dict__``
      - anything else                -> ``None``

    The result of ``model_dump()`` / ``dict()`` is only accepted when it is a
    dict (or a Mapping that can be copied into one). Otherwise the next
    strategy is tried, so the function never returns a non-dict value.

    Args:
        obj: Arbitrary value to coerce.

    Returns:
        A ``dict`` on success, otherwise ``None``.
    """
    if obj is None:
        return None
    if isinstance(obj, dict):
        return obj

    if isinstance(obj, Mapping):
        try:
            return dict(obj)
        except Exception:
            # Best effort: a misbehaving Mapping may still expose one of the
            # fallbacks below, so keep going instead of failing here.
            pass

    if isinstance(obj, (list, tuple)):
        # e.g. ``(payload, extra)`` — take the first dict-like entry.
        for item in obj:
            candidate = _coerce_to_dict(item)
            if isinstance(candidate, dict):
                return candidate
        return None

    # Pydantic v2 (``model_dump``) first, then Pydantic v1 (``dict``).
    for method_name in ("model_dump", "dict"):
        method = getattr(obj, method_name, None)
        if not callable(method):
            continue
        try:
            dumped = method()
        except Exception:
            # Best effort: e.g. an unbound method on a class object or a
            # method that requires arguments; try the next strategy.
            continue
        if isinstance(dumped, dict):
            return dumped
        if isinstance(dumped, Mapping):
            try:
                return dict(dumped)
            except Exception:
                continue
        # Non-dict result: ignore it and fall through to the next strategy.

    # Generic fallback: shallow copy of the instance attributes.
    attrs = getattr(obj, "__dict__", None)
    if isinstance(attrs, dict):
        return dict(attrs)
    return None
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------
|
# ---------------------------------------------------------------------
|
||||||
# Main
|
# Main
|
||||||
|
|
@ -203,9 +257,9 @@ def main() -> None:
|
||||||
|
|
||||||
processed += 1
|
processed += 1
|
||||||
|
|
||||||
# -------------- Note-Payload (defensiv) --------------
|
# -------------- Note-Payload (defensiv + Koerzierung) --------------
|
||||||
try:
|
try:
|
||||||
note_pl = make_note_payload(
|
note_pl_raw = make_note_payload(
|
||||||
parsed,
|
parsed,
|
||||||
vault_root=root,
|
vault_root=root,
|
||||||
hash_mode=args.hash_mode,
|
hash_mode=args.hash_mode,
|
||||||
|
|
@ -217,8 +271,18 @@ def main() -> None:
|
||||||
print(json.dumps({"path": path, "note_id": fm.get("id"), "error": f"make_note_payload failed: {e}"}))
|
print(json.dumps({"path": path, "note_id": fm.get("id"), "error": f"make_note_payload failed: {e}"}))
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
note_pl = _coerce_to_dict(note_pl_raw)
|
||||||
if not isinstance(note_pl, dict):
|
if not isinstance(note_pl, dict):
|
||||||
print(json.dumps({"path": path, "note_id": fm.get("id"), "error": "make_note_payload returned non-dict"}))
|
preview = repr(note_pl_raw)
|
||||||
|
if len(preview) > 240:
|
||||||
|
preview = preview[:240] + "…"
|
||||||
|
print(json.dumps({
|
||||||
|
"path": path,
|
||||||
|
"note_id": fm.get("id"),
|
||||||
|
"error": "make_note_payload returned non-dict",
|
||||||
|
"returned_type": type(note_pl_raw).__name__,
|
||||||
|
"preview": preview
|
||||||
|
}))
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# fulltext sicherstellen + Pfad normalisieren
|
# fulltext sicherstellen + Pfad normalisieren
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user