mindnet/scripts/import_markdown.py
Lars 6945ef8ddc
All checks were successful
Deploy mindnet to llm-node / deploy (push) Successful in 3s
scripts/import_markdown.py aktualisiert
2025-09-30 15:39:55 +02:00

70 lines
2.2 KiB
Python

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
tests/inspect_one_note.py
Diagnose einer einzelnen Markdown-Datei (kein Upsert).
Nutzung:
python3 tests/inspect_one_note.py --path ./test_vault/30_projects/project-demo.md
"""
from __future__ import annotations
import os, sys, json, argparse
PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
if PROJECT_ROOT not in sys.path:
sys.path.insert(0, PROJECT_ROOT)
from app.core.parser import read_markdown, normalize_frontmatter, validate_required_frontmatter
from app.core.chunker import assemble_chunks
from app.core.chunk_payload import make_chunk_payloads
try:
from app.core.derive_edges import build_edges_for_note
except Exception:
from app.core.edges import build_edges_for_note # type: ignore
def main():
    """Inspect one Markdown note and print a JSON diagnostic to stdout.

    The file is taken from the required ``--path`` command-line argument.
    The note is read, its frontmatter is normalized and validated, chunks and
    chunk payloads are built, and the number of derived edges is counted.
    This function itself only prints; it makes no upsert call.

    Failure handling:
        * If reading, frontmatter handling, or chunking fails, a compact JSON
          object with the keys ``path`` and ``error`` is printed and the
          function returns early.
        * If edge building fails, the failure is not fatal: the summary is
          still printed with ``edges`` set to 0 and the exception text in
          ``edge_error``.

    Returns:
        None. All results are reported via stdout.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--path", required=True)
    args = ap.parse_args()
    p = os.path.abspath(args.path)

    # Stage 1: read and parse the Markdown file.
    try:
        parsed = read_markdown(p)
    except Exception as e:
        print(json.dumps({"path": p, "error": f"read_markdown failed: {type(e).__name__}: {e}"}))
        return
    if parsed is None:
        print(json.dumps({"path": p, "error": "read_markdown returned None"}))
        return

    # Stage 2: normalize the frontmatter and check the required fields.
    try:
        fm = normalize_frontmatter(parsed.frontmatter)
        validate_required_frontmatter(fm)
    except Exception as e:
        print(json.dumps({"path": p, "error": f"frontmatter invalid: {type(e).__name__}: {e}"}))
        return

    # A missing or empty body is treated as an empty string.
    body = getattr(parsed, "body", "") or ""

    # Stage 3: chunk the body and build the chunk payloads. This stage is
    # guarded like the others so that a failure is reported as a JSON
    # diagnostic instead of an uncaught traceback.
    try:
        chunks = assemble_chunks(fm["id"], body, fm.get("type", "concept"))
        # Path relative to the grandparent of the file, i.e. the parent of
        # the directory that contains it ("<dir>/<file>.md").
        rel_path = os.path.relpath(p, os.path.join(p, "..", ".."))
        chunk_pls = make_chunk_payloads(fm, rel_path, chunks, note_text=body)
    except Exception as e:
        print(json.dumps({"path": p, "error": f"chunking failed: {type(e).__name__}: {e}"}))
        return

    # Stage 4: derive edges. Errors here are recorded, not fatal, so the
    # summary still shows the chunk statistics.
    edge_error = None
    edges_count = 0
    try:
        edges = build_edges_for_note(fm["id"], chunk_pls, include_note_scope_refs=False)
        edges_count = len(edges)
    except Exception as e:
        edge_error = f"{type(e).__name__}: {e}"

    print(json.dumps({
        "path": p,
        "id": fm.get("id"),
        "title": fm.get("title"),
        "body_len": len(body),
        "chunks": len(chunk_pls),
        "edges": edges_count,
        "edge_error": edge_error
    }, ensure_ascii=False, indent=2))
# Run the diagnostic only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()