Dateien nach "scripts" hochladen
All checks were successful
Deploy mindnet to llm-node / deploy (push) Successful in 3s

This commit is contained in:
Lars 2025-11-11 16:46:48 +01:00
parent 4ea62e6886
commit 725271a7da

107
scripts/edges_dryrun.py Normal file
View File

@ -0,0 +1,107 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# scripts/edges_dryrun.py
"""
Dry run: build edges from a vault **without** upserting them into Qdrant.
- Reads Markdown files with YAML frontmatter
- Chunking: simple paragraph chunker (index + text)
- Edges: uses app.core.edges.build_edges_for_note()
- Output: one JSON entry per note with edge counts and up to 3 sample payloads
Usage:
    python3 -m scripts.edges_dryrun --vault ./vault
Optional:
    --include-note-scope-refs  # also use frontmatter links (links[].target_id) as note-scope references
Requirements:
- app/core/parser.py (read_markdown, normalize_frontmatter)
- app/core/edges.py (that module forwards to the v2 implementation)
"""
from __future__ import annotations
import argparse
import json
import os
import re
from pathlib import Path
from typing import Dict, List, Optional
from app.core.parser import read_markdown, normalize_frontmatter
from app.core.edges import build_edges_for_note
def _iter_markdown(vault: str):
    """Yield every visible Markdown file below *vault* in a stable order.

    Args:
        vault: Path of the vault root directory; it is scanned recursively.

    Yields:
        ``pathlib.Path`` objects for regular ``*.md`` files, sorted by path.
        A file is skipped when its own name, or the name of any directory
        between the vault root and the file, starts with a dot.
    """
    root = Path(vault)
    # rglob order depends on the filesystem; sorting keeps the report reproducible.
    for path in sorted(root.rglob("*.md")):
        # Only components below the root are inspected, so a vault that itself
        # lives inside a hidden directory is still scanned.
        if any(part.startswith(".") for part in path.relative_to(root).parts):
            continue
        # The glob also matches directories named "*.md"; those are not notes.
        if not path.is_file():
            continue
        yield path
def _simple_chunker(body: str, note_id: str, note_type: str) -> List[Dict]:
    """Split a note body into paragraph chunks shaped for edge building.

    Paragraphs are separated by one or more blank lines. A line counts as
    blank when it is empty or contains only spaces/tabs, and both LF and CRLF
    line endings are recognised, so whitespace-only separator lines no longer
    glue two paragraphs into a single chunk.

    Args:
        body: Markdown body of the note; ``None`` or an empty string yields
            no chunks.
        note_id: Id of the owning note; used as prefix of every ``chunk_id``.
        note_type: Note type copied into each chunk as ``type``.

    Returns:
        One dict per non-empty paragraph, in document order, with the keys
        ``chunk_id`` (``"<note_id>#cNNNN"``), ``ord`` (0-based index),
        ``text`` (stripped paragraph), ``note_id`` and ``type``.
    """
    # A line break followed by at least one more (possibly whitespace-only) line break.
    paragraph_break = r"(?:\r?\n[ \t]*){2,}"
    stripped = (part.strip() for part in re.split(paragraph_break, body or ""))
    return [
        {
            "chunk_id": f"{note_id}#c{index:04d}",
            "ord": index,
            "text": text,
            "note_id": note_id,
            "type": note_type,
        }
        for index, text in enumerate(part for part in stripped if part)
    ]
def _fm_note_refs(fm: Dict) -> List[str]:
    """Collect the link targets declared in a note's frontmatter.

    Args:
        fm: Frontmatter mapping; only its ``links`` value is read.

    Returns:
        The stripped ``target_id`` of every dict entry in ``links`` whose
        ``target_id`` is a non-blank string, in original order. The result is
        empty when ``links`` is missing, falsy or not a list.
    """
    entries = fm.get("links") or []
    if not isinstance(entries, list):
        return []
    # Non-dict entries carry no target_id and are ignored.
    candidates = (item.get("target_id") for item in entries if isinstance(item, dict))
    return [
        value.strip()
        for value in candidates
        if isinstance(value, str) and value.strip()
    ]
def main():
    """Run the edge dry-run over a vault and print a JSON report to stdout.

    Command-line arguments:
        --vault: Directory that is scanned recursively for Markdown notes
            (required).
        --include-note-scope-refs: Flag forwarded to ``build_edges_for_note``
            as ``include_note_scope_refs``.

    For every file for which ``read_markdown`` returns a truthy result, the
    report contains the file path, note id, note type, chunk count, total
    edge count, edge counts per kind and the first three edges as samples.

    Exits through ``argparse`` (usage message, status 2) when ``--vault`` is
    not an existing directory, instead of silently printing an empty report.
    """
    # Function-scope import keeps this block self-contained.
    from collections import Counter

    ap = argparse.ArgumentParser(
        description="Dry run: build edges from a vault without any Qdrant upsert."
    )
    ap.add_argument("--vault", required=True, help="root directory of the vault")
    ap.add_argument(
        "--include-note-scope-refs",
        action="store_true",
        help="also pass frontmatter links (links[].target_id) as note-scope references",
    )
    args = ap.parse_args()

    if not Path(args.vault).is_dir():
        # A mistyped path would otherwise look like a vault without notes.
        ap.error(f"--vault is not a directory: {args.vault}")

    report = []
    for path in _iter_markdown(args.vault):
        parsed = read_markdown(str(path))
        if not parsed:
            continue

        fm = normalize_frontmatter(parsed.frontmatter or {})
        # Fall back to the file name (without suffix) when the note has no id.
        note_id = fm.get("id") or path.stem
        # str() guards against non-string frontmatter values (e.g. a YAML number).
        note_type = str(fm.get("type") or "concept").lower()

        chunks = _simple_chunker(parsed.body, note_id, note_type)
        edges = build_edges_for_note(
            note_id=note_id,
            chunk_payloads=chunks,
            note_level_refs=_fm_note_refs(fm),
            include_note_scope_refs=args.include_note_scope_refs,
        )

        # Group by "relation", falling back to "kind" and finally the literal "edge".
        edges_by_kind = Counter(
            edge.get("relation") or edge.get("kind") or "edge" for edge in edges
        )

        report.append({
            "path": str(path),
            "note_id": note_id,
            "type": note_type,
            "chunks": len(chunks),
            "edges_total": len(edges),
            "edges_by_kind": dict(edges_by_kind),
            "samples": edges[:3],
        })

    print(json.dumps(report, ensure_ascii=False, indent=2))
# Script entry point: run the dry-run only when executed directly
# (e.g. ``python3 -m scripts.edges_dryrun``), never on import.
if __name__ == "__main__":
    main()