#!/usr/bin/env python3
# scripts/edges_dryrun.py
# -*- coding: utf-8 -*-
"""
Dry run: build edges from a vault **without** any Qdrant upsert.
- Reads Markdown files with YAML frontmatter
- Chunking: simple paragraph chunker (index + text)
- Edges: uses app.core.edges.build_edges_for_note()
- Output: a single JSON array with one entry per note, containing the edge
  counts and up to 3 sample edge payloads

Usage:
    python3 -m scripts.edges_dryrun --vault ./vault

Optional:
    --include-note-scope-refs   # also treat frontmatter links (links[].target_id) as note-scope references

Requirements:
    - app/core/parser.py (read_markdown, normalize_frontmatter)
    - app/core/edges.py   (that module forwards to the v2 implementation)
"""
from __future__ import annotations

import argparse
import json
import os
import re
from pathlib import Path
from typing import Dict, List, Optional

from app.core.parser import read_markdown, normalize_frontmatter
from app.core.edges import build_edges_for_note

def _iter_markdown(vault: str):
    """Yield the visible Markdown files below *vault* in sorted order.

    A path is skipped when its own name, or the name of any directory between
    it and the vault root, starts with a dot (for example ``.trash/old.md``).
    Only components *below* the vault root are inspected, so a vault that
    itself lives inside a hidden directory is still scanned. Directories whose
    name happens to match ``*.md`` are skipped as well.

    Args:
        vault: Path of the vault root directory.

    Yields:
        A ``pathlib.Path`` for every visible ``*.md`` file.
    """
    root = Path(vault)
    # rglob order depends on the file system; sort so repeated runs produce
    # the same report order.
    for path in sorted(root.rglob("*.md")):
        # rglob descends into dot-directories, so check every component.
        if any(part.startswith(".") for part in path.relative_to(root).parts):
            continue
        if not path.is_file():
            continue
        yield path

def _simple_chunker(body: str, note_id: str, note_type: str) -> List[Dict]:
    """Split *body* into paragraph chunks carrying a minimal chunk payload.

    Paragraphs are separated by one or more blank lines. A blank line may
    contain spaces or tabs, and both LF and CRLF line endings are recognised,
    so notes saved with Windows line endings are chunked like any other note.

    Args:
        body: Markdown body of the note; ``None`` or an empty string yields
            no chunks.
        note_id: Identifier of the owning note, used to derive the chunk ids.
        note_type: Note type copied into every chunk under the ``type`` key.

    Returns:
        One dict per non-empty paragraph with the keys ``chunk_id``
        (``<note_id>#c<zero-padded 4-digit index>``), ``ord``, ``text``,
        ``note_id`` and ``type``.
    """
    # A paragraph break is two or more consecutive line ends, each optionally
    # followed by horizontal whitespace (i.e. at least one blank line).
    pieces = re.split(r"(?:\r?\n[ \t]*){2,}", body or "")
    paragraphs = [piece.strip() for piece in pieces if piece.strip()]
    return [
        {
            "chunk_id": f"{note_id}#c{idx:04d}",
            "ord": idx,
            "text": text,
            "note_id": note_id,
            "type": note_type,
        }
        for idx, text in enumerate(paragraphs)
    ]

def _fm_note_refs(fm: Dict) -> List[str]:
    """Collect the ``target_id`` values listed under ``links`` in *fm*.

    Args:
        fm: Frontmatter mapping of a note.

    Returns:
        The stripped, non-empty string ``target_id`` values of all dict
        entries in ``fm["links"]``, in their original order. An empty list is
        returned when ``links`` is missing, empty, or not a list.
    """
    entries = fm.get("links") or []
    if not isinstance(entries, list):
        return []
    # Only dict entries can carry a target_id; anything else is ignored.
    candidates = (item.get("target_id") for item in entries if isinstance(item, dict))
    return [
        target.strip()
        for target in candidates
        if isinstance(target, str) and target.strip()
    ]

def main():
    """Run the dry run and print the per-note edge report as JSON.

    Command-line options:
        --vault: Root directory of the vault to scan (required).
        --include-note-scope-refs: Flag forwarded unchanged to
            ``build_edges_for_note`` as ``include_note_scope_refs``.

    For every Markdown file that ``read_markdown`` returns a truthy result
    for, the note is chunked, its edges are built, and one report entry is
    recorded. The whole report is printed to stdout as a single JSON array.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--vault", required=True)
    parser.add_argument("--include-note-scope-refs", action="store_true")
    opts = parser.parse_args()

    entries = []
    for md_path in _iter_markdown(opts.vault):
        doc = read_markdown(str(md_path))
        if not doc:
            continue

        meta = normalize_frontmatter(doc.frontmatter or {})
        # Fall back to the file name (without suffix) when no id is set.
        note_id = meta.get("id") or md_path.stem
        note_type = (meta.get("type") or "concept").lower()
        chunk_payloads = _simple_chunker(doc.body, note_id, note_type)

        edges = build_edges_for_note(
            note_id=note_id,
            chunk_payloads=chunk_payloads,
            note_level_refs=_fm_note_refs(meta),
            include_note_scope_refs=opts.include_note_scope_refs,
        )

        # Tally edges by "relation", falling back to "kind", then "edge".
        tally = {}
        for edge in edges:
            label = edge.get("relation") or edge.get("kind") or "edge"
            tally[label] = tally.get(label, 0) + 1

        entries.append(
            {
                "path": str(md_path),
                "note_id": note_id,
                "type": note_type,
                "chunks": len(chunk_payloads),
                "edges_total": len(edges),
                "edges_by_kind": tally,
                "samples": edges[:3],  # at most three example edges
            }
        )

    print(json.dumps(entries, ensure_ascii=False, indent=2))

# Run the dry run only when executed as a script (or via ``python -m``),
# not when the module is imported.
if __name__ == "__main__":
    main()