# mindnet/tests/test_edges_all.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
tests/test_edges_all.py
Ein knapper Integrationscheck:
- Es existieren Notes/Chunks/Edges
- Inline-Edges (rule_id startswith "inline:") werden erkannt
- Callout-Edges (rule_id == "callout:edge") werden erkannt
- Defaults (rule_id startswith "edge_defaults:") werden erkannt
- Strukturkanten stimmen (belongs_to == chunks; next == prev == chunks-1)
"""

from __future__ import annotations

import json
from collections import Counter, defaultdict
from typing import Dict, Any, List, Tuple

from app.core.database.qdrant import QdrantConfig, get_client


def _scroll_all(client, collection: str):
    pts_all = []
    offset = None
    while True:
        pts, offset = client.scroll(
            collection_name=collection,
            with_payload=True,
            with_vectors=False,
            limit=2048,
            offset=offset,
        )
        pts_all.extend(pts or [])
        if offset is None:
            break
    return pts_all


def _rule_group(rule_id: str) -> str:
    if not rule_id:
        return "unknown"
    if rule_id == "callout:edge":
        return "callout"
    if rule_id.startswith("inline:"):      # <—— wichtig für inline:rel
        return "inline"
    if rule_id.startswith("edge_defaults:"):
        return "defaults"
    if rule_id.startswith("explicit:"):
        return "explicit"
    if rule_id in ("structure:belongs_to", "structure:order"):
        return "structure"
    return "other"


def main() -> None:
    """Run the edge integration check and print a one-line JSON verdict.

    Reads the ``<prefix>_notes``, ``<prefix>_chunks`` and ``<prefix>_edges``
    collections through the client built from the environment configuration
    and verifies that:

    * all three collections are non-empty,
    * at least one edge falls into the ``structure`` rule group,
    * at least one edge falls into the ``explicit``, ``inline`` or
      ``callout`` rule group,
    * for every note that has chunks, the number of ``belongs_to`` edges
      equals the number of chunks and the numbers of ``next`` and ``prev``
      edges each equal ``chunks - 1``.

    The outcome is printed as ``{"ok": <bool>, "notes_checked": <int>}``.
    Nothing is returned and no exit status is set.
    """
    cfg = QdrantConfig.from_env()
    client = get_client(cfg)

    notes_n = client.count(collection_name=f"{cfg.prefix}_notes", exact=True).count
    chunks_pts = _scroll_all(client, f"{cfg.prefix}_chunks")
    edges_pts = _scroll_all(client, f"{cfg.prefix}_edges")

    # Single pass over the edges: tally rule groups and, for the structural
    # kinds, the number of edges per note.
    structural_kinds = ("belongs_to", "next", "prev")
    groups = Counter()
    edges_by_kind = {kind: Counter() for kind in structural_kinds}
    for point in edges_pts:
        payload = point.payload or {}
        groups[_rule_group(payload.get("rule_id", ""))] += 1
        kind = payload.get("kind")
        if kind in structural_kinds:
            edges_by_kind[kind][payload.get("note_id")] += 1

    # Chunks without a payload are ignored for the per-note checks.
    chunks_by_note = Counter(
        point.payload.get("note_id") for point in chunks_pts if point.payload
    )

    # Baseline: every collection must contain something.
    collections_ok = notes_n != 0 and bool(chunks_pts) and bool(edges_pts)

    # At least one structure edge and at least one of the explicit variants.
    # The "defaults" group is deliberately not required: it may legitimately
    # be empty when types.yaml provides no edge_defaults.
    groups_ok = groups["structure"] != 0 and (
        groups["explicit"] + groups["inline"] + groups["callout"]
    ) != 0

    # Per-note structure: one belongs_to per chunk, and chunks - 1 next/prev
    # edges (every counted note has at least one chunk, so this is never
    # negative).
    structure_ok = all(
        edges_by_kind["belongs_to"][note_id] == n_chunks
        and edges_by_kind["next"][note_id] == n_chunks - 1
        and edges_by_kind["prev"][note_id] == n_chunks - 1
        for note_id, n_chunks in chunks_by_note.items()
    )

    ok = collections_ok and groups_ok and structure_ok
    print(json.dumps({"ok": ok, "notes_checked": len(chunks_by_note)}, ensure_ascii=False))


# Run the check only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()