#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
tests/test_edges_all.py

A concise integration check:
- Notes/chunks/edges exist
- Inline edges (rule_id starts with "inline:") are recognised
- Callout edges (rule_id == "callout:edge") are recognised
- Defaults (rule_id starts with "edge_defaults:") are recognised
- Structural edges are consistent (belongs_to == chunks; next == prev == chunks-1)
"""
from __future__ import annotations

import json
from collections import Counter, defaultdict
from typing import Dict, Any, List, Tuple

from app.core.database.qdrant import QdrantConfig, get_client


def _scroll_all(client, collection: str) -> List[Any]:
    """Return every point of ``collection``, fetched page by page.

    Calls ``client.scroll`` repeatedly (payloads included, vectors excluded,
    2048 points per page) and stops once the returned offset is ``None``.
    """
    points: List[Any] = []
    offset = None
    while True:
        page, offset = client.scroll(
            collection_name=collection,
            with_payload=True,
            with_vectors=False,
            limit=2048,
            offset=offset,
        )
        # A page may come back empty/None; treat that as "no points".
        points.extend(page or [])
        if offset is None:
            break
    return points


def _rule_group(rule_id: str) -> str:
    """Map an edge ``rule_id`` to a coarse group name.

    Returns one of ``"unknown"`` (empty/missing id), ``"callout"``,
    ``"inline"``, ``"defaults"``, ``"explicit"``, ``"structure"`` or
    ``"other"`` (anything not matched by the rules below).
    """
    if not rule_id:
        return "unknown"
    if rule_id == "callout:edge":
        return "callout"
    if rule_id.startswith("inline:"):  # important: also covers "inline:rel"
        return "inline"
    if rule_id.startswith("edge_defaults:"):
        return "defaults"
    if rule_id.startswith("explicit:"):
        return "explicit"
    if rule_id in ("structure:belongs_to", "structure:order"):
        return "structure"
    return "other"


def _edges_per_note(edges_pts, kind: str) -> Counter:
    """Count the edges whose payload ``kind`` equals ``kind``, keyed by payload ``note_id``."""
    return Counter(
        (p.payload or {}).get("note_id")
        for p in edges_pts
        if (p.payload or {}).get("kind") == kind
    )


def main() -> None:
    """Run all edge checks and print a one-line JSON verdict.

    Reads the ``<prefix>_notes``, ``<prefix>_chunks`` and ``<prefix>_edges``
    collections and prints ``{"ok": <bool>, "notes_checked": <int>}``.
    """
    cfg = QdrantConfig.from_env()
    client = get_client(cfg)

    col_notes = f"{cfg.prefix}_notes"
    col_chunks = f"{cfg.prefix}_chunks"
    col_edges = f"{cfg.prefix}_edges"

    notes_n = client.count(collection_name=col_notes, exact=True).count
    chunks_pts = _scroll_all(client, col_chunks)
    edges_pts = _scroll_all(client, col_edges)

    ok = True

    # Basic preconditions: all three collections must be non-empty.
    if notes_n == 0 or not chunks_pts or not edges_pts:
        ok = False

    # Count edges per rule group.
    groups = Counter(
        _rule_group((p.payload or {}).get("rule_id", "")) for p in edges_pts
    )

    if groups["structure"] == 0:
        ok = False
    # At least one of the explicit variants must be present.
    if groups["explicit"] + groups["inline"] + groups["callout"] == 0:
        ok = False
    # "defaults" may legitimately be 0 when types.yaml provides no
    # edge_defaults, so that group is informational only and not checked.

    # Per-note checks: compare structural edge counts with chunk counts.
    chunks_by_note = Counter(
        p.payload.get("note_id") for p in chunks_pts if p.payload
    )
    belongs = _edges_per_note(edges_pts, "belongs_to")
    nxt = _edges_per_note(edges_pts, "next")
    prv = _edges_per_note(edges_pts, "prev")

    for n_id, c in chunks_by_note.items():
        # One belongs_to edge per chunk.
        if belongs.get(n_id, 0) != c:
            ok = False
        # A chain of c chunks has c - 1 next links and c - 1 prev links.
        expected_links = max(c - 1, 0)
        if nxt.get(n_id, 0) != expected_links or prv.get(n_id, 0) != expected_links:
            ok = False

    print(json.dumps({"ok": ok, "notes_checked": len(chunks_by_note)}, ensure_ascii=False))


if __name__ == "__main__":
    main()