mindnet/tests/test_edges_all.py
Lars e93bab6ea7
All checks were successful
Deploy mindnet to llm-node / deploy (push) Successful in 4s
Fassadenauflösung unter app/core
2025-12-28 11:04:40 +01:00

117 lines
3.3 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
tests/test_edges_all.py
Ein knapper Integrationscheck:
- Es existieren Notes/Chunks/Edges
- Inline-Edges (rule_id startswith "inline:") werden erkannt
- Callout-Edges (rule_id == "callout:edge") werden erkannt
- Defaults (rule_id startswith "edge_defaults:") werden erkannt
- Strukturkanten stimmen (belongs_to == chunks; next == prev == chunks-1)
"""
from __future__ import annotations
import json
from collections import Counter, defaultdict
from typing import Dict, Any, List, Tuple
from app.core.database.qdrant import QdrantConfig, get_client
def _scroll_all(client, collection: str, limit: int = 2048):
    """Return every point of ``collection`` by paging through ``client.scroll``.

    Args:
        client: Object exposing a ``scroll`` method that accepts the keyword
            arguments ``collection_name``, ``with_payload``, ``with_vectors``,
            ``limit`` and ``offset`` and returns a ``(points, next_offset)``
            pair.
        collection: Name of the collection to read.
        limit: Number of points requested per ``scroll`` call. Defaults to
            2048, the page size this helper has always used.

    Returns:
        A list holding the points of all pages in the order they were
        returned. Payloads are requested, vectors are not.
    """
    points = []
    offset = None
    while True:
        page, offset = client.scroll(
            collection_name=collection,
            with_payload=True,
            with_vectors=False,
            limit=limit,
            offset=offset,
        )
        # A falsy page (None or empty) contributes no points.
        points.extend(page or [])
        # The loop ends as soon as the client hands back no follow-up offset.
        if offset is None:
            break
    return points
def _rule_group(rule_id: str) -> str:
    """Map an edge ``rule_id`` to a coarse group name.

    Args:
        rule_id: Rule identifier of an edge. Callers read it from a point
            payload, so it may be empty, ``None`` or not a string at all.

    Returns:
        ``"unknown"`` for an empty, missing or non-string id;
        ``"callout"`` for exactly ``"callout:edge"``;
        ``"inline"``, ``"defaults"`` or ``"explicit"`` for ids starting with
        ``"inline:"``, ``"edge_defaults:"`` or ``"explicit:"``;
        ``"structure"`` for ``"structure:belongs_to"`` / ``"structure:order"``;
        ``"other"`` for every remaining string.
    """
    # Non-string values cannot be prefix-matched; classify them like a
    # missing id instead of failing on ``.startswith``.
    if not rule_id or not isinstance(rule_id, str):
        return "unknown"
    if rule_id == "callout:edge":
        return "callout"
    # Prefix matches: any suffix counts, e.g. "inline:rel" is an inline edge.
    prefix_groups = (
        ("inline:", "inline"),
        ("edge_defaults:", "defaults"),
        ("explicit:", "explicit"),
    )
    for prefix, group in prefix_groups:
        if rule_id.startswith(prefix):
            return group
    if rule_id in ("structure:belongs_to", "structure:order"):
        return "structure"
    return "other"
def _count_edges_of_kind(edges_pts, kind: str) -> Counter:
    """Count the edge points whose payload ``kind`` equals *kind*, per payload ``note_id``."""
    return Counter(
        (p.payload or {}).get("note_id")
        for p in edges_pts
        if (p.payload or {}).get("kind") == kind
    )


def main() -> None:
    """Run the edge integration check and print a one-line JSON verdict.

    The collections ``<prefix>_notes``, ``<prefix>_chunks`` and
    ``<prefix>_edges`` are read through the client obtained from
    ``QdrantConfig.from_env()`` / ``get_client``. The verdict is ``ok`` only
    if all of the following hold:

    - notes, chunks and edges all exist;
    - at least one edge falls into the "structure" rule group;
    - at least one edge falls into the "explicit", "inline" or "callout" group;
    - for every note that has chunks: the number of ``belongs_to`` edges
      equals its chunk count, and the numbers of ``next`` and ``prev`` edges
      each equal chunk count minus one.

    Output is ``{"ok": <bool>, "notes_checked": <int>}`` on stdout. The
    function only prints; it does not raise or set an exit status on failure.
    """
    cfg = QdrantConfig.from_env()
    client = get_client(cfg)

    notes_n = client.count(collection_name=f"{cfg.prefix}_notes", exact=True).count
    chunks_pts = _scroll_all(client, f"{cfg.prefix}_chunks")
    edges_pts = _scroll_all(client, f"{cfg.prefix}_edges")

    # Base condition: none of the three collections may be empty.
    ok = not (notes_n == 0 or not chunks_pts or not edges_pts)

    # Count edges per rule group.
    groups = Counter(
        _rule_group((p.payload or {}).get("rule_id", "")) for p in edges_pts
    )
    if groups["structure"] == 0:
        ok = False
    # At least one of the explicitly authored variants must be present.
    if groups["explicit"] + groups["inline"] + groups["callout"] == 0:
        ok = False
    # The "defaults" group may legitimately be empty (when types.yaml supplies
    # no edge_defaults), so it is deliberately not part of the verdict.

    # Per-note structure checks. Chunks without a payload are skipped.
    chunks_by_note = Counter(
        p.payload.get("note_id") for p in chunks_pts if p.payload
    )
    belongs = _count_edges_of_kind(edges_pts, "belongs_to")
    nxt = _count_edges_of_kind(edges_pts, "next")
    prv = _count_edges_of_kind(edges_pts, "prev")

    for note_id, n_chunks in chunks_by_note.items():
        # n chunks are linked by n - 1 next edges and n - 1 prev edges.
        expected_links = max(n_chunks - 1, 0)
        if belongs[note_id] != n_chunks:
            ok = False
        if nxt[note_id] != expected_links or prv[note_id] != expected_links:
            ok = False

    print(json.dumps({"ok": ok, "notes_checked": len(chunks_by_note)}, ensure_ascii=False))
# Run the check only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()