mindnet/tests/test_edges_smoke.py
Lars e93bab6ea7
All checks were successful
Deploy mindnet to llm-node / deploy (push) Successful in 4s
Fassadenauflösung unter app/core
2025-12-28 11:04:40 +01:00

134 lines
3.6 KiB
Python

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from __future__ import annotations
import json
import os
from collections import Counter, defaultdict
from qdrant_client.http import models as rest
from app.core.database.qdrant import QdrantConfig, get_client
def _rel(pl: dict) -> str:
return pl.get("relation") or pl.get("kind") or "edge"
def _scroll(client, col):
pts = []
next_page = None
while True:
res, next_page = client.scroll(
collection_name=col,
with_payload=True,
with_vectors=False,
limit=1024,
offset=next_page,
)
pts.extend(res)
if next_page is None:
break
return pts
def main():
    """Smoke-check edge integrity per note and print JSON lines.

    Emits one JSON row per note with its chunk count, edge counts by kind,
    and consistency checks (belongs_to coverage, next/prev chain length,
    duplicate edges), followed by one final JSON line with overall totals.
    """
    cfg = QdrantConfig.from_env()
    client = get_client(cfg)
    prefix = os.environ.get("COLLECTION_PREFIX", cfg.prefix)
    collections = {
        "notes": f"{prefix}_notes",
        "chunks": f"{prefix}_chunks",
        "edges": f"{prefix}_edges",
    }

    # Map note_id -> title/type from the notes collection.
    notes_meta = {}
    for point in _scroll(client, collections["notes"]):
        payload = point.payload or {}
        note_id = payload.get("note_id")
        if note_id:
            notes_meta[note_id] = {
                "title": payload.get("title", ""),
                "type": payload.get("type", ""),
            }

    # Count chunks per note.
    chunks_by_note = defaultdict(int)
    for point in _scroll(client, collections["chunks"]):
        payload = point.payload or {}
        note_id = payload.get("note_id")
        if note_id:
            chunks_by_note[note_id] += 1

    # Group edge payloads per note.
    edges_by_note = defaultdict(list)
    for point in _scroll(client, collections["edges"]):
        payload = point.payload or {}
        note_id = payload.get("note_id")
        if note_id:
            edges_by_note[note_id].append(payload)

    # Emit one row per note while accumulating overall totals.
    summary_edges = Counter()
    total_chunks = 0
    for note_id in sorted(notes_meta):
        meta = notes_meta[note_id]
        chunk_count = chunks_by_note.get(note_id, 0)
        total_chunks += chunk_count
        kinds = Counter(_rel(payload) for payload in edges_by_note[note_id])
        summary_edges.update(kinds)
        # A chain over k chunks has k-1 next links and k-1 prev links.
        expected_links = max(0, chunk_count - 1)
        row = {
            "note_id": note_id,
            "title": meta["title"],
            "type": meta["type"],
            "chunks": chunk_count,
            "edges_by_kind": dict(kinds),
            "checks": {
                # Every chunk should carry exactly one belongs_to edge.
                "belongs_to_equals_chunks": kinds.get("belongs_to", 0) == chunk_count,
                "next_prev_match": kinds.get("next", 0) == kinds.get("prev", 0) == expected_links,
                "no_duplicate_edges": _no_dupes(edges_by_note[note_id]),
            },
        }
        print(json.dumps(row, ensure_ascii=False))

    # Overall summary line.
    out = {
        "prefix": prefix,
        "summary": {
            "notes": len(notes_meta),
            "chunks": total_chunks,
            "belongs_to": summary_edges.get("belongs_to", 0),
            "next": summary_edges.get("next", 0),
            "prev": summary_edges.get("prev", 0),
            "refs_chunk": summary_edges.get("references", 0),
            "refs_note": summary_edges.get("references_note", 0),
            "backlink": summary_edges.get("backlink", 0),
            "dup_edges": 0,  # duplicates are checked per note above
        },
    }
    print(json.dumps(out, ensure_ascii=False))
def _no_dupes(pls):
seen = set()
for pl in pls:
key = (
str(pl.get("source_id") or ""),
str(pl.get("target_id") or ""),
str(pl.get("relation") or pl.get("kind") or ""),
str(pl.get("rule_id") or ""),
)
if key in seen:
return False
seen.add(key)
return True
# Script entry point: run the smoke check when executed directly.
if __name__ == "__main__":
    main()