mindnet/tests/test_edges_smoke.py
Lars e93bab6ea7
All checks were successful
Deploy mindnet to llm-node / deploy (push) Successful in 4s
Fassadenauflösung unter app/core
2025-12-28 11:04:40 +01:00

134 lines
3.6 KiB
Python

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from __future__ import annotations
import json
import os
from collections import Counter, defaultdict
from qdrant_client.http import models as rest
from app.core.database.qdrant import QdrantConfig, get_client
def _rel(pl: dict) -> str:
return pl.get("relation") or pl.get("kind") or "edge"
def _scroll(client, col):
pts = []
next_page = None
while True:
res, next_page = client.scroll(
collection_name=col,
with_payload=True,
with_vectors=False,
limit=1024,
offset=next_page,
)
pts.extend(res)
if next_page is None:
break
return pts
def main():
    """Smoke-check edge integrity per note and print JSON lines.

    Emits one JSON row per note with its chunk count, edge counts by kind,
    and consistency checks (belongs_to coverage, next/prev chain length,
    duplicate edges), followed by one final JSON line with overall totals.
    """
    cfg = QdrantConfig.from_env()
    client = get_client(cfg)
    prefix = os.environ.get("COLLECTION_PREFIX", cfg.prefix)
    collections = {
        "notes": f"{prefix}_notes",
        "chunks": f"{prefix}_chunks",
        "edges": f"{prefix}_edges",
    }

    # Map note_id -> title/type from the notes collection.
    notes_meta = {}
    for point in _scroll(client, collections["notes"]):
        payload = point.payload or {}
        note_id = payload.get("note_id")
        if note_id:
            notes_meta[note_id] = {
                "title": payload.get("title", ""),
                "type": payload.get("type", ""),
            }

    # Count chunks per note.
    chunks_by_note = defaultdict(int)
    for point in _scroll(client, collections["chunks"]):
        payload = point.payload or {}
        note_id = payload.get("note_id")
        if note_id:
            chunks_by_note[note_id] += 1

    # Group edge payloads per note.
    edges_by_note = defaultdict(list)
    for point in _scroll(client, collections["edges"]):
        payload = point.payload or {}
        note_id = payload.get("note_id")
        if note_id:
            edges_by_note[note_id].append(payload)

    # Emit one row per note while accumulating overall totals.
    summary_edges = Counter()
    total_chunks = 0
    for note_id in sorted(notes_meta):
        meta = notes_meta[note_id]
        chunk_count = chunks_by_note.get(note_id, 0)
        total_chunks += chunk_count
        kinds = Counter(_rel(payload) for payload in edges_by_note[note_id])
        summary_edges.update(kinds)
        # A chain over k chunks has k-1 next links and k-1 prev links.
        expected_links = max(0, chunk_count - 1)
        row = {
            "note_id": note_id,
            "title": meta["title"],
            "type": meta["type"],
            "chunks": chunk_count,
            "edges_by_kind": dict(kinds),
            "checks": {
                # Every chunk should carry exactly one belongs_to edge.
                "belongs_to_equals_chunks": kinds.get("belongs_to", 0) == chunk_count,
                "next_prev_match": kinds.get("next", 0) == kinds.get("prev", 0) == expected_links,
                "no_duplicate_edges": _no_dupes(edges_by_note[note_id]),
            },
        }
        print(json.dumps(row, ensure_ascii=False))

    # Overall summary line.
    out = {
        "prefix": prefix,
        "summary": {
            "notes": len(notes_meta),
            "chunks": total_chunks,
            "belongs_to": summary_edges.get("belongs_to", 0),
            "next": summary_edges.get("next", 0),
            "prev": summary_edges.get("prev", 0),
            "refs_chunk": summary_edges.get("references", 0),
            "refs_note": summary_edges.get("references_note", 0),
            "backlink": summary_edges.get("backlink", 0),
            "dup_edges": 0,  # duplicates are checked per note above
        },
    }
    print(json.dumps(out, ensure_ascii=False))
def _no_dupes(pls):
seen = set()
for pl in pls:
key = (
str(pl.get("source_id") or ""),
str(pl.get("target_id") or ""),
str(pl.get("relation") or pl.get("kind") or ""),
str(pl.get("rule_id") or ""),
)
if key in seen:
return False
seen.add(key)
return True
# Script entry point: run the smoke check when executed directly.
if __name__ == "__main__":
    main()