From b4287cbfda7151032d30bd2138459f41d1aabd6d Mon Sep 17 00:00:00 2001
From: Lars
Date: Tue, 11 Nov 2025 17:25:54 +0100
Subject: [PATCH] Dateien nach "tests" hochladen

---
Review note: the blob ids on the "index" lines below predate the review
edits to both files; regenerate the patch with git format-patch after applying.

 tests/show_edges_for_note.py |  88 +++++++++++++++++++++
 tests/test_edges_smoke.py    | 147 +++++++++++++++++++++++++++++++++++
 2 files changed, 235 insertions(+)
 create mode 100644 tests/show_edges_for_note.py
 create mode 100644 tests/test_edges_smoke.py

diff --git a/tests/show_edges_for_note.py b/tests/show_edges_for_note.py
new file mode 100644
index 0000000..908c37e
--- /dev/null
+++ b/tests/show_edges_for_note.py
@@ -0,0 +1,88 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+tests/show_edges_for_note.py
+
+Print the edges of one note (optionally filtered by kind/scope) as compact JSON.
+Example invocations (run from the repository root):
+  python3 -m tests.show_edges_for_note --note-id 20251110-ollama-llm-9f0a12 --kinds references,next,prev --limit 10
+  python3 -m tests.show_edges_for_note --title "Qdrant Vektordatenbank" --scope note
+"""
+
+from __future__ import annotations
+import argparse, json, os, sys
+from typing import Dict, Any, List, Tuple
+from qdrant_client.http import models as rest
+
+from app.core.qdrant import QdrantConfig, get_client
+
+def collections(prefix: str) -> Tuple[str, str, str]:
+    """Return the (notes, chunks, edges) collection names for ``prefix``."""
+    return f"{prefix}_notes", f"{prefix}_chunks", f"{prefix}_edges"
+
+def find_note_by_title(client, prefix: str, title: str) -> str | None:
+    """Return the ``note_id`` of the first note matching ``title``, or None.
+
+    Only one hit is requested, so an ambiguous title resolves to whichever
+    point the scroll returns first.
+    """
+    notes_col, _, _ = collections(prefix)
+    f = rest.Filter(must=[rest.FieldCondition(key="title", match=rest.MatchText(text=title))])
+    pts, _ = client.scroll(collection_name=notes_col, scroll_filter=f, with_payload=True, with_vectors=False, limit=1)
+    if not pts:
+        return None
+    # A point may have no payload; report that as "not found" instead of crashing.
+    return (pts[0].payload or {}).get("note_id")
+
+def fetch_edges_for_note(client, prefix: str, note_id: str, kinds: List[str] | None, scope: str | None, limit: int) -> List[Dict[str, Any]]:
+    """Return at most ``limit`` edge payloads whose ``note_id`` equals ``note_id``.
+
+    ``scope`` and ``kinds`` add further filter conditions when truthy. A single
+    scroll page is fetched; there is no pagination beyond ``limit``.
+    """
+    _, _, edges_col = collections(prefix)
+    must = [rest.FieldCondition(key="note_id", match=rest.MatchValue(value=note_id))]
+    if scope:
+        must.append(rest.FieldCondition(key="scope", match=rest.MatchValue(value=scope)))
+    if kinds:
+        must.append(rest.FieldCondition(key="kind", match=rest.MatchAny(any=kinds)))
+    f = rest.Filter(must=must)
+    pts, _ = client.scroll(collection_name=edges_col, scroll_filter=f, with_payload=True, with_vectors=False, limit=limit)
+    # Points without payload become empty dicts so the JSON output stays uniform.
+    return [p.payload or {} for p in pts]
+
+def main():
+    """Parse CLI arguments, resolve the note and print its edges as JSON.
+
+    Exits with status 2 when no note is given or the title cannot be resolved.
+    """
+    ap = argparse.ArgumentParser()
+    ap.add_argument("--note-id")
+    ap.add_argument("--title")
+    ap.add_argument("--kinds", help="CSV: references,next,prev,belongs_to,backlink")
+    ap.add_argument("--scope", choices=["note","chunk"])
+    ap.add_argument("--limit", type=int, default=25)
+    args = ap.parse_args()
+
+    cfg = QdrantConfig.from_env()
+    client = get_client(cfg)
+
+    nid = args.note_id
+    if not nid and args.title:
+        nid = find_note_by_title(client, cfg.prefix, args.title)
+        if not nid:
+            print(json.dumps({"error": f"note with title '{args.title}' not found"}))
+            sys.exit(2)
+    if not nid:
+        print(json.dumps({"error": "please provide --note-id or --title"}))
+        sys.exit(2)
+
+    kinds = None
+    if args.kinds:
+        kinds = [s.strip() for s in args.kinds.split(",") if s.strip()]
+
+    edges = fetch_edges_for_note(client, cfg.prefix, nid, kinds, args.scope, args.limit)
+    print(json.dumps({"note_id": nid, "count": len(edges), "edges": edges}, ensure_ascii=False, indent=2))
+
+if __name__ == "__main__":
+    main()
diff --git a/tests/test_edges_smoke.py b/tests/test_edges_smoke.py
new file mode 100644
index 0000000..0301e88
--- /dev/null
+++ b/tests/test_edges_smoke.py
@@ -0,0 +1,147 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+tests/test_edges_smoke.py
+
+Integrity check for mindnet edges in Qdrant.
+Checks per note:
+- chunk count (mindnet_chunks) = number of belongs_to edges
+- next/prev edges: (#chunks - 1) each
+- dedupe: no duplicate (key=(kind,source_id,target_id,scope))
+- references (chunk scope): present when wikilinks are expected (count report only)
+- optional note-scope references/backlink: present when --note-scope-refs was used
+
+Output: JSON per note + overall summary.
+"""
+
+from __future__ import annotations
+import json, os, sys
+from typing import Dict, Any, List, Tuple, Set
+from qdrant_client.http import models as rest
+
+from app.core.qdrant import QdrantConfig, get_client
+
+def collections(prefix: str) -> Tuple[str, str, str]:
+    """Return the (notes, chunks, edges) collection names for ``prefix``."""
+    return f"{prefix}_notes", f"{prefix}_chunks", f"{prefix}_edges"
+
+def scroll_ids(client, collection: str, filt: rest.Filter | None = None, payload=False, limit=256):
+    """Yield every point of ``collection`` that matches ``filt``.
+
+    Points are fetched in pages of ``limit``; ``payload`` is forwarded as
+    ``with_payload`` and vectors are never requested.
+    """
+    next_page = None
+    while True:
+        pts, next_page = client.scroll(
+            collection_name=collection,
+            scroll_filter=filt,
+            with_payload=payload,
+            with_vectors=False,
+            limit=limit,
+            offset=next_page,
+        )
+        yield from pts
+        # A ``None`` offset marks the last page. Feeding it back into scroll()
+        # would start over at the first point and never terminate.
+        if not pts or next_page is None:
+            break
+
+def list_notes(client, prefix: str) -> List[Dict[str, Any]]:
+    """Return ``note_id``, ``title`` and ``type`` of every note that has an id.
+
+    The id is read from ``note_id`` and falls back to ``id``; notes with
+    neither are skipped.
+    """
+    notes_col, _, _ = collections(prefix)
+    out = []
+    for p in scroll_ids(client, notes_col, None, payload=True):
+        pl = p.payload or {}
+        nid = pl.get("note_id") or pl.get("id")
+        if nid:
+            out.append({
+                "note_id": nid,
+                "title": pl.get("title"),
+                "type": pl.get("type"),
+            })
+    return out
+
+def count_chunks_for_note(client, prefix: str, note_id: str) -> int:
+    """Return the number of chunk points whose ``note_id`` equals ``note_id``."""
+    _, chunks_col, _ = collections(prefix)
+    filt = rest.Filter(must=[rest.FieldCondition(key="note_id", match=rest.MatchValue(value=note_id))])
+    return sum(1 for _ in scroll_ids(client, chunks_col, filt, payload=False))
+
+def fetch_edges_for_note(client, prefix: str, note_id: str) -> List[Dict[str, Any]]:
+    """Return the payloads of all edges whose ``note_id`` equals ``note_id``."""
+    _, _, edges_col = collections(prefix)
+    filt = rest.Filter(must=[rest.FieldCondition(key="note_id", match=rest.MatchValue(value=note_id))])
+    return [p.payload or {} for p in scroll_ids(client, edges_col, filt, payload=True)]
+
+def main():
+    """Run the per-note edge checks and print the report as JSON."""
+    cfg = QdrantConfig.from_env()
+    client = get_client(cfg)
+    notes = list_notes(client, cfg.prefix)
+
+    report = []
+    total = {"notes": 0, "chunks": 0, "belongs_to": 0, "next": 0, "prev": 0, "refs_chunk": 0, "refs_note": 0, "backlink": 0, "dup_edges": 0}
+    for n in notes:
+        nid = n["note_id"]
+        total["notes"] += 1
+        chunk_count = count_chunks_for_note(client, cfg.prefix, nid)
+        total["chunks"] += chunk_count
+
+        edges = fetch_edges_for_note(client, cfg.prefix, nid)
+        by_kind = {}
+        keys: Set[tuple] = set()
+        dup_count = 0
+        for e in edges:
+            k = e.get("kind")
+            by_kind[k] = by_kind.get(k, 0) + 1
+            t = (e.get("kind"), e.get("source_id"), e.get("target_id"), e.get("scope"))
+            if t in keys:
+                dup_count += 1
+            else:
+                keys.add(t)
+
+        bt = by_kind.get("belongs_to", 0)
+        nx = by_kind.get("next", 0)
+        pv = by_kind.get("prev", 0)
+        # Count references per scope; by_kind["references"] mixes both scopes.
+        rc = sum(1 for e in edges if e.get("scope") == "chunk" and e.get("kind") == "references")
+        rn = sum(1 for e in edges if e.get("scope") == "note" and e.get("kind") == "references")
+        bl = by_kind.get("backlink", 0)
+
+        total["belongs_to"] += bt
+        total["next"] += nx
+        total["prev"] += pv
+        total["refs_chunk"] += rc
+        total["refs_note"] += rn
+        total["backlink"] += bl
+        total["dup_edges"] += dup_count
+
+        # A note with n chunks needs n-1 next and n-1 prev edges (none for 0 or 1).
+        expected_seq = max(chunk_count - 1, 0)
+        ok_bt = (bt == chunk_count)
+        ok_seq = (nx == expected_seq and pv == expected_seq)
+        ok_dup = (dup_count == 0)
+
+        report.append({
+            "note_id": nid,
+            "title": n.get("title"),
+            "type": n.get("type"),
+            "chunks": chunk_count,
+            "edges_by_kind": by_kind,
+            "checks": {
+                "belongs_to_equals_chunks": ok_bt,
+                "next_prev_match": ok_seq,
+                "no_duplicate_edges": ok_dup,
+            }
+        })
+
+    out = {"prefix": cfg.prefix, "summary": total, "notes": report}
+    print(json.dumps(out, ensure_ascii=False, indent=2))
+
+if __name__ == "__main__":
+    main()