From b4287cbfda7151032d30bd2138459f41d1aabd6d Mon Sep 17 00:00:00 2001
From: Lars <Lars@stommer.de>
Date: Tue, 11 Nov 2025 17:25:54 +0100
Subject: [PATCH] Dateien nach "tests" hochladen

---
 tests/show_edges_for_note.py |  71 +++++++++++++++++++
 tests/test_edges_smoke.py    | 129 +++++++++++++++++++++++++++++++++++
 2 files changed, 200 insertions(+)
 create mode 100644 tests/show_edges_for_note.py
 create mode 100644 tests/test_edges_smoke.py

diff --git a/tests/show_edges_for_note.py b/tests/show_edges_for_note.py
new file mode 100644
index 0000000..908c37e
--- /dev/null
+++ b/tests/show_edges_for_note.py
@@ -0,0 +1,71 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+tests/show_edges_for_note.py
+
+Shows the edges of a note (filtered by kind/scope) in a compact form.
+Example invocations:
+    python3 -m tests.show_edges_for_note --note-id 20251110-ollama-llm-9f0a12 --kinds references,next,prev --limit 10
+    python3 -m tests.show_edges_for_note --title "Qdrant Vektordatenbank" --scope note
+"""
+
+from __future__ import annotations
+import argparse, json, os, sys
+from typing import Dict, Any, List, Tuple
+from qdrant_client.http import models as rest
+
+from app.core.qdrant import QdrantConfig, get_client
+
+def collections(prefix: str) -> Tuple[str, str, str]:  # -> (notes, chunks, edges) collection names
+    return tuple(f"{prefix}_{suffix}" for suffix in ("notes", "chunks", "edges"))
+
+def find_note_by_title(client, prefix: str, title: str) -> str | None:
+    """Return the note_id of the first note whose title matches *title* (MatchText), or None."""
+    notes_col = collections(prefix)[0]
+    title_cond = rest.FieldCondition(key="title", match=rest.MatchText(text=title))
+    hits, _next = client.scroll(collection_name=notes_col, scroll_filter=rest.Filter(must=[title_cond]),
+                                with_payload=True, with_vectors=False, limit=1)
+    return hits[0].payload.get("note_id") if hits else None
+
+def fetch_edges_for_note(client, prefix: str, note_id: str, kinds: List[str] | None, scope: str | None, limit: int) -> List[Dict[str, Any]]:
+    """Return up to *limit* edge payloads of *note_id*, optionally restricted by scope and kinds."""
+    edges_col = collections(prefix)[2]
+    # (payload key, match) pairs; the optional ones stay None when no restriction was requested.
+    wanted = [("note_id", rest.MatchValue(value=note_id)),
+              ("scope", rest.MatchValue(value=scope) if scope else None),
+              ("kind", rest.MatchAny(any=kinds) if kinds else None)]
+    conds = [rest.FieldCondition(key=key, match=match) for key, match in wanted if match is not None]
+    found, _next = client.scroll(collection_name=edges_col, scroll_filter=rest.Filter(must=conds), with_payload=True, with_vectors=False, limit=limit)
+    return [point.payload for point in found]
+
+def main():
+    """CLI entry point: resolve the note (by id or title) and print its edges as JSON on stdout."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--note-id")
+    parser.add_argument("--title")
+    parser.add_argument("--kinds", help="CSV: references,next,prev,belongs_to,backlink")
+    parser.add_argument("--scope", choices=["note","chunk"])
+    parser.add_argument("--limit", type=int, default=25)
+    args = parser.parse_args()
+
+    cfg = QdrantConfig.from_env()
+    client = get_client(cfg)
+
+    def fail(message: str):  # report the error as a JSON object on stdout, then exit with status 2
+        print(json.dumps({"error": message}))
+        sys.exit(2)
+
+    # An explicit --note-id wins; --title is only looked up when no id was given.
+    note_id = args.note_id
+    if not note_id:
+        if not args.title:
+            fail("please provide --note-id or --title")
+        note_id = find_note_by_title(client, cfg.prefix, args.title)
+        if not note_id:
+            fail(f"note with title '{args.title}' not found")
+    kinds = [part.strip() for part in args.kinds.split(",") if part.strip()] if args.kinds else None
+    edges = fetch_edges_for_note(client, cfg.prefix, note_id, kinds, args.scope, args.limit)
+    print(json.dumps({"note_id": note_id, "count": len(edges), "edges": edges}, ensure_ascii=False, indent=2))
+
+if __name__ == "__main__":  # run the CLI only when executed as a script, not on import
+    main()
diff --git a/tests/test_edges_smoke.py b/tests/test_edges_smoke.py
new file mode 100644
index 0000000..0301e88
--- /dev/null
+++ b/tests/test_edges_smoke.py
@@ -0,0 +1,129 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+tests/test_edges_smoke.py
+
+Integrity check for mindnet edges in Qdrant.
+Checks per note:
+- chunk count (mindnet_chunks) = number of belongs_to edges
+- next/prev edges: (#chunks - 1) each
+- dedupe: no duplicates (key=(kind,source_id,target_id,scope))
+- references (chunk scope): present when wikilinks are expected (count report only)
+- optional note-scope references/backlink: present when --note-scope-refs was used
+
+Output: JSON per note + overall summary.
+"""
+
+from __future__ import annotations
+import json, os, sys
+from typing import Dict, Any, List, Tuple, Set
+from qdrant_client.http import models as rest
+
+from app.core.qdrant import QdrantConfig, get_client
+
+def collections(prefix: str) -> Tuple[str, str, str]:  # -> (notes, chunks, edges) collection names
+    return tuple(f"{prefix}_{suffix}" for suffix in ("notes", "chunks", "edges"))
+
+def scroll_ids(client, collection: str, filt: rest.Filter | None = None, payload=False, limit=256):
+    """Yield every point of *collection* matching *filt*, fetched in pages of *limit* via client.scroll."""
+    next_page = None
+    while True:
+        pts, next_page = client.scroll(
+            collection_name=collection,
+            scroll_filter=filt,
+            with_payload=payload,
+            with_vectors=False,
+            limit=limit,
+            offset=next_page,
+        )
+        yield from pts
+        if not pts or next_page is None:  # a None offset would restart the scroll at the first page
+            break
+
+def list_notes(client, prefix: str) -> List[Dict[str, Any]]:
+    """Return note_id/title/type dicts for every notes-collection point whose payload carries an id."""
+    notes_col = collections(prefix)[0]
+    found: List[Dict[str, Any]] = []
+    for point in scroll_ids(client, notes_col, None, payload=True):
+        payload = point.payload or {}
+        # Fall back to the plain "id" key; points with neither key are skipped.
+        note_id = payload.get("note_id") or payload.get("id")
+        if not note_id:
+            continue
+        entry = {"note_id": note_id, "title": payload.get("title"), "type": payload.get("type")}
+        found.append(entry)
+    return found
+
+def count_chunks_for_note(client, prefix: str, note_id: str) -> int:
+    """Count the chunk points whose payload note_id equals *note_id* (payloads are not fetched)."""
+    by_note = rest.FieldCondition(key="note_id", match=rest.MatchValue(value=note_id))
+    return sum(1 for _point in scroll_ids(client, collections(prefix)[1], rest.Filter(must=[by_note]), payload=False))
+
+def fetch_edges_for_note(client, prefix: str, note_id: str) -> List[Dict[str, Any]]:
+    """Return the payloads ({} where a point has none) of all edge points whose note_id equals *note_id*."""
+    by_note = rest.FieldCondition(key="note_id", match=rest.MatchValue(value=note_id))
+    return [pt.payload or {} for pt in scroll_ids(client, collections(prefix)[2], rest.Filter(must=[by_note]), payload=True)]
+
+def main():
+    """Run the edge integrity checks for every note and print one JSON report (summary + per note)."""
+    cfg = QdrantConfig.from_env()
+    client = get_client(cfg)
+    notes = list_notes(client, cfg.prefix)
+    report = []
+    total = {"notes": 0, "chunks": 0, "belongs_to": 0, "next": 0, "prev": 0, "refs_chunk": 0, "refs_note": 0, "backlink": 0, "dup_edges": 0}
+    for n in notes:
+        nid = n["note_id"]
+        total["notes"] += 1
+        chunk_count = count_chunks_for_note(client, cfg.prefix, nid)
+        total["chunks"] += chunk_count
+
+        edges = fetch_edges_for_note(client, cfg.prefix, nid)
+        by_kind = {}
+        keys: Set[tuple] = set()
+        dup_count = 0
+        for e in edges:
+            k = e.get("kind")
+            by_kind[k] = by_kind.get(k, 0) + 1
+            # Edges sharing kind, source, target and scope count as duplicates.
+            t = (k, e.get("source_id"), e.get("target_id"), e.get("scope"))
+            if t in keys:
+                dup_count += 1
+            else:
+                keys.add(t)
+
+        bt = by_kind.get("belongs_to", 0)
+        nx = by_kind.get("next", 0)
+        pv = by_kind.get("prev", 0)
+        # by_kind["references"] mixes both scopes, so each scope is counted on its own.
+        rc = sum(1 for e in edges if e.get("scope") == "chunk" and e.get("kind") == "references")
+        rn = sum(1 for e in edges if e.get("scope") == "note" and e.get("kind") == "references")
+        bl = by_kind.get("backlink", 0)
+
+        total["belongs_to"] += bt
+        total["next"] += nx
+        total["prev"] += pv
+        total["refs_chunk"] += rc
+        total["refs_note"] += rn
+        total["backlink"] += bl
+        total["dup_edges"] += dup_count
+
+        # A note with N chunks should have N belongs_to edges and N-1 next and prev edges.
+        seq_expected = max(chunk_count - 1, 0)
+        report.append({
+            "note_id": nid,
+            "title": n.get("title"),
+            "type": n.get("type"),
+            "chunks": chunk_count,
+            "edges_by_kind": by_kind,
+            "checks": {
+                "belongs_to_equals_chunks": bt == chunk_count,
+                "next_prev_match": nx == seq_expected and pv == seq_expected,
+                "no_duplicate_edges": dup_count == 0,
+            }
+        })
+
+    out = {"prefix": cfg.prefix, "summary": total, "notes": report}
+    print(json.dumps(out, ensure_ascii=False, indent=2))
+
+if __name__ == "__main__":  # run the smoke check only when executed as a script, not on import
+    main()