Dateien nach "tests" hochladen
All checks were successful
Deploy mindnet to llm-node / deploy (push) Successful in 3s
All checks were successful
Deploy mindnet to llm-node / deploy (push) Successful in 3s
This commit is contained in:
parent
26724a1db1
commit
b4287cbfda
71
tests/show_edges_for_note.py
Normal file
71
tests/show_edges_for_note.py
Normal file
|
|
@ -0,0 +1,71 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
"""
|
||||||
|
scripts/show_edges_for_note.py
|
||||||
|
|
||||||
|
Zeigt Kanten einer Note (gefiltert nach kind/scope) in einer kompakten Form.
|
||||||
|
Aufrufbeispiele:
|
||||||
|
python3 -m scripts.show_edges_for_note --note-id 20251110-ollama-llm-9f0a12 --kinds references,next,prev --limit 10
|
||||||
|
python3 -m scripts.show_edges_for_note --title "Qdrant Vektordatenbank" --scope note
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
import argparse, json, os, sys
|
||||||
|
from typing import Dict, Any, List, Tuple
|
||||||
|
from qdrant_client.http import models as rest
|
||||||
|
|
||||||
|
from app.core.qdrant import QdrantConfig, get_client
|
||||||
|
|
||||||
|
def collections(prefix: str) -> Tuple[str, str, str]:
    """Return the (notes, chunks, edges) collection names derived from *prefix*."""
    return tuple(f"{prefix}_{suffix}" for suffix in ("notes", "chunks", "edges"))
|
||||||
|
|
||||||
|
def find_note_by_title(client, prefix: str, title: str) -> str | None:
    """Resolve a note_id via a full-text match on the ``title`` payload field.

    Returns the ``note_id`` of the first matching point in the notes
    collection, or ``None`` when nothing matches.
    NOTE(review): relies on a full-text payload index existing for ``title``.
    """
    notes_col = collections(prefix)[0]
    title_filter = rest.Filter(
        must=[rest.FieldCondition(key="title", match=rest.MatchText(text=title))]
    )
    points, _ = client.scroll(
        collection_name=notes_col,
        scroll_filter=title_filter,
        with_payload=True,
        with_vectors=False,
        limit=1,
    )
    if points:
        return points[0].payload.get("note_id")
    return None
|
||||||
|
|
||||||
|
def fetch_edges_for_note(client, prefix: str, note_id: str, kinds: List[str] | None, scope: str | None, limit: int) -> List[Dict[str, Any]]:
    """Fetch up to *limit* edge payloads attached to *note_id*.

    Optional filters: *scope* restricts to one edge scope ("note"/"chunk"),
    *kinds* restricts to any of the given edge kinds.
    """
    edges_col = collections(prefix)[2]
    conditions = [rest.FieldCondition(key="note_id", match=rest.MatchValue(value=note_id))]
    if scope:
        conditions.append(rest.FieldCondition(key="scope", match=rest.MatchValue(value=scope)))
    if kinds:
        conditions.append(rest.FieldCondition(key="kind", match=rest.MatchAny(any=kinds)))
    points, _ = client.scroll(
        collection_name=edges_col,
        scroll_filter=rest.Filter(must=conditions),
        with_payload=True,
        with_vectors=False,
        limit=limit,
    )
    return [point.payload for point in points]
|
||||||
|
|
||||||
|
def main():
    """CLI entry point: resolve the target note, fetch its edges, print JSON.

    The note is identified either directly via ``--note-id`` or looked up by
    ``--title``; missing/unresolvable notes exit with status 2 and a JSON
    error object on stdout.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--note-id")
    parser.add_argument("--title")
    parser.add_argument("--kinds", help="CSV: references,next,prev,belongs_to,backlink")
    parser.add_argument("--scope", choices=["note", "chunk"])
    parser.add_argument("--limit", type=int, default=25)
    args = parser.parse_args()

    cfg = QdrantConfig.from_env()
    client = get_client(cfg)

    note_id = args.note_id
    if not note_id and args.title:
        note_id = find_note_by_title(client, cfg.prefix, args.title)
        if not note_id:
            print(json.dumps({"error": f"note with title '{args.title}' not found"}))
            sys.exit(2)
    if not note_id:
        print(json.dumps({"error": "please provide --note-id or --title"}))
        sys.exit(2)

    # Parse the CSV kind list, dropping empty segments ("a,,b" -> ["a", "b"]).
    kind_list = None
    if args.kinds:
        kind_list = [part.strip() for part in args.kinds.split(",") if part.strip()]

    edges = fetch_edges_for_note(client, cfg.prefix, note_id, kind_list, args.scope, args.limit)
    print(json.dumps({"note_id": note_id, "count": len(edges), "edges": edges}, ensure_ascii=False, indent=2))


if __name__ == "__main__":
    main()
|
||||||
129
tests/test_edges_smoke.py
Normal file
129
tests/test_edges_smoke.py
Normal file
|
|
@ -0,0 +1,129 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
"""
|
||||||
|
scripts/test_edges_smoke.py
|
||||||
|
|
||||||
|
Integritäts-Check für mindnet-Edges in Qdrant.
|
||||||
|
Prüft pro Note:
|
||||||
|
- Chunk-Anzahl (mindnet_chunks) = belongs_to-Kanten
|
||||||
|
- next/prev-Kanten: jeweils (#Chunks - 1)
|
||||||
|
- Dedupe: kein Duplikat (key=(kind,source_id,target_id,scope))
|
||||||
|
- references (chunk-scope): vorhanden, wenn Wikilinks erwartet werden (nur Zählreport)
|
||||||
|
- optional note-scope references/backlink: vorhanden, wenn --note-scope-refs genutzt wurde
|
||||||
|
|
||||||
|
Ausgabe: JSON pro Note + Gesamtsummary.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
import json, os, sys
|
||||||
|
from typing import Dict, Any, List, Tuple, Set
|
||||||
|
from qdrant_client.http import models as rest
|
||||||
|
|
||||||
|
from app.core.qdrant import QdrantConfig, get_client
|
||||||
|
|
||||||
|
def collections(prefix: str) -> Tuple[str, str, str]:
    """Build the notes/chunks/edges collection names for *prefix*."""
    notes = f"{prefix}_notes"
    chunks = f"{prefix}_chunks"
    edges = f"{prefix}_edges"
    return notes, chunks, edges
|
||||||
|
|
||||||
|
def scroll_ids(client, collection: str, filt: rest.Filter | None = None, payload=False, limit=256):
    """Yield every point of *collection* matching *filt*, paginating via scroll.

    Args:
        client: Qdrant client exposing ``scroll``.
        collection: Name of the collection to scan.
        filt: Optional filter restricting the scan.
        payload: Whether to fetch payloads alongside point ids.
        limit: Page size per scroll request.

    Yields:
        Qdrant points (with payload iff *payload* is true).
    """
    offset = None
    while True:
        points, offset = client.scroll(
            collection_name=collection,
            scroll_filter=filt,
            with_payload=payload,
            with_vectors=False,
            limit=limit,
            offset=offset,
        )
        if not points:
            break
        yield from points
        # BUGFIX: a missing next-page offset means the final page was just
        # consumed. The previous version only checked for an empty page, so
        # on the last (non-empty) page it called scroll(offset=None) again,
        # restarting from the beginning and looping forever on any
        # non-empty collection.
        if offset is None:
            break
|
||||||
|
|
||||||
|
def list_notes(client, prefix: str) -> List[Dict[str, Any]]:
    """Collect note_id/title/type for every point in the notes collection.

    Points without a usable id (neither ``note_id`` nor ``id`` in the
    payload) are skipped.
    """
    notes_col = collections(prefix)[0]
    notes: List[Dict[str, Any]] = []
    for point in scroll_ids(client, notes_col, None, payload=True):
        payload = point.payload or {}
        note_id = payload.get("note_id") or payload.get("id")
        if not note_id:
            continue
        notes.append({
            "note_id": note_id,
            "title": payload.get("title"),
            "type": payload.get("type"),
        })
    return notes
|
||||||
|
|
||||||
|
def count_chunks_for_note(client, prefix: str, note_id: str) -> int:
    """Count the chunk points whose payload ``note_id`` equals *note_id*."""
    chunks_col = collections(prefix)[1]
    match_note = rest.Filter(
        must=[rest.FieldCondition(key="note_id", match=rest.MatchValue(value=note_id))]
    )
    count = 0
    for _ in scroll_ids(client, chunks_col, match_note, payload=False):
        count += 1
    return count
|
||||||
|
|
||||||
|
def fetch_edges_for_note(client, prefix: str, note_id: str) -> List[Dict[str, Any]]:
    """Return the payload of every edge point attached to *note_id*."""
    edges_col = collections(prefix)[2]
    match_note = rest.Filter(
        must=[rest.FieldCondition(key="note_id", match=rest.MatchValue(value=note_id))]
    )
    points = scroll_ids(client, edges_col, match_note, payload=True)
    return [point.payload or {} for point in points]
|
||||||
|
|
||||||
|
def main():
    """Run the edge-integrity smoke check and print a JSON report.

    For every note it verifies:
      - belongs_to edge count equals the note's chunk count
      - next/prev edge counts each equal max(#chunks - 1, 0)
      - no duplicate edges (key = (kind, source_id, target_id, scope))
    and tallies chunk-scope references, note-scope references and backlinks
    into a global summary.
    """
    cfg = QdrantConfig.from_env()
    client = get_client(cfg)
    notes = list_notes(client, cfg.prefix)

    report = []
    total = {"notes": 0, "chunks": 0, "belongs_to": 0, "next": 0, "prev": 0, "refs_chunk": 0, "refs_note": 0, "backlink": 0, "dup_edges": 0}
    for n in notes:
        nid = n["note_id"]
        total["notes"] += 1
        chunk_count = count_chunks_for_note(client, cfg.prefix, nid)
        total["chunks"] += chunk_count

        edges = fetch_edges_for_note(client, cfg.prefix, nid)
        by_kind: Dict[str, int] = {}
        keys: Set[tuple] = set()
        dup_count = 0
        for e in edges:
            k = e.get("kind")
            by_kind[k] = by_kind.get(k, 0) + 1
            t = (k, e.get("source_id"), e.get("target_id"), e.get("scope"))
            if t in keys:
                dup_count += 1
            else:
                keys.add(t)

        bt = by_kind.get("belongs_to", 0)
        nx = by_kind.get("next", 0)
        pv = by_kind.get("prev", 0)
        # BUGFIX: refs_chunk previously used by_kind["references"] (which
        # counts references of EVERY scope) whenever at least one chunk-scope
        # reference existed, double-counting note-scope references between
        # refs_chunk and refs_note. Count only chunk-scope references here,
        # symmetric to the note-scope count below.
        rc = sum(1 for e in edges if e.get("scope") == "chunk" and e.get("kind") == "references")
        rn = sum(1 for e in edges if e.get("scope") == "note" and e.get("kind") == "references")
        bl = by_kind.get("backlink", 0)

        total["belongs_to"] += bt
        total["next"] += nx
        total["prev"] += pv
        total["refs_chunk"] += rc
        total["refs_note"] += rn
        total["backlink"] += bl
        total["dup_edges"] += dup_count

        ok_bt = (bt == chunk_count)
        ok_seq = (nx == max(chunk_count - 1, 0) and pv == max(chunk_count - 1, 0))
        ok_dup = (dup_count == 0)

        report.append({
            "note_id": nid,
            "title": n.get("title"),
            "type": n.get("type"),
            "chunks": chunk_count,
            "edges_by_kind": by_kind,
            "checks": {
                "belongs_to_equals_chunks": ok_bt,
                "next_prev_match": ok_seq,
                "no_duplicate_edges": ok_dup,
            }
        })

    out = {"prefix": cfg.prefix, "summary": total, "notes": report}
    print(json.dumps(out, ensure_ascii=False, indent=2))


if __name__ == "__main__":
    main()
|
||||||
Loading…
Reference in New Issue
Block a user