scripts/edges_full_check.py hinzugefügt
All checks were successful
Deploy mindnet to llm-node / deploy (push) Successful in 4s
All checks were successful
Deploy mindnet to llm-node / deploy (push) Successful in 4s
This commit is contained in:
parent
4c56918d8a
commit
cea6d35729
120
scripts/edges_full_check.py
Normal file
120
scripts/edges_full_check.py
Normal file
|
|
@ -0,0 +1,120 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
from __future__ import annotations
|
||||||
|
import json
|
||||||
|
from collections import Counter
|
||||||
|
from app.core.qdrant import QdrantConfig, get_client
|
||||||
|
|
||||||
|
def fetch_all(client, col, batch_size=2048):
    """Return every point of collection *col* by paging through ``client.scroll``.

    Args:
        client: Object exposing a ``scroll`` method that accepts the keyword
            arguments ``collection_name``, ``with_payload``, ``with_vectors``,
            ``limit`` and ``offset`` and returns a ``(points, next_offset)``
            pair.
        col: Name of the collection to read.
        batch_size: Maximum number of points requested per ``scroll`` call.

    Returns:
        A list holding the points of all pages in the order they were
        returned. Payloads are requested, vectors are not.
    """
    points = []
    next_offset = None  # None asks for the first page
    while True:
        batch, next_offset = client.scroll(
            collection_name=col,
            with_payload=True,
            with_vectors=False,
            limit=batch_size,
            offset=next_offset,
        )
        points.extend(batch)
        # Only ``None`` ends the loop; any other offset, including a falsy
        # one such as 0, is handed to the next ``scroll`` call.
        if next_offset is None:
            break
    return points
|
||||||
|
|
||||||
|
def _payload(point):
    """Return the payload of *point*, or an empty dict when it is falsy/None."""
    return point.payload or {}


def _edge_kind(payload):
    """Return the edge type: ``kind`` when truthy, otherwise ``relation``."""
    return payload.get("kind") or payload.get("relation")


def _structural_checks(chunks, edges):
    """Compare, per note, the chunk count with its structural edge counts.

    For every ``note_id`` found in the chunk payloads the expectation is:
    ``belongs_to`` edges == number of chunks, and
    ``next`` edges == ``prev`` edges == ``max(chunks - 1, 0)``.

    Returns:
        A dict keyed by note id (in order of first appearance among the
        chunks) with the raw counts and the two boolean check results.
    """
    chunks_by_note = Counter(_payload(c).get("note_id") for c in chunks)

    # One counter per structural edge type, keyed by the edge's note_id.
    structural = {"belongs_to": Counter(), "next": Counter(), "prev": Counter()}
    for edge in edges:
        payload = _payload(edge)
        per_note_counter = structural.get(_edge_kind(payload))
        if per_note_counter is not None:
            per_note_counter[payload.get("note_id")] += 1

    per_note = {}
    for note_id, chunk_count in chunks_by_note.items():
        belongs = structural["belongs_to"][note_id]
        nxt = structural["next"][note_id]
        prv = structural["prev"][note_id]
        expected_links = max(chunk_count - 1, 0)
        per_note[note_id] = {
            "chunks": chunk_count,
            "belongs_to": belongs,
            "next": nxt,
            "prev": prv,
            "checks": {
                "belongs_to_equals_chunks": belongs == chunk_count,
                "next_prev_match": nxt == prv == expected_links,
            },
        }
    return per_note


def _rule_stats(edges):
    """Collect rule statistics and run the duplicate checks over *edges*.

    Returns:
        A ``(totals, multi_callout_detected, has_duplicates)`` tuple where
        ``totals`` maps ``explicit`` / ``defaults`` / ``callout`` / ``inline``
        to the number of edges whose ``rule_id`` starts with the matching
        prefix, ``multi_callout_detected`` is true when at least two callout
        edges share ``(chunk_id, kind, rule_id)``, and ``has_duplicates`` is
        true when at least two edges share
        ``(source_id, target_id, kind, rule_id)``.
    """
    prefixes = {
        "explicit": "explicit:",
        "defaults": "edge_defaults:",
        "callout": "callout:edge:v1",
        "inline": "inline:rel:v1",
    }
    totals = dict.fromkeys(prefixes, 0)
    callout_groups = Counter()
    seen = set()
    has_duplicates = False

    for edge in edges:
        payload = _payload(edge)
        rule = payload.get("rule_id") or ""
        kind = _edge_kind(payload)

        key = (payload.get("source_id"), payload.get("target_id"), kind, rule)
        if key in seen:
            has_duplicates = True
        else:
            seen.add(key)

        for family, prefix in prefixes.items():
            if not rule.startswith(prefix):
                continue
            totals[family] += 1
            if family == "callout":
                callout_groups[(payload.get("chunk_id"), kind, rule)] += 1

    multi_callout_detected = any(n >= 2 for n in callout_groups.values())
    return totals, multi_callout_detected, has_duplicates


def main():
    """Print a JSON report on the ``<prefix>_notes/_chunks/_edges`` collections.

    The report contains the exact collection counts, the number of edges per
    kind, per-rule-family totals, the per-note structural checks, and the two
    duplicate indicators. Configuration is read via ``QdrantConfig.from_env``.
    """
    cfg = QdrantConfig.from_env()
    client = get_client(cfg)

    notes_col = f"{cfg.prefix}_notes"
    chunks_col = f"{cfg.prefix}_chunks"
    edges_col = f"{cfg.prefix}_edges"

    notes_cnt = client.count(collection_name=notes_col, exact=True).count
    chunks_cnt = client.count(collection_name=chunks_col, exact=True).count
    edges_cnt = client.count(collection_name=edges_col, exact=True).count

    chunks = fetch_all(client, chunks_col)
    edges = fetch_all(client, edges_col)

    edges_by_kind = Counter(_edge_kind(_payload(e)) for e in edges)
    per_note = _structural_checks(chunks, edges)
    totals, multi_callout_detected, has_duplicates = _rule_stats(edges)

    report = {
        "prefix": cfg.prefix,
        "counts": {
            "notes": notes_cnt,
            "chunks": chunks_cnt,
            "edges": edges_cnt,
            "edges_by_kind": dict(edges_by_kind),
            "explicit_total": totals["explicit"],
            "defaults_total": totals["defaults"],
            "callout_total": totals["callout"],
            "inline_total": totals["inline"],
        },
        "per_note_checks": per_note,
        "multi_callout_detected": multi_callout_detected,
        "has_duplicates": has_duplicates,
    }
    print(json.dumps(report, ensure_ascii=False, indent=2))
|
||||||
|
|
||||||
|
# Run the check only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
||||||
Loading…
Reference in New Issue
Block a user