# Consistency audit for the mindnet Qdrant collections (notes / chunks / edges).
#!/usr/bin/env python3
|
|
# -*- coding: utf-8 -*-
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
import os
|
|
from collections import Counter, defaultdict
|
|
|
|
from qdrant_client.http import models as rest
|
|
from app.core.qdrant import QdrantConfig, get_client
|
|
|
|
|
|
def _rel(pl: dict) -> str:
|
|
return pl.get("relation") or pl.get("kind") or "edge"
|
|
|
|
|
|
def _scroll(client, col):
|
|
pts = []
|
|
next_page = None
|
|
while True:
|
|
res, next_page = client.scroll(
|
|
collection_name=col,
|
|
with_payload=True,
|
|
with_vectors=False,
|
|
limit=1024,
|
|
offset=next_page,
|
|
)
|
|
pts.extend(res)
|
|
if next_page is None:
|
|
break
|
|
return pts
|
|
|
|
|
|
def _iter_payloads(client, col):
    """Yield ``(note_id, payload)`` for every point in *col* with a note_id.

    Points whose payload is missing or lacks a truthy ``note_id`` are skipped.
    """
    for point in _scroll(client, col):
        payload = point.payload or {}
        note_id = payload.get("note_id")
        if note_id:
            yield note_id, payload


def main():
    """Audit the Qdrant collections for one prefix and print JSON lines.

    Emits one JSON row per note (chunk count, edge counts by kind, and
    consistency checks), followed by a single overall summary object.
    """
    cfg = QdrantConfig.from_env()
    client = get_client(cfg)
    # Allow overriding the collection prefix via the environment.
    prefix = os.environ.get("COLLECTION_PREFIX", cfg.prefix)

    cols = {
        "notes": f"{prefix}_notes",
        "chunks": f"{prefix}_chunks",
        "edges": f"{prefix}_edges",
    }

    # Index: note_id -> title/type.
    notes_meta = {
        nid: {"title": pl.get("title", ""), "type": pl.get("type", "")}
        for nid, pl in _iter_payloads(client, cols["notes"])
    }

    # Chunk count per note.
    chunks_by_note = Counter(nid for nid, _ in _iter_payloads(client, cols["chunks"]))

    # Edge payloads per note.
    edges_by_note = defaultdict(list)
    for nid, pl in _iter_payloads(client, cols["edges"]):
        edges_by_note[nid].append(pl)

    # One JSON row per note, in deterministic (sorted) order.
    summary_edges = Counter()
    total_chunks = 0
    dup_note_count = 0  # notes carrying at least one duplicate edge
    for nid in sorted(notes_meta):
        meta = notes_meta[nid]
        chunks = chunks_by_note.get(nid, 0)
        total_chunks += chunks

        kinds = Counter(_rel(pl) for pl in edges_by_note[nid])
        summary_edges.update(kinds)

        no_dupes = _no_dupes(edges_by_note[nid])
        if not no_dupes:
            dup_note_count += 1

        row = {
            "note_id": nid,
            "title": meta["title"],
            "type": meta["type"],
            "chunks": chunks,
            "edges_by_kind": dict(kinds),
            "checks": {
                # Every chunk should have exactly one belongs_to edge.
                "belongs_to_equals_chunks": (kinds.get("belongs_to", 0) == chunks),
                # A chain of n chunks has n-1 "next" and n-1 "prev" edges.
                "next_prev_match": (
                    kinds.get("next", 0) == kinds.get("prev", 0) == max(0, chunks - 1)
                ),
                "no_duplicate_edges": no_dupes,
            },
        }
        print(json.dumps(row, ensure_ascii=False))

    # Overall summary.
    out = {
        "prefix": prefix,
        "summary": {
            "notes": len(notes_meta),
            "chunks": total_chunks,
            "belongs_to": summary_edges.get("belongs_to", 0),
            "next": summary_edges.get("next", 0),
            "prev": summary_edges.get("prev", 0),
            "refs_chunk": summary_edges.get("references", 0),
            "refs_note": summary_edges.get("references_note", 0),
            "backlink": summary_edges.get("backlink", 0),
            # Fixed: this was hard-coded to 0; it is now the number of notes
            # whose edge set failed the per-note duplicate check above.
            "dup_edges": dup_note_count,
        },
    }
    print(json.dumps(out, ensure_ascii=False))
def _no_dupes(pls):
|
|
seen = set()
|
|
for pl in pls:
|
|
key = (
|
|
str(pl.get("source_id") or ""),
|
|
str(pl.get("target_id") or ""),
|
|
str(pl.get("relation") or pl.get("kind") or ""),
|
|
str(pl.get("rule_id") or ""),
|
|
)
|
|
if key in seen:
|
|
return False
|
|
seen.add(key)
|
|
return True
|
|
|
|
|
|
# Script entry point: run the consistency audit when invoked directly.
if __name__ == "__main__":
    main()