From b18656975055796c4d8b914b4423713e35af0361 Mon Sep 17 00:00:00 2001
From: Lars
Date: Sat, 8 Nov 2025 15:32:08 +0100
Subject: [PATCH] scripts/import_markdown.py aktualisiert

---
Review notes (text in this section is ignored by `git am`):

* Fallback point IDs are now UUID strings: a key that already is a UUID is
  kept, any other key is hashed with UUIDv5. Qdrant only accepts unsigned
  integers and UUIDs as point IDs, so the former free-form string IDs
  could not be upserted.
* Fallback edge points use vector={} instead of vector=None, which
  PointStruct does not accept.
  NOTE(review): assumes the edges collection accepts points without a
  vector - confirm against the collection configuration.
* Line structure and indentation were reconstructed from a
  whitespace-collapsed copy; the indentation of context/removed lines and
  the blank trailing context line are assumptions. Use
  `git apply --ignore-whitespace` if the hunk does not match.
* The commit and blob hashes above/below still name the original revision.

 scripts/import_markdown.py | 81 +++++++++++++++++++++++++++++++++++---
 1 file changed, 75 insertions(+), 6 deletions(-)

diff --git a/scripts/import_markdown.py b/scripts/import_markdown.py
index 8f3c69c..88876f5 100644
--- a/scripts/import_markdown.py
+++ b/scripts/import_markdown.py
@@ -52,12 +52,81 @@ except Exception:
         # No-Op: ältere Releases ohne dedizierte Index-Funktion
         return None
 
-from app.core.qdrant_points import (
-    points_for_chunks,
-    points_for_note,
-    points_for_edges,
-    upsert_batch,
-)
+# Qdrant point helpers (tolerate older names / missing modules)
+try:
+    from app.core.qdrant_points import (
+        points_for_chunks as _points_for_chunks,
+        points_for_note as _points_for_note,
+        points_for_edges as _points_for_edges,
+        upsert_batch as _upsert_batch,
+    )
+except Exception:
+    # Local fallback implementations (keep the importer working)
+    import uuid as _uuid
+
+    from qdrant_client.http import models as _rest
+
+    def _collection_names(prefix: str):
+        """Return the (notes, chunks, edges) collection names for prefix."""
+        return f"{prefix}_notes", f"{prefix}_chunks", f"{prefix}_edges"
+
+    def _point_id(raw) -> str:
+        """Turn an arbitrary key into a UUID string usable as a point ID.
+
+        A key that already parses as a UUID is kept (in canonical form);
+        any other key is hashed with UUIDv5, so the same key always maps
+        to the same point ID.
+        """
+        key = str(raw)
+        try:
+            return str(_uuid.UUID(key))
+        except ValueError:
+            return str(_uuid.uuid5(_uuid.NAMESPACE_URL, key))
+
+    def _points_for_note(prefix: str, note_payload: dict, note_vec, dim: int):
+        """Return (notes collection name, [point]) for a single note."""
+        notes_col, _, _ = _collection_names(prefix)
+        # A note without an embedding gets a zero vector of length dim.
+        vec = note_vec if note_vec is not None else [0.0] * int(dim)
+        pid = note_payload.get("note_id") or note_payload.get("id") or "missing-note-id"
+        pt = _rest.PointStruct(id=_point_id(pid), vector=vec, payload=note_payload)
+        return notes_col, [pt]
+
+    def _points_for_chunks(prefix: str, chunk_payloads: list[dict], vectors: list[list[float]]):
+        """Return (chunks collection name, points); chunks without a vector are skipped."""
+        _, chunks_col, _ = _collection_names(prefix)
+        pts = []
+        # zip() stops at the shorter list, so payloads beyond the last
+        # vector are skipped just like entries whose vector is None.
+        for i, (pl, vec) in enumerate(zip(chunk_payloads, vectors), start=1):
+            if vec is None:
+                continue
+            pid = pl.get("chunk_id") or pl.get("id") or f"{pl.get('note_id','missing')}#{i}"
+            pts.append(_rest.PointStruct(id=_point_id(pid), vector=vec, payload=pl))
+        return chunks_col, pts
+
+    def _points_for_edges(prefix: str, edges: list[dict]):
+        """Return (edges collection name, points) for the given edge payloads."""
+        _, _, edges_col = _collection_names(prefix)
+        pts = []
+        for i, e in enumerate(edges):
+            # Readable composite key; _point_id() hashes it into a UUID.
+            src = e.get("source_id") or e.get("src_id") or "src"
+            dst = e.get("target_id") or e.get("dst_id") or "dst"
+            kind = e.get("kind") or e.get("edge_type") or "edge"
+            nid = e.get("note_id") or "note"
+            pid = f"{nid}:{kind}:{src}->{dst}:{i}"
+            # PointStruct does not accept vector=None; {} means "no vectors".
+            pts.append(_rest.PointStruct(id=_point_id(pid), vector={}, payload=e))
+        return edges_col, pts
+
+    def _upsert_batch(client, collection_name: str, points: list):
+        """Upsert points with wait=True; an empty batch is a no-op."""
+        if not points:
+            return
+        client.upsert(collection_name=collection_name, points=points, wait=True)
+
+
 
 # NEU: Type-Registry (optional)
 try: