Dateien nach "scripts" hochladen
All checks were successful
Deploy mindnet to llm-node / deploy (push) Successful in 3s

This commit is contained in:
Lars 2025-11-08 16:24:03 +01:00
parent 79bb8a143c
commit 6713f342c3

View File

@ -2,13 +2,16 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
""" """
Script: scripts/import_markdown.py Markdown Qdrant (Notes, Chunks, Edges) Script: scripts/import_markdown.py Markdown Qdrant (Notes, Chunks, Edges)
Version: 3.8.4 Version: 3.8.5
Date: 2025-11-08 Date: 2025-11-08
Changes vs 3.8.3 Notes
----------------- -----
- Fixed SyntaxError by moving `import uuid` to top-level and simplifying the UUIDv5 helper. - Uses compatibility wrappers for ensure_collections and payload index creation.
- No functional changes otherwise. - Provides robust local fallbacks for qdrant_points helpers.
- Generates valid Qdrant point IDs (int or UUIDv5) if none provided.
- Detects Named-Vector schema and coerces points accordingly.
- Integrates Type-Registry without breaking older behavior.
""" """
from __future__ import annotations from __future__ import annotations
@ -28,6 +31,7 @@ def _uuid5_deterministic(*parts: str) -> str:
base = ":".join(str(p) for p in parts if p is not None) base = ":".join(str(p) for p in parts if p is not None)
return str(_uuid.uuid5(_MN_NAMESPACE, base)) return str(_uuid.uuid5(_MN_NAMESPACE, base))
# --- Project imports (as in stable 20251105) ---
from app.core.parser import ( from app.core.parser import (
read_markdown, read_markdown,
normalize_frontmatter, normalize_frontmatter,
@ -41,11 +45,10 @@ try:
except Exception: # pragma: no cover except Exception: # pragma: no cover
from app.core.edges import build_edges_for_note # type: ignore from app.core.edges import build_edges_for_note # type: ignore
# Qdrant-Basics
from app.core.qdrant import ( from app.core.qdrant import (
QdrantConfig, QdrantConfig,
get_client, get_client,
ensure_collections, # used only via compatibility wrapper below ensure_collections, # used only via wrapper
) )
# Backward-compatible import for payload index creation # Backward-compatible import for payload index creation
@ -56,10 +59,10 @@ except Exception:
from app.core.qdrant import ensure_payload_indices as _ensure_payload_indexes # older name from app.core.qdrant import ensure_payload_indices as _ensure_payload_indexes # older name
except Exception: except Exception:
def _ensure_payload_indexes(*_args, **_kwargs): def _ensure_payload_indexes(*_args, **_kwargs):
# No-Op: older releases without dedicated index creation # No-Op for older releases without explicit payload index creation
return None return None
# Qdrant points helpers (robust against older names / missing module functions) # Qdrant points helpers (try project first, then safe local fallbacks)
try: try:
from app.core.qdrant_points import ( from app.core.qdrant_points import (
points_for_chunks as _points_for_chunks, points_for_chunks as _points_for_chunks,
@ -68,17 +71,20 @@ try:
upsert_batch as _upsert_batch, upsert_batch as _upsert_batch,
) )
except Exception: except Exception:
# Local fallbacks (No-Break) # ---- Local fallbacks ----
from qdrant_client.http import models as _rest
def _collection_names(prefix: str): def _collection_names(prefix: str):
return f"{prefix}_notes", f"{prefix}_chunks", f"{prefix}_edges" return f"{prefix}_notes", f"{prefix}_chunks", f"{prefix}_edges"
def _points_for_note(prefix: str, note_payload: dict, note_vec, dim: int): def _points_for_note(prefix: str, note_payload: dict, note_vec, dim: int):
notes_col, _, _ = _collection_names(prefix) notes_col, _, _ = _collection_names(prefix)
raw = note_payload.get("point_id") or note_payload.get("qdrant_id") or note_payload.get("note_id") or note_payload.get("id") or note_payload.get("path") raw = (
# Accept integer IDs if provided note_payload.get("point_id")
pid = None or note_payload.get("qdrant_id")
or note_payload.get("note_id")
or note_payload.get("id")
or note_payload.get("path")
)
pid: Any = None
if isinstance(raw, int): if isinstance(raw, int):
pid = raw pid = raw
elif isinstance(raw, str) and raw.isdigit(): elif isinstance(raw, str) and raw.isdigit():
@ -89,15 +95,21 @@ except Exception:
if pid is None: if pid is None:
pid = _uuid5_deterministic("note", str(raw or "")) pid = _uuid5_deterministic("note", str(raw or ""))
vec = note_vec if note_vec is not None else [0.0] * int(dim) vec = note_vec if note_vec is not None else [0.0] * int(dim)
pt = _rest.PointStruct(id=pid, vector=vec, payload=note_payload) pt = rest.PointStruct(id=pid, vector=vec, payload=note_payload)
return notes_col, [pt] return notes_col, [pt]
def _points_for_chunks(prefix: str, chunk_payloads: list[dict], vectors: list[list[float]]): def _points_for_chunks(prefix: str, chunk_payloads: list[dict], vectors: list[list[float]]):
_, chunks_col, _ = _collection_names(prefix) _, chunks_col, _ = _collection_names(prefix)
pts = [] pts = []
for i, pl in enumerate(chunk_payloads): for i, pl in enumerate(chunk_payloads):
raw = pl.get("point_id") or pl.get("qdrant_id") or pl.get("chunk_id") or pl.get("id") or f"{pl.get('note_id','missing')}#{i+1}" raw = (
pid = None pl.get("point_id")
or pl.get("qdrant_id")
or pl.get("chunk_id")
or pl.get("id")
or f"{pl.get('note_id','missing')}#{i+1}"
)
pid: Any = None
if isinstance(raw, int): if isinstance(raw, int):
pid = raw pid = raw
elif isinstance(raw, str) and raw.isdigit(): elif isinstance(raw, str) and raw.isdigit():
@ -110,7 +122,7 @@ except Exception:
vec = vectors[i] if i < len(vectors) else None vec = vectors[i] if i < len(vectors) else None
if vec is None: if vec is None:
continue continue
pts.append(_rest.PointStruct(id=pid, vector=vec, payload=pl)) pts.append(rest.PointStruct(id=pid, vector=vec, payload=pl))
return chunks_col, pts return chunks_col, pts
def _points_for_edges(prefix: str, edges: list[dict]): def _points_for_edges(prefix: str, edges: list[dict]):
@ -124,7 +136,7 @@ except Exception:
raw = e.get("point_id") or e.get("qdrant_id") raw = e.get("point_id") or e.get("qdrant_id")
if raw is None: if raw is None:
raw = f"{nid}:{kind}:{src_id}->{dst_id}:{i}" raw = f"{nid}:{kind}:{src_id}->{dst_id}:{i}"
pid = None pid: Any = None
if isinstance(raw, int): if isinstance(raw, int):
pid = raw pid = raw
elif isinstance(raw, str) and raw.isdigit(): elif isinstance(raw, str) and raw.isdigit():
@ -134,14 +146,14 @@ except Exception:
pid = None pid = None
if pid is None: if pid is None:
pid = _uuid5_deterministic("edge", str(raw)) pid = _uuid5_deterministic("edge", str(raw))
pts.append(_rest.PointStruct(id=pid, vector=None, payload=e)) pts.append(rest.PointStruct(id=pid, vector=None, payload=e))
return edges_col, pts return edges_col, pts
def _upsert_batch(client, collection_name: str, points: list): def _upsert_batch(client, collection_name: str, points: list):
if not points: if not points:
return return
pts = _coerce_points_for_collection(client, collection_name, points) pts = _coerce_points_for_collection(client, collection_name, points)
client.upsert(collection_name=collection_name, points=pts, wait=True) client.upsert(collection_name=collection_name, points=pts, wait=True)
# Type-Registry (optional) # Type-Registry (optional)
try: try:
@ -277,12 +289,10 @@ def _resolve_dim(cfg) -> int:
return v return v
except Exception: except Exception:
continue continue
# Conservative default: MiniLM 384d # Conservative default
return 384 return 384
# ---- Compatibility wrappers (no direct calls to project-specific signatures) ----
# ---- Qdrant vector schema detection & point coercion ---- # ---- Qdrant vector schema detection & point coercion ----
def _get_vector_schema(client, collection_name: str): def _get_vector_schema(client, collection_name: str):
""" """
@ -293,17 +303,20 @@ def _get_vector_schema(client, collection_name: str):
try: try:
info = client.get_collection(collection_name=collection_name) info = client.get_collection(collection_name=collection_name)
vecs = getattr(info, "vectors", None) vecs = getattr(info, "vectors", None)
# Single-vector config
if hasattr(vecs, "size") and isinstance(vecs.size, int): if hasattr(vecs, "size") and isinstance(vecs.size, int):
return {"kind": "single", "size": vecs.size} return {"kind": "single", "size": vecs.size}
if hasattr(vecs, "config"): # Named-vectors config
# NamedVectors as dict-like in .config cfg = getattr(vecs, "config", None)
names = list(getattr(vecs, "config", {}).keys()) if isinstance(cfg, dict) and cfg:
names = list(cfg.keys())
if names: if names:
return {"kind": "named", "names": names} return {"kind": "named", "names": names}
except Exception: except Exception:
pass pass
return {"kind": "single", "size": None} return {"kind": "single", "size": None}
def _coerce_points_for_collection(client, collection_name: str, points: list): def _coerce_points_for_collection(client, collection_name: str, points: list):
""" """
If collection uses named vectors, wrap each point's .vector into .vectors{<name>: vector}. If collection uses named vectors, wrap each point's .vector into .vectors{<name>: vector}.
@ -317,20 +330,19 @@ def _coerce_points_for_collection(client, collection_name: str, points: list):
if not names: if not names:
return points return points
primary = names[0] primary = names[0]
from qdrant_client.http import models as _rest
fixed = [] fixed = []
for pt in points: for pt in points:
# pt may be a dataclass; create a new PointStruct when needed
vec = getattr(pt, "vector", None) vec = getattr(pt, "vector", None)
if vec is not None: if vec is not None:
fixed.append(_rest.PointStruct(id=pt.id, vectors={primary: vec}, payload=pt.payload)) fixed.append(rest.PointStruct(id=pt.id, vectors={primary: vec}, payload=pt.payload))
else: else:
# keep as-is (no vectors)
fixed.append(pt) fixed.append(pt)
return fixed return fixed
except Exception: except Exception:
return points return points
# ---- Compatibility wrappers (no direct calls to project-specific signatures) ----
def _ensure_collections_compat(client, cfg, dim): def _ensure_collections_compat(client, cfg, dim):
""" """
Call ensure_collections with the correct signature across releases: Call ensure_collections with the correct signature across releases:
@ -356,6 +368,7 @@ def _ensure_collections_compat(client, cfg, dim):
# If everything fails, do nothing # If everything fails, do nothing
return None return None
def _ensure_payload_indexes_compat(client, cfg): def _ensure_payload_indexes_compat(client, cfg):
""" """
Try calling payload index creation with cfg, then prefix; ignore if unsupported. Try calling payload index creation with cfg, then prefix; ignore if unsupported.
@ -592,7 +605,6 @@ def main() -> None:
except Exception as e: except Exception as e:
edges_failed = True edges_failed = True
edges = [] edges = []
# WICHTIG: Wir brechen NICHT mehr ab — Note & Chunks werden geschrieben.
print(json.dumps({"path": path, "note_id": note_id, "warn": f"build_edges_for_note failed, skipping edges: {type(e).__name__}: {e}"})) print(json.dumps({"path": path, "note_id": note_id, "warn": f"build_edges_for_note failed, skipping edges: {type(e).__name__}: {e}"}))
# -------- Summary -------- # -------- Summary --------