scripts/import_markdown.py aktualisiert
All checks were successful
Deploy mindnet to llm-node / deploy (push) Successful in 3s

This commit is contained in:
Lars 2025-11-08 15:35:00 +01:00
parent b186569750
commit 3282f85007

View File

@ -219,6 +219,26 @@ def _resolve_mode(val: Optional[str]) -> str:
def _env(key: str, default: str) -> str:
def _resolve_dim(cfg) -> int:
# Try common attribute names on QdrantConfig
for attr in ("dim", "vector_dim", "dimension", "dimensions", "embedding_dim", "embed_dim", "vector_size", "size"):
try:
v = getattr(cfg, attr)
if isinstance(v, int) and v > 0:
return v
except Exception:
pass
# Try environment fallbacks
for key in ("MINDNET_DIM", "EMBED_DIM", "EMBEDDING_DIM", "QDRANT_VECTOR_DIM", "QDRANT_DIM", "VECTOR_DIM", "DIM"):
try:
v = int(os.environ.get(key, "").strip() or "0")
if v > 0:
return v
except Exception:
continue
# Conservative default: MiniLM 384d (im Projekt üblich)
return 384
return (os.environ.get(key) or default).strip().lower()
@ -267,7 +287,15 @@ def main() -> None:
if args.prefix:
cfg.prefix = args.prefix.strip()
client = get_client(cfg)
ensure_collections(client, cfg.prefix, cfg.dim)
dim = _resolve_dim(cfg)
# ensure_collections signature compatibility
try:
ensure_collections(client, cfg.prefix, dim)
except TypeError:
try:
ensure_collections(client, cfg.prefix)
except TypeError:
ensure_collections(client)
# backward-compatible index creation
_ensure_payload_indexes(client, cfg.prefix)
@ -408,7 +436,7 @@ def main() -> None:
print(json.dumps({"path": path, "note_id": note_id, "error": f"chunk build failed: {type(e).__name__}: {e}"}))
continue
vecs: List[List[float]] = [[0.0] * cfg.dim for _ in chunk_pls]
vecs: List[List[float]] = [[0.0] * dim for _ in chunk_pls]
if embed_texts and chunk_pls:
try:
texts_for_embed = [(pl.get("window") or pl.get("text") or "") for pl in chunk_pls]
@ -473,8 +501,8 @@ def main() -> None:
note_pl["hash_fulltext"] = old_payload.get("hash_fulltext", note_pl.get("hash_fulltext"))
note_pl["hash_signature"] = old_payload.get("hash_signature", note_pl.get("hash_signature"))
note_pl["hashes"] = merged_hashes
notes_name, note_pts = points_for_note(cfg.prefix, note_pl, None, cfg.dim)
upsert_batch(client, notes_name, note_pts)
notes_name, note_pts = points_for_note(cfg.prefix, note_pl, None, dim)
_upsert_batch(client, notes_name, note_pts)
continue
if not changed:
@ -486,14 +514,14 @@ def main() -> None:
except Exception as e:
print(json.dumps({"path": path, "note_id": note_id, "warn": f"purge failed: {e}"}))
notes_name, note_pts = points_for_note(cfg.prefix, note_pl, None, cfg.dim)
upsert_batch(client, notes_name, note_pts)
notes_name, note_pts = points_for_note(cfg.prefix, note_pl, None, dim)
_upsert_batch(client, notes_name, note_pts)
if chunk_pls:
chunks_name, chunk_pts = points_for_chunks(cfg.prefix, chunk_pls, vecs)
upsert_batch(client, chunks_name, chunk_pts)
chunks_name, chunk_pts = _points_for_chunks(cfg.prefix, chunk_pls, vecs)
_upsert_batch(client, chunks_name, chunk_pts)
if edges:
edges_name, edge_pts = points_for_edges(cfg.prefix, edges)
upsert_batch(client, edges_name, edge_pts)
edges_name, edge_pts = _points_for_edges(cfg.prefix, edges)
_upsert_batch(client, edges_name, edge_pts)
print(f"Done. Processed notes: {processed}")