scripts/import_markdown.py updated
All checks were successful
Deploy mindnet to llm-node / deploy (push) Successful in 3s
All checks were successful
Deploy mindnet to llm-node / deploy (push) Successful in 3s
This commit is contained in:
parent
b186569750
commit
3282f85007
|
|
@ -219,6 +219,26 @@ def _resolve_mode(val: Optional[str]) -> str:
|
|||
|
||||
|
||||
def _env(key: str, default: str) -> str:
|
||||
|
||||
def _resolve_dim(cfg) -> int:
|
||||
# Try common attribute names on QdrantConfig
|
||||
for attr in ("dim", "vector_dim", "dimension", "dimensions", "embedding_dim", "embed_dim", "vector_size", "size"):
|
||||
try:
|
||||
v = getattr(cfg, attr)
|
||||
if isinstance(v, int) and v > 0:
|
||||
return v
|
||||
except Exception:
|
||||
pass
|
||||
# Try environment fallbacks
|
||||
for key in ("MINDNET_DIM", "EMBED_DIM", "EMBEDDING_DIM", "QDRANT_VECTOR_DIM", "QDRANT_DIM", "VECTOR_DIM", "DIM"):
|
||||
try:
|
||||
v = int(os.environ.get(key, "").strip() or "0")
|
||||
if v > 0:
|
||||
return v
|
||||
except Exception:
|
||||
continue
|
||||
# Conservative default: MiniLM 384d (im Projekt üblich)
|
||||
return 384
|
||||
return (os.environ.get(key) or default).strip().lower()
|
||||
|
||||
|
||||
|
|
@ -267,7 +287,15 @@ def main() -> None:
|
|||
if args.prefix:
|
||||
cfg.prefix = args.prefix.strip()
|
||||
client = get_client(cfg)
|
||||
ensure_collections(client, cfg.prefix, cfg.dim)
|
||||
dim = _resolve_dim(cfg)
|
||||
# ensure_collections signature compatibility
|
||||
try:
|
||||
ensure_collections(client, cfg.prefix, dim)
|
||||
except TypeError:
|
||||
try:
|
||||
ensure_collections(client, cfg.prefix)
|
||||
except TypeError:
|
||||
ensure_collections(client)
|
||||
# abwärtskompatible Index-Erstellung
|
||||
_ensure_payload_indexes(client, cfg.prefix)
|
||||
|
||||
|
|
@ -408,7 +436,7 @@ def main() -> None:
|
|||
print(json.dumps({"path": path, "note_id": note_id, "error": f"chunk build failed: {type(e).__name__}: {e}"}))
|
||||
continue
|
||||
|
||||
vecs: List[List[float]] = [[0.0] * cfg.dim for _ in chunk_pls]
|
||||
vecs: List[List[float]] = [[0.0] * dim for _ in chunk_pls]
|
||||
if embed_texts and chunk_pls:
|
||||
try:
|
||||
texts_for_embed = [(pl.get("window") or pl.get("text") or "") for pl in chunk_pls]
|
||||
|
|
@ -473,8 +501,8 @@ def main() -> None:
|
|||
note_pl["hash_fulltext"] = old_payload.get("hash_fulltext", note_pl.get("hash_fulltext"))
|
||||
note_pl["hash_signature"] = old_payload.get("hash_signature", note_pl.get("hash_signature"))
|
||||
note_pl["hashes"] = merged_hashes
|
||||
notes_name, note_pts = points_for_note(cfg.prefix, note_pl, None, cfg.dim)
|
||||
upsert_batch(client, notes_name, note_pts)
|
||||
notes_name, note_pts = points_for_note(cfg.prefix, note_pl, None, dim)
|
||||
_upsert_batch(client, notes_name, note_pts)
|
||||
continue
|
||||
|
||||
if not changed:
|
||||
|
|
@ -486,14 +514,14 @@ def main() -> None:
|
|||
except Exception as e:
|
||||
print(json.dumps({"path": path, "note_id": note_id, "warn": f"purge failed: {e}"}))
|
||||
|
||||
notes_name, note_pts = points_for_note(cfg.prefix, note_pl, None, cfg.dim)
|
||||
upsert_batch(client, notes_name, note_pts)
|
||||
notes_name, note_pts = points_for_note(cfg.prefix, note_pl, None, dim)
|
||||
_upsert_batch(client, notes_name, note_pts)
|
||||
if chunk_pls:
|
||||
chunks_name, chunk_pts = points_for_chunks(cfg.prefix, chunk_pls, vecs)
|
||||
upsert_batch(client, chunks_name, chunk_pts)
|
||||
chunks_name, chunk_pts = _points_for_chunks(cfg.prefix, chunk_pls, vecs)
|
||||
_upsert_batch(client, chunks_name, chunk_pts)
|
||||
if edges:
|
||||
edges_name, edge_pts = points_for_edges(cfg.prefix, edges)
|
||||
upsert_batch(client, edges_name, edge_pts)
|
||||
edges_name, edge_pts = _points_for_edges(cfg.prefix, edges)
|
||||
_upsert_batch(client, edges_name, edge_pts)
|
||||
|
||||
print(f"Done. Processed notes: {processed}")
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user