scripts/import_markdown.py aktualisiert
All checks were successful
Deploy mindnet to llm-node / deploy (push) Successful in 3s
All checks were successful
Deploy mindnet to llm-node / deploy (push) Successful in 3s
This commit is contained in:
parent
b186569750
commit
3282f85007
|
|
@ -219,6 +219,26 @@ def _resolve_mode(val: Optional[str]) -> str:
|
||||||
|
|
||||||
|
|
||||||
def _env(key: str, default: str) -> str:
|
def _env(key: str, default: str) -> str:
|
||||||
|
|
||||||
|
def _resolve_dim(cfg) -> int:
    """Determine the vector dimension to use for the collections.

    Lookup order:
      1. The first attribute on ``cfg`` (from a list of common names) that
         holds a positive integer.
      2. The first environment variable (from a list of common names) whose
         value parses to a positive integer.
      3. The fallback value 384.

    Args:
        cfg: Configuration object (a QdrantConfig according to the original
            comment). It is only read via ``getattr``.

    Returns:
        A positive integer dimension.
    """
    # Try common attribute names on QdrantConfig.
    attr_names = (
        "dim",
        "vector_dim",
        "dimension",
        "dimensions",
        "embedding_dim",
        "embed_dim",
        "vector_size",
        "size",
    )
    for attr in attr_names:
        try:
            value = getattr(cfg, attr, None)
        except Exception:
            # Deliberate best-effort probing: an attribute/property that
            # raises on access is skipped instead of aborting the import.
            continue
        # bool is a subclass of int; a flag set to True must not be
        # mistaken for a dimension.
        if isinstance(value, int) and not isinstance(value, bool) and value > 0:
            return value

    # Try environment fallbacks.
    env_keys = (
        "MINDNET_DIM",
        "EMBED_DIM",
        "EMBEDDING_DIM",
        "QDRANT_VECTOR_DIM",
        "QDRANT_DIM",
        "VECTOR_DIM",
        "DIM",
    )
    for key in env_keys:
        raw = os.environ.get(key, "").strip()
        if not raw:
            continue
        try:
            value = int(raw)
        except ValueError:
            # Non-numeric value: ignore it and try the next variable.
            continue
        if value > 0:
            return value

    # Conservative default: MiniLM 384d (customary in this project).
    return 384
|
||||||
return (os.environ.get(key) or default).strip().lower()
|
return (os.environ.get(key) or default).strip().lower()
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -267,7 +287,15 @@ def main() -> None:
|
||||||
if args.prefix:
|
if args.prefix:
|
||||||
cfg.prefix = args.prefix.strip()
|
cfg.prefix = args.prefix.strip()
|
||||||
client = get_client(cfg)
|
client = get_client(cfg)
|
||||||
ensure_collections(client, cfg.prefix, cfg.dim)
|
dim = _resolve_dim(cfg)
|
||||||
|
# ensure_collections signature compatibility
|
||||||
|
try:
|
||||||
|
ensure_collections(client, cfg.prefix, dim)
|
||||||
|
except TypeError:
|
||||||
|
try:
|
||||||
|
ensure_collections(client, cfg.prefix)
|
||||||
|
except TypeError:
|
||||||
|
ensure_collections(client)
|
||||||
# abwärtskompatible Index-Erstellung
|
# abwärtskompatible Index-Erstellung
|
||||||
_ensure_payload_indexes(client, cfg.prefix)
|
_ensure_payload_indexes(client, cfg.prefix)
|
||||||
|
|
||||||
|
|
@ -408,7 +436,7 @@ def main() -> None:
|
||||||
print(json.dumps({"path": path, "note_id": note_id, "error": f"chunk build failed: {type(e).__name__}: {e}"}))
|
print(json.dumps({"path": path, "note_id": note_id, "error": f"chunk build failed: {type(e).__name__}: {e}"}))
|
||||||
continue
|
continue
|
||||||
|
|
||||||
vecs: List[List[float]] = [[0.0] * cfg.dim for _ in chunk_pls]
|
vecs: List[List[float]] = [[0.0] * dim for _ in chunk_pls]
|
||||||
if embed_texts and chunk_pls:
|
if embed_texts and chunk_pls:
|
||||||
try:
|
try:
|
||||||
texts_for_embed = [(pl.get("window") or pl.get("text") or "") for pl in chunk_pls]
|
texts_for_embed = [(pl.get("window") or pl.get("text") or "") for pl in chunk_pls]
|
||||||
|
|
@ -473,8 +501,8 @@ def main() -> None:
|
||||||
note_pl["hash_fulltext"] = old_payload.get("hash_fulltext", note_pl.get("hash_fulltext"))
|
note_pl["hash_fulltext"] = old_payload.get("hash_fulltext", note_pl.get("hash_fulltext"))
|
||||||
note_pl["hash_signature"] = old_payload.get("hash_signature", note_pl.get("hash_signature"))
|
note_pl["hash_signature"] = old_payload.get("hash_signature", note_pl.get("hash_signature"))
|
||||||
note_pl["hashes"] = merged_hashes
|
note_pl["hashes"] = merged_hashes
|
||||||
notes_name, note_pts = points_for_note(cfg.prefix, note_pl, None, cfg.dim)
|
notes_name, note_pts = points_for_note(cfg.prefix, note_pl, None, dim)
|
||||||
upsert_batch(client, notes_name, note_pts)
|
_upsert_batch(client, notes_name, note_pts)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if not changed:
|
if not changed:
|
||||||
|
|
@ -486,14 +514,14 @@ def main() -> None:
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(json.dumps({"path": path, "note_id": note_id, "warn": f"purge failed: {e}"}))
|
print(json.dumps({"path": path, "note_id": note_id, "warn": f"purge failed: {e}"}))
|
||||||
|
|
||||||
notes_name, note_pts = points_for_note(cfg.prefix, note_pl, None, cfg.dim)
|
notes_name, note_pts = points_for_note(cfg.prefix, note_pl, None, dim)
|
||||||
upsert_batch(client, notes_name, note_pts)
|
_upsert_batch(client, notes_name, note_pts)
|
||||||
if chunk_pls:
|
if chunk_pls:
|
||||||
chunks_name, chunk_pts = points_for_chunks(cfg.prefix, chunk_pls, vecs)
|
chunks_name, chunk_pts = _points_for_chunks(cfg.prefix, chunk_pls, vecs)
|
||||||
upsert_batch(client, chunks_name, chunk_pts)
|
_upsert_batch(client, chunks_name, chunk_pts)
|
||||||
if edges:
|
if edges:
|
||||||
edges_name, edge_pts = points_for_edges(cfg.prefix, edges)
|
edges_name, edge_pts = _points_for_edges(cfg.prefix, edges)
|
||||||
upsert_batch(client, edges_name, edge_pts)
|
_upsert_batch(client, edges_name, edge_pts)
|
||||||
|
|
||||||
print(f"Done. Processed notes: {processed}")
|
print(f"Done. Processed notes: {processed}")
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user