Dateien nach "scripts" hochladen
All checks were successful
Deploy mindnet to llm-node / deploy (push) Successful in 3s
All checks were successful
Deploy mindnet to llm-node / deploy (push) Successful in 3s
This commit is contained in:
parent
804eeef858
commit
8bf3707511
|
|
@ -4,27 +4,6 @@
|
||||||
Script: scripts/import_markdown.py — Markdown → Qdrant (Notes, Chunks, Edges)
|
Script: scripts/import_markdown.py — Markdown → Qdrant (Notes, Chunks, Edges)
|
||||||
Version: 3.7.2
|
Version: 3.7.2
|
||||||
Datum: 2025-09-30
|
Datum: 2025-09-30
|
||||||
# ---- helpers ----
|
|
||||||
def effective_chunk_profile(note_type: str, registry: dict) -> str | None:
|
|
||||||
try:
|
|
||||||
tcfg = (registry or {}).get("types", {}).get(note_type) or (registry or {}).get("types", {}).get("concept")
|
|
||||||
prof = (tcfg or {}).get("chunk_profile")
|
|
||||||
if isinstance(prof, str) and prof in {"short", "medium", "long"}:
|
|
||||||
return prof
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
return None
|
|
||||||
|
|
||||||
def effective_retriever_weight(note_type: str, registry: dict) -> float | None:
|
|
||||||
try:
|
|
||||||
tcfg = (registry or {}).get("types", {}).get(note_type) or (registry or {}).get("types", {}).get("concept")
|
|
||||||
w = (tcfg or {}).get("retriever_weight")
|
|
||||||
if w is None:
|
|
||||||
return None
|
|
||||||
return float(w)
|
|
||||||
except Exception:
|
|
||||||
return None
|
|
||||||
|
|
||||||
|
|
||||||
Kurzbeschreibung
|
Kurzbeschreibung
|
||||||
----------------
|
----------------
|
||||||
|
|
@ -372,39 +351,11 @@ def main() -> None:
|
||||||
|
|
||||||
changed = args.force_replace or (not has_old) or hash_changed or text_changed
|
changed = args.force_replace or (not has_old) or hash_changed or text_changed
|
||||||
do_baseline_only = (args.baseline_modes and has_old and needs_baseline and not changed)
|
do_baseline_only = (args.baseline_modes and has_old and needs_baseline and not changed)
|
||||||
|
|
||||||
# -------- Chunks / Embeddings --------
|
# -------- Chunks / Embeddings --------
|
||||||
chunk_pls: List[Dict[str, Any]] = []
|
chunk_pls: List[Dict[str, Any]] = []
|
||||||
try:
|
try:
|
||||||
body_text = getattr(parsed, "body", "") or ""
|
body_text = getattr(parsed, "body", "") or ""
|
||||||
# ---- Type-Registry integration ----
|
|
||||||
try:
|
|
||||||
note_type = resolve_note_type(fm.get("type"), reg)
|
|
||||||
except Exception:
|
|
||||||
note_type = (fm.get("type") or "concept")
|
|
||||||
fm["type"] = note_type or "concept"
|
|
||||||
try:
|
|
||||||
cfg_type = get_type_config(note_type, reg)
|
|
||||||
except Exception:
|
|
||||||
cfg_type = {}
|
|
||||||
prof = effective_chunk_profile(note_type, reg)
|
|
||||||
if prof:
|
|
||||||
fm["chunk_profile"] = prof
|
|
||||||
weight = cfg_type.get("retriever_weight")
|
|
||||||
if weight is not None:
|
|
||||||
fm["retriever_weight"] = float(weight)
|
|
||||||
|
|
||||||
# Ensure note-level payload reflects registry fields
|
|
||||||
try:
|
|
||||||
if isinstance(note_pl, dict):
|
|
||||||
if fm.get("type"):
|
|
||||||
note_pl["type"] = fm["type"]
|
|
||||||
if fm.get("chunk_profile"):
|
|
||||||
note_pl["chunk_profile"] = fm["chunk_profile"]
|
|
||||||
if "retriever_weight" in fm:
|
|
||||||
note_pl["retriever_weight"] = fm["retriever_weight"]
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
|
|
||||||
chunks = assemble_chunks(fm["id"], body_text, fm.get("type", "concept"))
|
chunks = assemble_chunks(fm["id"], body_text, fm.get("type", "concept"))
|
||||||
chunk_pls = make_chunk_payloads(fm, note_pl["path"], chunks, note_text=body_text)
|
chunk_pls = make_chunk_payloads(fm, note_pl["path"], chunks, note_text=body_text)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|
@ -419,7 +370,6 @@ def main() -> None:
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(json.dumps({"path": path, "note_id": note_id, "warn": f"embed_texts failed, using zeros: {e}"}))
|
print(json.dumps({"path": path, "note_id": note_id, "warn": f"embed_texts failed, using zeros: {e}"}))
|
||||||
|
|
||||||
|
|
||||||
# -------- Edges (robust) --------
|
# -------- Edges (robust) --------
|
||||||
edges: List[Dict[str, Any]] = []
|
edges: List[Dict[str, Any]] = []
|
||||||
edges_failed = False
|
edges_failed = False
|
||||||
|
|
@ -497,4 +447,32 @@ def main() -> None:
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
main()
|
main()
|
||||||
|
|
||||||
|
|
||||||
|
# --- Type-Registry helper shims (safe if unused) ---
|
||||||
|
|
||||||
|
def effective_chunk_profile(note_type: str, registry: dict) -> str | None:
|
||||||
|
try:
|
||||||
|
reg = registry or {}
|
||||||
|
types = reg.get("types", {}) if isinstance(reg, dict) else {}
|
||||||
|
# take exact type or fallback to concept
|
||||||
|
cfg = types.get(note_type) or types.get("concept") or {}
|
||||||
|
prof = cfg.get("chunk_profile")
|
||||||
|
if isinstance(prof, str) and prof in {"short", "medium", "long"}:
|
||||||
|
return prof
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
return None
|
||||||
|
|
||||||
|
def effective_retriever_weight(note_type: str, registry: dict) -> float | None:
|
||||||
|
try:
|
||||||
|
reg = registry or {}
|
||||||
|
types = reg.get("types", {}) if isinstance(reg, dict) else {}
|
||||||
|
cfg = types.get(note_type) or types.get("concept") or {}
|
||||||
|
w = cfg.get("retriever_weight")
|
||||||
|
if w is None:
|
||||||
|
return None
|
||||||
|
return float(w)
|
||||||
|
except Exception:
|
||||||
|
return None
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user