mindnet/app/core/chunk_payload.py
Lars 6dc37ccb66
All checks were successful
Deploy mindnet to llm-node / deploy (push) Successful in 3s
Dateien nach "app/core" hochladen
2025-11-08 22:06:21 +01:00

145 lines
4.6 KiB
Python

"""
chunk_payload.py — Mindnet payload helpers
Version: 0.5.2 (generated 2025-11-08 21:03:48)
Purpose:
- Build the list of CHUNK payloads while preserving existing chunk fields (text, seq, etc.).
- Inject into *every* chunk:
    * retriever_weight (resolved like the note payload)
    * chunk_profile (resolved like the note payload; omitted when nothing resolves)
Resolution order is identical to note_payload.make_note_payload.
The signature is tolerant (extra keyword arguments are accepted) to match existing importers.
"""
from __future__ import annotations
from typing import Any, Dict, List, Optional, Union
from pathlib import Path
import os
try:
import yaml # type: ignore
except Exception: # pragma: no cover
yaml = None # will skip YAML loading if unavailable
def _coerce_mapping(obj: Any) -> Dict[str, Any]:
    """Return a shallow ``dict`` copy of a parsed note given as mapping or object.

    Args:
        obj: ``None``, a ``dict``, or an arbitrary object carrying note
            attributes.

    Returns:
        A new dictionary. ``None`` yields an empty dict, a ``dict`` is
        shallow-copied, and any other object contributes its instance
        ``__dict__`` plus the well-known note attributes listed below when
        they are reachable via ``getattr`` (e.g. properties, class attributes
        or ``__slots__`` members) and not already present.
    """
    if obj is None:
        return {}
    if isinstance(obj, dict):
        return dict(obj)

    out: Dict[str, Any] = {}
    if hasattr(obj, "__dict__"):
        out.update(vars(obj))

    # Attributes that are not stored in ``__dict__`` would otherwise be lost.
    for key in ("id", "note_id", "title", "type", "path", "source_path", "frontmatter"):
        if key not in out and hasattr(obj, key):
            out[key] = getattr(obj, key)
    return out
def _coerce_chunk_dict(obj: Any) -> Dict[str, Any]:
    """Return a shallow ``dict`` copy of a chunk given as mapping or object.

    Args:
        obj: A ``dict`` or an arbitrary chunk object.

    Returns:
        A new dictionary. A ``dict`` is shallow-copied. For any other object
        the common chunk attributes listed below are read via ``getattr``
        first, then every remaining entry of the instance ``__dict__`` is
        added without overriding the values already collected.
    """
    if isinstance(obj, dict):
        return dict(obj)

    fields: Dict[str, Any] = {}
    # Common attributes of a chunk object; may be properties or slots.
    for key in (
        "chunk_id", "id", "note_id", "seq", "start", "end",
        "text", "title", "type", "source_path",
    ):
        if hasattr(obj, key):
            fields[key] = getattr(obj, key)

    if hasattr(obj, "__dict__"):
        for key, value in obj.__dict__.items():
            fields.setdefault(key, value)
    return fields
def _get_frontmatter(parsed: Dict[str, Any]) -> Dict[str, Any]:
    """Return a shallow copy of ``parsed["frontmatter"]``.

    Returns an empty dict when the key is missing or its value is not a
    ``dict``.
    """
    frontmatter = parsed.get("frontmatter")
    if isinstance(frontmatter, dict):
        return dict(frontmatter)
    return {}
def _load_types_from_yaml(types_file: Optional[Union[str, Path]]) -> Dict[str, Any]:
    """Load the type registry from a YAML file, best effort.

    Args:
        types_file: Path to the YAML file. When ``None``, the first existing
            file among ``config/types.yaml``, ``config/types.yml``,
            ``config.yaml`` and ``config.yml`` (relative to the current
            working directory) is used.

    Returns:
        The mapping stored under the top-level ``types`` key when that value
        is a ``dict`` (shallow-copied); otherwise the whole document when it
        is a ``dict``. An empty dict is returned when no file is found, the
        ``yaml`` module is unavailable, the document is not a mapping, or
        reading/parsing fails.
    """
    if types_file is None:
        for candidate in (
            Path("config/types.yaml"),
            Path("config/types.yml"),
            Path("config.yaml"),
            Path("config.yml"),
        ):
            if candidate.exists():
                types_file = candidate
                break
    if types_file is None or yaml is None:
        return {}

    path = Path(types_file)
    if not path.exists():
        return {}

    try:
        data = yaml.safe_load(path.read_text(encoding="utf-8"))
    except Exception:
        # Deliberately best effort: an unreadable or malformed registry must
        # not break payload generation; callers fall back to other defaults.
        return {}

    if not isinstance(data, dict):
        return {}
    nested = data.get("types")
    if isinstance(nested, dict):
        return dict(nested)
    return data
def _resolve_type_defaults(note_type: Optional[str], types: Optional[Dict[str, Any]]) -> Dict[str, Any]:
    """Return a shallow copy of the defaults block registered for ``note_type``.

    Args:
        note_type: Key to look up in ``types``.
        types: Registry mapping type names to their default settings.

    Returns:
        A copy of ``types[note_type]`` when it is a ``dict``; an empty dict
        when either argument is empty, ``types`` is not a ``dict``, the type
        is unknown, or its entry is not a ``dict``.
    """
    if not note_type or not types or not isinstance(types, dict):
        return {}
    try:
        defaults = types.get(note_type)
    except TypeError:
        # An unhashable note type (e.g. a list from frontmatter) matches nothing.
        return {}
    return dict(defaults) if isinstance(defaults, dict) else {}
def _to_float(val: Any, fallback: float) -> float:
    """Convert ``val`` to ``float``, returning ``fallback`` when that fails.

    Args:
        val: Value to convert; ``None`` is treated as "not set".
        fallback: Result used when ``val`` is ``None`` or not convertible.

    Returns:
        ``float(val)`` on success, otherwise ``fallback``.
    """
    if val is None:
        return fallback
    try:
        return float(val)
    except (TypeError, ValueError, OverflowError):
        # TypeError: unsupported type; ValueError: unparsable string;
        # OverflowError: integer too large for a float.
        return fallback
def _first_nonempty(*vals: Any) -> Any:
    """Return the first argument that is neither ``None`` nor a blank string.

    Falsy non-string values such as ``0`` or ``False`` count as set.
    Returns ``None`` when no argument qualifies.
    """
    for candidate in vals:
        if candidate is None:
            continue
        if isinstance(candidate, str) and candidate.strip() == "":
            continue
        return candidate
    return None
def make_chunk_payloads(parsed_note: Any, chunks: List[Any], **kwargs) -> List[Dict[str, Any]]:
    """Build one payload dict per chunk, stamped with note-level retrieval settings.

    Every chunk keeps its existing fields. ``retriever_weight`` is always set
    (replacing any value the chunk already carried); ``chunk_profile`` is set
    only when a value could be resolved.

    Resolution order for ``retriever_weight``: frontmatter
    ``retriever_weight`` -> type defaults ``retriever_weight`` -> environment
    variable ``MINDNET_DEFAULT_RETRIEVER_WEIGHT`` -> ``1.0``. The first
    non-empty candidate is chosen; if it cannot be converted to ``float`` the
    result is ``1.0``.

    Resolution order for ``chunk_profile``: frontmatter ``chunk_profile`` ->
    frontmatter ``profile`` -> type defaults ``chunk_profile`` -> environment
    variable ``MINDNET_DEFAULT_CHUNK_PROFILE``.

    Args:
        parsed_note: Parsed note as ``dict`` or object; its ``type`` and
            ``frontmatter`` entries are consulted.
        chunks: Chunk dicts or objects; ``None`` is treated as empty.
        **kwargs: Recognised keys are ``types`` / ``types_registry`` (a
            ``dict`` registry of per-type defaults) and ``types_file`` (path
            of a YAML registry, used only when no registry dict is given).
            Other keys are ignored.

    Returns:
        A list of new payload dictionaries, one per input chunk, in order.
    """
    parsed = _coerce_mapping(parsed_note)
    fm = _get_frontmatter(parsed)

    # An explicitly supplied registry wins; only read the YAML file when
    # none was passed, so the common path does no filesystem access.
    types_registry = kwargs.get("types") or kwargs.get("types_registry")
    if isinstance(types_registry, dict):
        types_all: Dict[str, Any] = types_registry
    else:
        types_all = _load_types_from_yaml(kwargs.get("types_file"))

    note_type: Optional[str] = _first_nonempty(parsed.get("type"), fm.get("type"))
    type_defaults = _resolve_type_defaults(note_type, types_all)

    # Unset or unparsable environment values collapse to 1.0, so this
    # candidate is never empty and terminates the chain below.
    env_default_val = _to_float(os.getenv("MINDNET_DEFAULT_RETRIEVER_WEIGHT"), 1.0)
    effective_retriever_weight = _to_float(
        _first_nonempty(
            fm.get("retriever_weight"),
            type_defaults.get("retriever_weight"),
            env_default_val,
        ),
        1.0,
    )

    effective_chunk_profile = _first_nonempty(
        fm.get("chunk_profile"),
        fm.get("profile"),
        type_defaults.get("chunk_profile"),
        os.getenv("MINDNET_DEFAULT_CHUNK_PROFILE"),
    )

    out: List[Dict[str, Any]] = []
    for ch in chunks or []:
        payload = _coerce_chunk_dict(ch)  # preserve all existing chunk fields
        payload["retriever_weight"] = effective_retriever_weight
        if effective_chunk_profile is not None:
            payload["chunk_profile"] = effective_chunk_profile
        out.append(payload)
    return out