- Updated `build_ai_placeholder_caption` in `placeholder_registry.py` to improve the generation of AI context captions by prioritizing descriptions and avoiding redundancy.
- Introduced `format_value_with_d_modifier` in `placeholder_resolver.py` to format values with contextual information, enhancing the clarity of exported placeholder values.
- Modified `export_placeholder_values` in `prompts.py` to utilize the new formatting function, ensuring that exported data includes both raw values and contextual descriptions.
- Added tests for the new formatting function and updated existing tests to ensure accurate caption generation.

These changes improve the contextual relevance of placeholder data and enhance the user experience when interacting with exported values.
338 lines
10 KiB
Python
338 lines
10 KiB
Python
"""
|
||
Placeholder/Metric Registry Framework

Central registry for all placeholders/metrics ensuring consistent metadata across:
- Backend prompt resolution (Layer 2a)
- GUI selection lists
- Extended export
- Validation
- Chart assignment (Layer 2b)

Version: 1.0 (Part A - Nutrition Basis Metrics)
|
||
"""
|
||
|
||
from dataclasses import asdict, dataclass, field, replace
from enum import Enum
from typing import Any, Callable, Dict, List, Optional
|
||
|
||
|
||
class EvidenceType(str, Enum):
    """Evidence type recorded for an individual metadata field.

    Members mix in ``str``, so each member compares equal to its plain
    string value (for example ``EvidenceType.MIXED == "mixed"``) and can be
    looked up from that string via ``EvidenceType("mixed")``.
    """

    CODE_DERIVED = "code-derived"
    DRAFT_DERIVED = "draft-derived"
    MIXED = "mixed"
    UNRESOLVED = "unresolved"
    TO_VERIFY = "to_verify"
|
||
|
||
|
||
class OutputType(str, Enum):
    """Output type of a resolved placeholder value.

    Members mix in ``str``, so each member compares equal to its plain
    string value and can be looked up from it (``OutputType("json")``).
    """

    NUMERIC = "numeric"
    STRING = "string"
    BOOLEAN = "boolean"
    JSON = "json"
    LIST = "list"
    TEXT_SUMMARY = "text_summary"
|
||
|
||
|
||
class PlaceholderType(str, Enum):
    """Semantic type of a placeholder.

    Members mix in ``str``, so each member compares equal to its plain
    string value and can be looked up from it (``PlaceholderType("score")``).
    """

    ATOMIC = "atomic"
    RAW_DATA = "raw_data"
    INTERPRETED = "interpreted"
    SCORE = "score"
    META = "meta"
|
||
|
||
|
||
@dataclass
class MissingValuePolicy:
    """Structured missing value handling.

    All four fields are required and positional, in the order declared below.
    """

    # Whether a value is available.
    available: bool
    # The raw value; may be None.
    value_raw: Optional[Any]
    # Reason code. Values listed by the original author: no_data,
    # insufficient_data, resolver_error, calculation_error, not_applicable.
    missing_reason: str
    # NOTE(review): presumably the text legacy consumers display when the
    # value is missing - confirm against callers.
    legacy_display: str
|
||
|
||
|
||
@dataclass
class PlaceholderMetadata:
    """
    Complete metadata for a placeholder/metric.

    All fields track their evidence type (see ``evidence``) to maintain
    transparency about what is code-derived vs. draft-derived.
    """

    # Core identification
    key: str
    category: str
    description: str

    # Technical (typically code-derived)
    resolver_module: str
    resolver_function: str
    data_layer_module: Optional[str] = None
    data_layer_function: Optional[str] = None
    source_tables: List[str] = field(default_factory=list)

    # Semantic (typically draft-derived or mixed)
    semantic_contract: str = ""
    business_meaning: str = ""
    unit: str = ""
    time_window: str = ""
    output_type: OutputType = OutputType.STRING
    placeholder_type: PlaceholderType = PlaceholderType.INTERPRETED
    format_hint: str = ""
    example_output: str = ""

    # Quality (mixed sources)
    minimum_data_requirements: Optional[str] = None
    quality_filter_policy: Optional[str] = None
    confidence_logic: Optional[str] = None
    missing_value_policy: Optional[MissingValuePolicy] = None
    known_limitations: Optional[str] = None

    # Architecture (code-derived)
    layer_1_decision: Optional[str] = None
    layer_2a_decision: Optional[str] = None
    layer_2b_reuse_possible: Optional[bool] = None
    architecture_alignment: Optional[str] = None
    issue_53_alignment: Optional[str] = None

    # Evidence tracking: maps a field name to the evidence type of that field.
    evidence: Dict[str, EvidenceType] = field(default_factory=dict)

    # Runtime resolver (not serialized to export)
    _resolver_func: Optional[Callable] = field(default=None, repr=False, compare=False)

    @staticmethod
    def _enum_value(member: Any) -> Any:
        """Return ``member.value`` for enum members, otherwise ``member`` unchanged."""
        return member.value if isinstance(member, Enum) else member

    def to_dict(self, include_resolver: bool = False) -> Dict:
        """Convert to a dictionary for export.

        Args:
            include_resolver: When True the result keeps a ``_resolver_func``
                entry holding the resolver callable itself (by reference);
                when False (default) the entry is removed.

        Returns:
            A dict with one entry per dataclass field. Enum values
            (``output_type``, ``placeholder_type``, the values of
            ``evidence``) are replaced by their string values, and a set
            ``missing_value_policy`` is converted to a nested dict.
        """
        # asdict() deep-copies every non-dataclass value. Running it over the
        # resolver would deep-copy the object behind a bound method or
        # functools.partial (or fail if that object cannot be copied), so the
        # resolver is blanked out on a shallow copy before serializing.
        data = asdict(replace(self, _resolver_func=None))

        if include_resolver:
            data['_resolver_func'] = self._resolver_func
        else:
            data.pop('_resolver_func', None)

        # Convert enums to strings
        data['output_type'] = self._enum_value(self.output_type)
        data['placeholder_type'] = self._enum_value(self.placeholder_type)

        # Evidence values may be EvidenceType members or their plain strings.
        data['evidence'] = {
            name: self._enum_value(kind) for name, kind in self.evidence.items()
        }

        # missing_value_policy needs no extra step: asdict() already recursed
        # into the nested dataclass (or left None in place).
        return data

    def get_evidence(self, field_name: str) -> Optional[EvidenceType]:
        """Get the evidence type for a field, or None if none was recorded."""
        return self.evidence.get(field_name)

    def set_evidence(self, field_name: str, evidence_type: EvidenceType) -> None:
        """Set the evidence type for a field, replacing any previous entry."""
        self.evidence[field_name] = evidence_type

    def validate(self) -> List[str]:
        """Validate metadata completeness.

        Returns:
            One ``"Missing <field>"`` message per required field that is
            empty/falsy, in declaration order; an empty list when complete.
        """
        required = (
            "key",
            "category",
            "description",
            "resolver_module",
            "resolver_function",
            "semantic_contract",
            "unit",
            "time_window",
        )
        return [f"Missing {name}" for name in required if not getattr(self, name)]
|
||
|
||
|
||
class PlaceholderRegistry:
    """
    Central registry for all placeholders/metrics.

    Ensures single source of truth for metadata across all consumers.
    Entries are stored in a dict keyed by ``PlaceholderMetadata.key``.
    """

    def __init__(self):
        self._registry: Dict[str, PlaceholderMetadata] = {}

    def register(
        self,
        metadata: PlaceholderMetadata,
        resolver_func: Optional[Callable] = None
    ):
        """
        Register a placeholder with complete metadata.

        Args:
            metadata: Complete placeholder metadata
            resolver_func: Optional resolver function (for runtime resolution).
                When given, it is stored on ``metadata._resolver_func``,
                i.e. the passed-in metadata object is modified.

        Raises:
            ValueError: If a placeholder with the same key is already registered.
        """
        if metadata.key in self._registry:
            raise ValueError(f"Placeholder {metadata.key} already registered")

        # Compare against None rather than relying on truthiness, so that a
        # callable object that happens to evaluate falsy is still attached.
        if resolver_func is not None:
            metadata._resolver_func = resolver_func

        self._registry[metadata.key] = metadata

    def get(self, key: str) -> Optional[PlaceholderMetadata]:
        """Get metadata for a placeholder, or None if the key is unknown."""
        return self._registry.get(key)

    def get_all(self) -> Dict[str, PlaceholderMetadata]:
        """Get all registered placeholders as a shallow copy of the registry dict."""
        return self._registry.copy()

    def get_by_category(self, category: str) -> List[PlaceholderMetadata]:
        """Get placeholders by category (for GUI selection lists)."""
        return [
            entry for entry in self._registry.values()
            if entry.category == category
        ]

    def get_all_for_export(self) -> List[Dict]:
        """Get all metadata for extended export (one ``to_dict()`` per entry)."""
        return [entry.to_dict() for entry in self._registry.values()]

    def get_by_evidence_type(self, evidence_type: EvidenceType) -> Dict[str, List[str]]:
        """
        Get fields by evidence type (for quality assurance).

        Returns:
            Dict mapping placeholder_key to list of field_names with that
            evidence type. Placeholders without a matching field are omitted.
        """
        result: Dict[str, List[str]] = {}
        for key, metadata in self._registry.items():
            matching = [
                field_name
                for field_name, ev_type in metadata.evidence.items()
                if ev_type == evidence_type
            ]
            if matching:
                result[key] = matching
        return result

    def validate_all(self) -> Dict[str, List[str]]:
        """
        Validate all registered placeholders.

        Returns:
            Dict mapping placeholder_key to list of validation issues.
            Placeholders without issues are omitted.
        """
        issues: Dict[str, List[str]] = {}
        for key, metadata in self._registry.items():
            found = metadata.validate()
            if found:
                issues[key] = found
        return issues

    def resolve(self, key: str, profile_id: str) -> str:
        """
        Resolve a placeholder value for a profile.

        Args:
            key: Placeholder key
            profile_id: User profile ID

        Returns:
            Whatever the registered resolver returns for ``profile_id``
            (annotated as a string; the value is passed through unchanged).

        Raises:
            ValueError: If the key is not registered or has no resolver.
        """
        metadata = self.get(key)
        if not metadata:
            raise ValueError(f"Placeholder {key} not registered")

        resolver = metadata._resolver_func
        # None means "no resolver attached"; a falsy-but-callable resolver
        # must still be invoked.
        if resolver is None:
            raise ValueError(f"Placeholder {key} has no resolver function")

        return resolver(profile_id)
|
||
|
||
|
||
def build_ai_placeholder_caption(metadata: PlaceholderMetadata, max_len: int = 400) -> str:
    """
    Build the caption text for the ``|d`` modifier and the ``ai_caption`` export field.

    The caption states first *what* the placeholder measures (``description``)
    and then how to interpret it (``business_meaning``, or a shortened
    ``semantic_contract`` when no business meaning is set), so that it is
    clear what the concrete value refers to.

    Args:
        metadata: Placeholder metadata to describe.
        max_len: Length budget. A ``semantic_contract`` longer than this is cut
            to ``max_len - 1`` characters plus an ellipsis. The final caption
            is only truncated when it exceeds ``max_len + 120`` characters, in
            which case it is cut to ``max_len + 60`` characters plus an
            ellipsis, so the result may be longer than ``max_len``.

    Returns:
        The assembled caption; falls back to the description and finally to
        ``metadata.key`` when nothing else is available.
    """
    description = (metadata.description or "").strip()
    business_meaning = (metadata.business_meaning or "").strip()
    semantic_contract = (metadata.semantic_contract or "").strip()

    chunks: List[str] = []
    if description:
        chunks.append(description)

    # Interpretation: business meaning wins; otherwise the (possibly
    # shortened) semantic contract.
    interpretation = business_meaning
    if not interpretation and semantic_contract:
        if len(semantic_contract) <= max_len:
            interpretation = semantic_contract
        else:
            interpretation = semantic_contract[: max_len - 1] + "…"

    if interpretation:
        text_so_far = " ".join(chunks).lower()
        interpretation_lower = interpretation.lower()
        # No duplicate: same text, or a long description that is already
        # contained in the interpretation.
        redundant = interpretation_lower in text_so_far or (
            description
            and len(description) >= 10
            and description.lower() in interpretation_lower
        )
        if not redundant:
            chunks.append(interpretation)

    is_score = metadata.placeholder_type == PlaceholderType.SCORE
    if is_score:
        chunks.append("Skala 0–100: höher = im Modell günstiger / besser abgestimmt.")

    unit = (metadata.unit or "").strip()
    if unit and not is_score:
        text_so_far = " ".join(chunks).lower()
        unit_lower = unit.lower()
        # The unit is often already part of the description - do not repeat
        # it. Compare with spaces and slashes removed on both sides.
        compact_text = text_so_far.replace(" ", "").replace("/", "")
        compact_unit = unit_lower.replace(" ", "").replace("/", "")
        unit_redundant = compact_unit in compact_text or (
            "g/day" in unit_lower
            and (
                "g/" in text_so_far
                or "gramm" in text_so_far
                or " protein" in text_so_far
                or " fett" in text_so_far
                or " kh" in text_so_far
            )
        ) or (
            "kcal" in unit_lower
            and ("kcal" in text_so_far or "kalorien" in text_so_far)
        )

        # These unit strings never get a unit suffix.
        suppressed_units = ("score (0-100)", "0-100", "0–100", "dimensionless")
        if not unit_redundant and unit_lower not in suppressed_units:
            chunks.append(f"Technischer Bezug: {unit}.")

    caption = " ".join(chunk for chunk in chunks if chunk).strip()
    if len(caption) > max_len + 120:
        caption = caption[: max_len + 60] + "…"
    return caption or description or metadata.key
|
||
|
||
|
||
# Global registry instance, created once at import time and shared by
# get_registry() and register_placeholder().
_global_registry: PlaceholderRegistry = PlaceholderRegistry()
|
||
|
||
|
||
def get_registry() -> PlaceholderRegistry:
    """Get the global placeholder registry (the module-level instance)."""
    return _global_registry
|
||
|
||
|
||
def register_placeholder(
    metadata: PlaceholderMetadata,
    resolver_func: Optional[Callable] = None
) -> None:
    """
    Register a placeholder in the global registry.

    Thin wrapper that forwards both arguments to the ``register`` method of
    the module-level registry instance.

    Args:
        metadata: Complete placeholder metadata
        resolver_func: Optional resolver function
    """
    _global_registry.register(metadata, resolver_func)
|