mitai-jinkendo/backend/placeholder_registry.py
Lars 4868e44882
All checks were successful
Deploy Development / deploy (push) Successful in 55s
Build Test / pytest-backend (push) Successful in 4s
Build Test / lint-backend (push) Successful in 0s
Build Test / build-frontend (push) Successful in 17s
feat: Refine placeholder resolution with enhanced modifiers support
- Updated `resolve_placeholders` in `prompt_executor.py` to support combined modifiers for placeholders, allowing for more flexible output formats.
- Enhanced `build_ai_placeholder_caption` in `placeholder_registry.py` to clarify the generation of AI context captions, focusing on descriptions and explanations.
- Introduced new helper functions in `placeholder_resolver.py` to streamline the retrieval of descriptions and explanations for placeholders.
- Modified tests to cover new functionality, ensuring accurate behavior for combined modifiers and improved placeholder resolution.

These changes enhance the usability and clarity of placeholder outputs, providing users with richer contextual information.
2026-04-11 21:58:29 +02:00

318 lines
9.5 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
Placeholder/Metric Registry Framework
Central registry for all placeholders/metrics ensuring consistent metadata across:
- Backend prompt resolution (Layer 2a)
- GUI selection lists
- Extended export
- Validation
- Chart assignment (Layer 2b)
Version: 1.0 (Part A - Nutrition Basis Metrics)
"""
from dataclasses import dataclass, field, asdict
from typing import Callable, Dict, List, Optional, Any
from enum import Enum
class EvidenceType(str, Enum):
    """Provenance label recorded per metadata field.

    Members are also ``str`` instances, so they compare equal to their
    plain string value and can be looked up from it via ``EvidenceType(value)``.
    """

    # Values are the serialized identifiers; note the mixed hyphen/underscore
    # spelling is part of the stored format and must not be normalised.
    CODE_DERIVED = "code-derived"
    DRAFT_DERIVED = "draft-derived"
    MIXED = "mixed"
    UNRESOLVED = "unresolved"
    TO_VERIFY = "to_verify"
class OutputType(str, Enum):
    """Kind of value a placeholder produces.

    Members are also ``str`` instances and compare equal to their value.
    """

    NUMERIC = "numeric"
    STRING = "string"
    BOOLEAN = "boolean"
    JSON = "json"
    LIST = "list"
    TEXT_SUMMARY = "text_summary"
class PlaceholderType(str, Enum):
    """Semantic classification of a placeholder.

    Members are also ``str`` instances and compare equal to their value.
    """

    ATOMIC = "atomic"
    RAW_DATA = "raw_data"
    INTERPRETED = "interpreted"
    SCORE = "score"
    META = "meta"
@dataclass
class MissingValuePolicy:
    """Structured description of how a missing placeholder value is handled.

    All four fields are required and positional, in the order declared below.
    """

    available: bool
    value_raw: Optional[Any]
    # One of: no_data, insufficient_data, resolver_error, calculation_error,
    # not_applicable (not enforced by this class).
    missing_reason: str
    legacy_display: str
@dataclass
class PlaceholderMetadata:
    """
    Complete metadata for a placeholder/metric.

    The ``evidence`` mapping records, per field name, which
    :class:`EvidenceType` applies, so that code-derived and draft-derived
    information stay distinguishable.
    """

    # Core identification
    key: str
    category: str
    description: str

    # Technical (typically code-derived)
    resolver_module: str
    resolver_function: str
    data_layer_module: Optional[str] = None
    data_layer_function: Optional[str] = None
    source_tables: List[str] = field(default_factory=list)

    # Semantic (typically draft-derived or mixed)
    semantic_contract: str = ""
    business_meaning: str = ""
    unit: str = ""
    time_window: str = ""
    output_type: OutputType = OutputType.STRING
    placeholder_type: PlaceholderType = PlaceholderType.INTERPRETED
    format_hint: str = ""
    example_output: str = ""

    # Quality (mixed sources)
    minimum_data_requirements: Optional[str] = None
    quality_filter_policy: Optional[str] = None
    confidence_logic: Optional[str] = None
    missing_value_policy: Optional[MissingValuePolicy] = None
    known_limitations: Optional[str] = None

    # Architecture (code-derived)
    layer_1_decision: Optional[str] = None
    layer_2a_decision: Optional[str] = None
    layer_2b_reuse_possible: Optional[bool] = None
    architecture_alignment: Optional[str] = None
    issue_53_alignment: Optional[str] = None

    # Evidence tracking: field name -> evidence type
    evidence: Dict[str, EvidenceType] = field(default_factory=dict)

    # Runtime resolver (not serialized to export unless explicitly requested)
    _resolver_func: Optional[Callable] = field(default=None, repr=False, compare=False)

    def to_dict(self, include_resolver: bool = False) -> Dict:
        """Convert the metadata to a plain dictionary for export.

        Args:
            include_resolver: When true, the result also carries the
                ``_resolver_func`` entry holding the registered callable
                itself (not a copy). When false, the key is omitted.

        Returns:
            Dict with one entry per field. ``output_type`` and
            ``placeholder_type`` are replaced by their string values,
            ``evidence`` maps field names to evidence string values, and a
            set ``missing_value_policy`` is a nested dict.
        """
        # Local import: the module header only imports dataclass/field/asdict.
        from dataclasses import replace

        # asdict() deep-copies every non-dataclass value. Running it over the
        # resolver would deep-copy the owner of a bound method (or the
        # arguments of a partial), which is wasteful and fails outright for
        # un-copyable state such as locks. Blank the resolver on a shallow
        # copy first; nested dataclasses (missing_value_policy) are still
        # converted recursively by asdict() itself.
        data = asdict(replace(self, _resolver_func=None))

        if include_resolver:
            data['_resolver_func'] = self._resolver_func
        else:
            data.pop('_resolver_func', None)

        # Convert enums to strings
        data['output_type'] = self.output_type.value
        data['placeholder_type'] = self.placeholder_type.value
        data['evidence'] = {k: v.value for k, v in self.evidence.items()}
        return data

    def get_evidence(self, field_name: str) -> Optional[EvidenceType]:
        """Return the evidence type recorded for ``field_name``, or ``None``."""
        return self.evidence.get(field_name)

    def set_evidence(self, field_name: str, evidence_type: EvidenceType):
        """Record (or overwrite) the evidence type for ``field_name``."""
        self.evidence[field_name] = evidence_type

    def validate(self) -> List[str]:
        """Check that the required fields are non-empty.

        Returns:
            A list of ``"Missing <field>"`` messages, in declaration order of
            the checked fields; empty when nothing is missing.
        """
        required_fields = (
            "key",
            "category",
            "description",
            "resolver_module",
            "resolver_function",
            "semantic_contract",
            "unit",
            "time_window",
        )
        return [
            f"Missing {name}"
            for name in required_fields
            if not getattr(self, name)
        ]
class PlaceholderRegistry:
    """
    In-memory store of placeholder metadata, keyed by placeholder key.

    Acts as the single lookup point for metadata, export, validation and
    runtime resolution.
    """

    def __init__(self):
        self._registry: Dict[str, PlaceholderMetadata] = {}

    def register(
        self,
        metadata: PlaceholderMetadata,
        resolver_func: Optional[Callable] = None
    ):
        """
        Add a placeholder to the registry.

        Args:
            metadata: Complete placeholder metadata
            resolver_func: Optional resolver function (for runtime resolution);
                when given, it is stored on ``metadata`` itself.

        Raises:
            ValueError: If a placeholder with the same key already exists.
        """
        placeholder_key = metadata.key
        if placeholder_key in self._registry:
            raise ValueError(f"Placeholder {placeholder_key} already registered")

        # Attach the resolver to the metadata object before storing it.
        if resolver_func:
            metadata._resolver_func = resolver_func

        self._registry[placeholder_key] = metadata

    def get(self, key: str) -> Optional[PlaceholderMetadata]:
        """Return the metadata registered under ``key``, or ``None``."""
        return self._registry.get(key)

    def get_all(self) -> Dict[str, PlaceholderMetadata]:
        """Return a shallow copy of the key -> metadata mapping."""
        return dict(self._registry)

    def get_by_category(self, category: str) -> List[PlaceholderMetadata]:
        """Return all metadata entries whose category equals ``category``."""
        matches = []
        for entry in self._registry.values():
            if entry.category == category:
                matches.append(entry)
        return matches

    def get_all_for_export(self) -> List[Dict]:
        """Return every entry converted with ``to_dict()`` (extended export)."""
        exported = []
        for entry in self._registry.values():
            exported.append(entry.to_dict())
        return exported

    def get_by_evidence_type(self, evidence_type: EvidenceType) -> Dict[str, List[str]]:
        """
        Collect the fields tagged with a given evidence type.

        Returns:
            Dict mapping placeholder_key to the list of field names carrying
            ``evidence_type``; placeholders without such fields are omitted.
        """
        by_placeholder = {}
        for placeholder_key, entry in self._registry.items():
            tagged = []
            for field_name, recorded in entry.evidence.items():
                if recorded == evidence_type:
                    tagged.append(field_name)
            if tagged:
                by_placeholder[placeholder_key] = tagged
        return by_placeholder

    def validate_all(self) -> Dict[str, List[str]]:
        """
        Run ``validate()`` on every registered placeholder.

        Returns:
            Dict mapping placeholder_key to its validation issues; placeholders
            without issues are omitted.
        """
        findings = {
            placeholder_key: entry.validate()
            for placeholder_key, entry in self._registry.items()
        }
        return {
            placeholder_key: problems
            for placeholder_key, problems in findings.items()
            if problems
        }

    def resolve(self, key: str, profile_id: str) -> str:
        """
        Resolve a placeholder value for a profile.

        Args:
            key: Placeholder key
            profile_id: User profile ID

        Returns:
            Whatever the registered resolver returns for ``profile_id``.

        Raises:
            ValueError: If the key is unknown or has no resolver attached.
        """
        entry = self.get(key)
        if not entry:
            raise ValueError(f"Placeholder {key} not registered")

        resolver = entry._resolver_func
        if not resolver:
            raise ValueError(f"Placeholder {key} has no resolver function")

        return resolver(profile_id)
def build_ai_placeholder_caption(metadata: PlaceholderMetadata, max_len: int = 400) -> str:
    """
    Build the short explanatory caption used for ``{{key|x}}`` and the
    export field ``ai_caption`` (no value, no unit).

    Content: ``business_meaning`` if set, otherwise ``semantic_contract``
    shortened to ``max_len`` characters; for SCORE placeholders a hint about
    the 0–100 scale is appended. ``description`` is never emitted here; it is
    only used to drop an explanation that merely repeats it.

    Args:
        metadata: Placeholder metadata to describe.
        max_len: Length budget for the semantic-contract fallback. The final
            caption is additionally cut once it exceeds ``max_len + 120``.

    Returns:
        The caption, or an empty string when there is nothing to say.
    """
    truncation_marker = "…"

    desc = (metadata.description or "").strip()
    bm = (metadata.business_meaning or "").strip()
    sc = (metadata.semantic_contract or "").strip()

    chunks: List[str] = []

    interpret = bm
    if not interpret and sc:
        if len(sc) <= max_len:
            interpret = sc
        else:
            # Reserve one character for the marker; clamp so that a tiny
            # max_len cannot turn into a negative (from-the-end) slice.
            interpret = sc[: max(max_len - 1, 0)] + truncation_marker

    if interpret:
        # Skip the explanation when it just contains the (non-trivial)
        # description verbatim, ignoring case.
        redundant = bool(
            desc
            and len(desc) >= 10
            and desc.lower() in interpret.lower()
        )
        if not redundant:
            chunks.append(interpret)

    if metadata.placeholder_type == PlaceholderType.SCORE:
        chunks.append("Skala 0–100: höher = im Modell günstiger / besser abgestimmt.")

    out = " ".join(c for c in chunks if c).strip()
    # Hard cap for the combined text (business_meaning itself is not limited
    # by max_len above).
    if len(out) > max_len + 120:
        out = out[: max_len + 60] + truncation_marker
    return out
# Process-wide registry instance, created once at import time.
_global_registry: PlaceholderRegistry = PlaceholderRegistry()


def get_registry() -> PlaceholderRegistry:
    """Return the module-level :class:`PlaceholderRegistry` instance."""
    return _global_registry
def register_placeholder(
    metadata: PlaceholderMetadata,
    resolver_func: Optional[Callable] = None
):
    """
    Convenience wrapper: register a placeholder in the global registry.

    Args:
        metadata: Complete placeholder metadata
        resolver_func: Optional resolver function

    Raises:
        ValueError: Propagated from the registry when the key already exists.
    """
    _global_registry.register(metadata=metadata, resolver_func=resolver_func)