mitai-jinkendo/backend/placeholder_registry.py
Lars baeddd7c13
All checks were successful
Deploy Development / deploy (push) Successful in 48s
Build Test / pytest-backend (push) Successful in 4s
Build Test / lint-backend (push) Successful in 0s
Build Test / build-frontend (push) Successful in 16s
feat: Enhance placeholder system with AI context support
- Introduced `build_ai_placeholder_caption` function in `placeholder_registry.py` to generate AI context captions based on placeholder metadata.
- Updated `resolve_placeholders` in `placeholder_resolver.py` to support modifiers for AI context, allowing for enhanced descriptions when placeholders are resolved.
- Modified `get_placeholder_catalog` to include AI captions in the output, improving the metadata available for placeholders.
- Adjusted `export_placeholder_values` to include AI captions in the exported data, enhancing the information provided to users.

These changes improve the flexibility and functionality of the placeholder system, enabling richer context generation for dynamic content.
2026-04-11 21:36:29 +02:00

316 lines
9.6 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
Placeholder/Metric Registry Framework
Central registry for all placeholders/metrics ensuring consistent metadata across:
- Backend prompt resolution (Layer 2a)
- GUI selection lists
- Extended export
- Validation
- Chart assignment (Layer 2b)
Version: 1.0 (Part A - Nutrition Basis Metrics)
"""
from dataclasses import dataclass, field, asdict
from typing import Callable, Dict, List, Optional, Any
from enum import Enum
class EvidenceType(str, Enum):
    """Label describing where the content of a metadata field comes from.

    The enum mixes in ``str``, so every member compares equal to its plain
    string value (for example ``EvidenceType.MIXED == "mixed"``).
    """

    # Distinction between code-derived and draft-derived information.
    CODE_DERIVED = "code-derived"
    DRAFT_DERIVED = "draft-derived"
    MIXED = "mixed"

    # Labels for fields whose provenance is not settled yet.
    UNRESOLVED = "unresolved"
    TO_VERIFY = "to_verify"
class OutputType(str, Enum):
    """Kind of value a placeholder produces.

    The enum mixes in ``str``, so members compare equal to their string
    values.
    """

    # Scalar outputs
    NUMERIC = "numeric"
    STRING = "string"
    BOOLEAN = "boolean"

    # Structured or free-text outputs
    JSON = "json"
    LIST = "list"
    TEXT_SUMMARY = "text_summary"
class PlaceholderType(str, Enum):
    """Semantic classification of a placeholder.

    The enum mixes in ``str``, so members compare equal to their string
    values.
    """

    ATOMIC = "atomic"
    RAW_DATA = "raw_data"
    INTERPRETED = "interpreted"
    SCORE = "score"
    META = "meta"
@dataclass
class MissingValuePolicy:
    """Structured description of how a missing placeholder value is handled.

    All four fields are required and have no defaults.
    """

    available: bool
    value_raw: Optional[Any]
    # Expected values: no_data, insufficient_data, resolver_error,
    # calculation_error, not_applicable
    missing_reason: str
    legacy_display: str
@dataclass
class PlaceholderMetadata:
    """
    Complete metadata for a placeholder/metric.

    All fields track their evidence type to maintain transparency
    about what is code-derived vs. draft-derived. The per-field evidence
    labels live in ``evidence`` and are accessed through
    ``get_evidence`` / ``set_evidence``.
    """

    # Core identification
    key: str
    category: str
    description: str

    # Technical (typically code-derived)
    resolver_module: str
    resolver_function: str
    data_layer_module: Optional[str] = None
    data_layer_function: Optional[str] = None
    source_tables: List[str] = field(default_factory=list)

    # Semantic (typically draft-derived or mixed)
    semantic_contract: str = ""
    business_meaning: str = ""
    unit: str = ""
    time_window: str = ""
    output_type: OutputType = OutputType.STRING
    placeholder_type: PlaceholderType = PlaceholderType.INTERPRETED
    format_hint: str = ""
    example_output: str = ""

    # Quality (mixed sources)
    minimum_data_requirements: Optional[str] = None
    quality_filter_policy: Optional[str] = None
    confidence_logic: Optional[str] = None
    missing_value_policy: Optional[MissingValuePolicy] = None
    known_limitations: Optional[str] = None

    # Architecture (code-derived)
    layer_1_decision: Optional[str] = None
    layer_2a_decision: Optional[str] = None
    layer_2b_reuse_possible: Optional[bool] = None
    architecture_alignment: Optional[str] = None
    issue_53_alignment: Optional[str] = None

    # Evidence tracking: field name -> evidence label
    evidence: Dict[str, EvidenceType] = field(default_factory=dict)

    # Runtime resolver (not serialized to export)
    _resolver_func: Optional[Callable] = field(default=None, repr=False, compare=False)

    def to_dict(self, include_resolver: bool = False) -> Dict:
        """Convert the metadata to a plain dictionary for export.

        Enum fields (``output_type``, ``placeholder_type`` and the values of
        ``evidence``) are replaced by their string values, and a set
        ``missing_value_policy`` is emitted as a nested dictionary.

        Args:
            include_resolver: When true, the result keeps a
                ``_resolver_func`` entry holding the registered callable
                itself; otherwise that key is omitted.

        Returns:
            Dictionary with one entry per dataclass field.
        """
        # Function-scope import: the module only imports ``dataclass``,
        # ``field`` and ``asdict`` from ``dataclasses``.
        from dataclasses import replace

        # asdict() deep-copies every leaf value. Detach the resolver first so
        # a bound-method or callable-object resolver does not get its owning
        # object deep-copied (expensive, and an error for objects holding
        # locks or connections).
        data = asdict(replace(self, _resolver_func=None))

        if include_resolver:
            data['_resolver_func'] = self._resolver_func
        else:
            # Remove private fields
            data.pop('_resolver_func', None)

        # Convert enums to strings
        data['output_type'] = self.output_type.value
        data['placeholder_type'] = self.placeholder_type.value

        # Convert evidence dict
        data['evidence'] = {k: v.value for k, v in self.evidence.items()}

        # missing_value_policy needs no extra handling: asdict() above has
        # already converted the nested dataclass into a dictionary.
        return data

    def get_evidence(self, field_name: str) -> Optional[EvidenceType]:
        """Return the evidence type recorded for ``field_name``, or None."""
        return self.evidence.get(field_name)

    def set_evidence(self, field_name: str, evidence_type: EvidenceType):
        """Record ``evidence_type`` for ``field_name``, replacing any previous entry."""
        self.evidence[field_name] = evidence_type

    def validate(self) -> List[str]:
        """Check that the required fields are filled in.

        Returns:
            One ``"Missing <field>"`` message per required field that is
            empty, in declaration order; an empty list when all are set.
        """
        required_fields = (
            "key",
            "category",
            "description",
            "resolver_module",
            "resolver_function",
            "semantic_contract",
            "unit",
            "time_window",
        )
        return [
            f"Missing {name}"
            for name in required_fields
            if not getattr(self, name)
        ]
class PlaceholderRegistry:
    """
    Key-indexed store of placeholder/metric metadata.

    Acts as the single source of truth that all consumers of placeholder
    metadata read from.
    """

    def __init__(self):
        # Maps placeholder key -> its metadata record.
        self._registry: Dict[str, PlaceholderMetadata] = {}

    def register(
        self,
        metadata: PlaceholderMetadata,
        resolver_func: Optional[Callable] = None
    ):
        """
        Add a placeholder to the registry.

        Args:
            metadata: Complete placeholder metadata
            resolver_func: Optional resolver function; when given it is
                stored on ``metadata`` for later runtime resolution

        Raises:
            ValueError: If a placeholder with the same key already exists
        """
        key = metadata.key
        if key in self._registry:
            raise ValueError(f"Placeholder {key} already registered")

        if resolver_func:
            metadata._resolver_func = resolver_func
        self._registry[key] = metadata

    def get(self, key: str) -> Optional[PlaceholderMetadata]:
        """Look up the metadata stored under ``key`` (None when unknown)."""
        return self._registry.get(key)

    def get_all(self) -> Dict[str, PlaceholderMetadata]:
        """Return a shallow copy of the key -> metadata mapping."""
        return dict(self._registry)

    def get_by_category(self, category: str) -> List[PlaceholderMetadata]:
        """Return every placeholder whose category equals ``category`` (for GUI selection lists)."""
        selected = []
        for entry in self._registry.values():
            if entry.category == category:
                selected.append(entry)
        return selected

    def get_all_for_export(self) -> List[Dict]:
        """Return the ``to_dict()`` form of every placeholder for extended export."""
        exported = []
        for entry in self._registry.values():
            exported.append(entry.to_dict())
        return exported

    def get_by_evidence_type(self, evidence_type: EvidenceType) -> Dict[str, List[str]]:
        """
        Find the fields carrying a given evidence type (for quality assurance).

        Returns:
            Dict mapping placeholder_key to the field_names with that evidence
            type; placeholders without a matching field are left out
        """
        matches = {}
        for placeholder_key, entry in self._registry.items():
            hits = [
                name
                for name, recorded in entry.evidence.items()
                if recorded == evidence_type
            ]
            if not hits:
                continue
            matches[placeholder_key] = hits
        return matches

    def validate_all(self) -> Dict[str, List[str]]:
        """
        Run ``validate()`` on every registered placeholder.

        Returns:
            Dict mapping placeholder_key to its validation issues; placeholders
            without issues are left out
        """
        report = {
            placeholder_key: entry.validate()
            for placeholder_key, entry in self._registry.items()
        }
        return {
            placeholder_key: found
            for placeholder_key, found in report.items()
            if found
        }

    def resolve(self, key: str, profile_id: str) -> str:
        """
        Compute the value of a placeholder for a profile.

        Args:
            key: Placeholder key
            profile_id: User profile ID

        Returns:
            Whatever the registered resolver returns for ``profile_id``

        Raises:
            ValueError: If the key is not registered or has no resolver function
        """
        entry = self.get(key)
        if not entry:
            raise ValueError(f"Placeholder {key} not registered")

        resolver = entry._resolver_func
        if not resolver:
            raise ValueError(f"Placeholder {key} has no resolver function")
        return resolver(profile_id)
def build_ai_placeholder_caption(metadata: PlaceholderMetadata, max_len: int = 400) -> str:
    """
    Build a short caption for AI context (e.g. modifier ``|d``).

    The caption explains meaning and scale of a placeholder; it does not
    replace the raw output. The main text is the first non-empty value of
    ``business_meaning``, ``semantic_contract`` and ``description``. Score
    placeholders get an explicit 0-100 scale hint, other placeholders get a
    unit hint when the unit is not already mentioned in the text.

    Args:
        metadata: Placeholder metadata to describe.
        max_len: Soft length limit. A ``semantic_contract`` longer than this
            is cut to exactly ``max_len`` characters including the ellipsis;
            the assembled caption is only clipped once it exceeds
            ``max_len + 120`` characters.

    Returns:
        The caption, or the description, or the placeholder key when no
        text is available.
    """
    # Single-character ellipsis (U+2026) so a clipped semantic_contract is
    # exactly max_len long. Previously an empty string was appended, leaving
    # truncated text without any marker.
    ellipsis = "\u2026"

    chunks: List[str] = []
    bm = (metadata.business_meaning or "").strip()
    sc = (metadata.semantic_contract or "").strip()
    desc = (metadata.description or "").strip()

    if bm:
        chunks.append(bm)
    elif sc:
        chunks.append(sc if len(sc) <= max_len else sc[: max_len - 1] + ellipsis)
    elif desc:
        chunks.append(desc)

    if metadata.placeholder_type == PlaceholderType.SCORE:
        # Range separator restored; the text used to read "0100".
        chunks.append("Skala 0-100: höher = im Modell günstiger / besser abgestimmt.")

    unit = (metadata.unit or "").strip()
    if unit and metadata.placeholder_type != PlaceholderType.SCORE:
        blob = " ".join(chunks).lower()
        u_low = unit.lower()
        # Skip the unit hint when the text already mentions the unit, with or
        # without embedded spaces.
        if u_low not in blob and u_low.replace(" ", "") not in blob.replace(" ", ""):
            # Units that carry no additional information. "0100" is kept for
            # backward compatibility; the en-dash spelling is accepted too.
            if u_low not in ("score (0-100)", "0-100", "0100", "0\u2013100", "dimensionless"):
                chunks.append(f"Technischer Bezug: {unit}.")

    out = " ".join(c for c in chunks if c).strip()
    # Generous slack on purpose: only clip captions that overshoot max_len
    # by more than 120 characters.
    if len(out) > max_len + 120:
        out = out[: max_len + 60] + ellipsis
    return out or desc or metadata.key
# Global registry instance: created once at import time and shared by
# get_registry() and register_placeholder().
_global_registry = PlaceholderRegistry()
def get_registry() -> PlaceholderRegistry:
    """Return the module-level placeholder registry.

    Every call returns the same ``PlaceholderRegistry`` instance.
    """
    registry = _global_registry
    return registry
def register_placeholder(
    metadata: PlaceholderMetadata,
    resolver_func: Optional[Callable] = None
):
    """
    Add a placeholder to the module-level registry.

    Thin convenience wrapper that forwards both arguments to the global
    registry's ``register`` method.

    Args:
        metadata: Complete placeholder metadata
        resolver_func: Optional resolver function

    Raises:
        ValueError: If the placeholder key is already registered
    """
    _global_registry.register(metadata, resolver_func=resolver_func)