feat: Enhance placeholder caption generation and formatting
All checks were successful
Deploy Development / deploy (push) Successful in 54s
Build Test / pytest-backend (push) Successful in 8s
Build Test / lint-backend (push) Successful in 0s
Build Test / build-frontend (push) Successful in 16s

- Updated `build_ai_placeholder_caption` in `placeholder_registry.py` to improve the generation of AI context captions by prioritizing descriptions and avoiding redundancy.
- Introduced `format_value_with_d_modifier` in `placeholder_resolver.py` to format values with contextual information, enhancing the clarity of exported placeholder values.
- Modified `export_placeholder_values` in `prompts.py` to utilize the new formatting function, ensuring that exported data includes both raw values and contextual descriptions.
- Added tests for the new formatting function and updated existing tests to ensure accurate caption generation.

These changes improve the contextual relevance of placeholder data and enhance the user experience when interacting with exported values.
This commit is contained in:
Lars 2026-04-11 21:47:08 +02:00
parent baeddd7c13
commit a9a414b956
4 changed files with 91 additions and 19 deletions

View File

@ -260,21 +260,34 @@ class PlaceholderRegistry:
def build_ai_placeholder_caption(metadata: PlaceholderMetadata, max_len: int = 400) -> str: def build_ai_placeholder_caption(metadata: PlaceholderMetadata, max_len: int = 400) -> str:
""" """
Kurztext für KI-Kontext (z. B. Modifier |d): Bedeutung/Skala, ohne die Rohausgabe zu ersetzen. Text für |d und Exportfeld ai_caption: zuerst **was** der Platzhalter misst (description),
Nutzt business_meaning / semantic_contract; bei Scores explizite 0–100-Erläuterung. dann **Einordnung** (business_meaning oder gekürzter semantic_contract).
So ist klar, worauf sich der konkrete Wert bezieht – nicht nur eine Meta-Bedeutung.
""" """
chunks: List[str] = [] desc = (metadata.description or "").strip()
bm = (metadata.business_meaning or "").strip() bm = (metadata.business_meaning or "").strip()
sc = (metadata.semantic_contract or "").strip() sc = (metadata.semantic_contract or "").strip()
desc = (metadata.description or "").strip()
if bm: chunks: List[str] = []
chunks.append(bm) if desc:
elif sc:
chunks.append(sc if len(sc) <= max_len else sc[: max_len - 1] + "")
elif desc:
chunks.append(desc) chunks.append(desc)
interpret = bm
if not interpret and sc:
interpret = sc if len(sc) <= max_len else sc[: max_len - 1] + ""
if interpret:
blob = " ".join(chunks).lower()
il = interpret.lower()
# Keine Dublette: gleicher Text oder lange Description bereits in der Interpretation
redundant = il in blob or (
desc
and len(desc) >= 10
and desc.lower() in il
)
if not redundant:
chunks.append(interpret)
if metadata.placeholder_type == PlaceholderType.SCORE: if metadata.placeholder_type == PlaceholderType.SCORE:
chunks.append("Skala 0100: höher = im Modell günstiger / besser abgestimmt.") chunks.append("Skala 0100: höher = im Modell günstiger / besser abgestimmt.")
@ -282,9 +295,18 @@ def build_ai_placeholder_caption(metadata: PlaceholderMetadata, max_len: int = 4
if unit and metadata.placeholder_type != PlaceholderType.SCORE: if unit and metadata.placeholder_type != PlaceholderType.SCORE:
blob = " ".join(chunks).lower() blob = " ".join(chunks).lower()
u_low = unit.lower() u_low = unit.lower()
if u_low not in blob and u_low.replace(" ", "") not in blob.replace(" ", ""): # Einheit oft schon in description („… in g (30d)“, „Kalorien“) — nicht doppeln
if u_low not in ("score (0-100)", "0-100", "0100", "dimensionless"): compact_blob = blob.replace(" ", "").replace("/", "")
chunks.append(f"Technischer Bezug: {unit}.") compact_u = u_low.replace(" ", "").replace("/", "")
unit_redundant = compact_u in compact_blob or (
"g/day" in u_low and ("g/" in blob or "gramm" in blob or " protein" in blob or " fett" in blob or " kh" in blob)
) or ("kcal" in u_low and ("kcal" in blob or "kalorien" in blob))
if (
not unit_redundant
and u_low not in ("score (0-100)", "0-100", "0100", "dimensionless")
):
chunks.append(f"Technischer Bezug: {unit}.")
out = " ".join(c for c in chunks if c).strip() out = " ".join(c for c in chunks if c).strip()
if len(out) > max_len + 120: if len(out) > max_len + 120:

View File

@ -10,7 +10,7 @@ This module now focuses on FORMATTING for AI consumption.
import json import json
import re import re
from datetime import datetime, timedelta from datetime import datetime, timedelta
from typing import Dict, List, Optional, Callable, Tuple from typing import Any, Dict, List, Optional, Callable, Tuple
from db import get_db, get_cursor, r2d from db import get_db, get_cursor, r2d
# Phase 0c: Import data layer # Phase 0c: Import data layer
@ -62,6 +62,17 @@ def _ai_caption_for_placeholder_key(key: str) -> Optional[str]:
return None return None
def format_value_with_d_modifier(value: str, catalog_row: Dict[str, Any]) -> str:
    """
    Render a value the way a ``{{key|d}}`` placeholder is meant to appear:
    ``<value> — <context>``.

    The context is the catalog row's ``ai_caption``; when that is missing or
    blank, the row's ``description`` is used instead. (The original note says
    this mirrors prompt_executor — that module is not visible here.)

    Args:
        value: The already resolved raw placeholder value.
        catalog_row: Catalog entry; only the optional keys ``ai_caption`` and
            ``description`` are read.

    Returns:
        ``"<value> — <context>"`` when a non-blank context exists, otherwise
        just ``str(value)``.
    """
    for field in ("ai_caption", "description"):
        # Strip before deciding: a whitespace-only ai_caption must not
        # shadow a usable description.
        context = (catalog_row.get(field) or "").strip()
        if context:
            # Spaced em dash keeps raw value and explanation visually apart.
            return f"{value} — {context}"
    return str(value)
# ── Helper Functions ────────────────────────────────────────────────────────── # ── Helper Functions ──────────────────────────────────────────────────────────
def get_profile_data(profile_id: str) -> Dict: def get_profile_data(profile_id: str) -> Dict:

View File

@ -21,6 +21,7 @@ from placeholder_resolver import (
resolve_placeholders, resolve_placeholders,
get_unknown_placeholders, get_unknown_placeholders,
get_placeholder_example_values, get_placeholder_example_values,
format_value_with_d_modifier,
get_available_placeholders, get_available_placeholders,
get_placeholder_catalog get_placeholder_catalog
) )
@ -457,8 +458,8 @@ def export_placeholder_values(session: dict = Depends(require_auth)):
""" """
Export all available placeholders with their current resolved values. Export all available placeholders with their current resolved values.
Returns JSON export suitable for download with all placeholders Pro Zeile: value = Rohwert wie bei {{key}}, example = Vorschau wie bei {{key|d}}
resolved for the current user's profile. (Wert — ai_caption bzw. description). JSON-Download für das aktive Profil.
""" """
from datetime import datetime from datetime import datetime
profile_id = session['profile_id'] profile_id = session['profile_id']
@ -486,11 +487,12 @@ def export_placeholder_values(session: dict = Depends(require_auth)):
export_data['placeholders_by_category'][category] = [] export_data['placeholders_by_category'][category] = []
for item in items: for item in items:
key = item['key'].replace('{{', '').replace('}}', '') key = item['key'].replace('{{', '').replace('}}', '')
raw_val = cleaned_values.get(key, 'nicht verfügbar')
row = { row = {
'key': item['key'], 'key': item['key'],
'description': item['description'], 'description': item['description'],
'value': cleaned_values.get(key, 'nicht verfügbar'), 'value': raw_val,
'example': item.get('example'), 'example': format_value_with_d_modifier(str(raw_val), item),
} }
if item.get('ai_caption'): if item.get('ai_caption'):
row['ai_caption'] = item['ai_caption'] row['ai_caption'] = item['ai_caption']
@ -662,11 +664,12 @@ def export_placeholder_values_extended(
export_data['legacy']['placeholders_by_category'][category] = [] export_data['legacy']['placeholders_by_category'][category] = []
for item in items: for item in items:
key = item['key'].replace('{{', '').replace('}}', '') key = item['key'].replace('{{', '').replace('}}', '')
raw_val = cleaned_values.get(key, 'nicht verfügbar')
export_data['legacy']['placeholders_by_category'][category].append({ export_data['legacy']['placeholders_by_category'][category].append({
'key': item['key'], 'key': item['key'],
'description': item['description'], 'description': item['description'],
'value': cleaned_values.get(key, 'nicht verfügbar'), 'value': raw_val,
'example': item.get('example') 'example': format_value_with_d_modifier(str(raw_val), item),
}) })
# Fill metadata flat # Fill metadata flat

View File

@ -6,6 +6,7 @@ from placeholder_registry import (
build_ai_placeholder_caption, build_ai_placeholder_caption,
) )
import placeholder_resolver as pr import placeholder_resolver as pr
from placeholder_resolver import format_value_with_d_modifier
def test_build_ai_caption_prefers_business_meaning(): def test_build_ai_caption_prefers_business_meaning():
@ -22,9 +23,28 @@ def test_build_ai_caption_prefers_business_meaning():
output_type=OutputType.NUMERIC, output_type=OutputType.NUMERIC,
) )
cap = build_ai_placeholder_caption(m) cap = build_ai_placeholder_caption(m)
assert cap.startswith("Kurzbeschreibung")
assert "Kernbedeutung" in cap assert "Kernbedeutung" in cap
def test_build_ai_caption_description_then_meaning_like_protein_avg():
    """Caption must open with the description, mention the business meaning, and carry no unit suffix."""
    metadata_fields = {
        "key": "protein_avg",
        "category": "Ernährung",
        "description": "Durchschn. Protein in g (30d)",
        "resolver_module": "m",
        "resolver_function": "f",
        "business_meaning": "Zentraler Placeholder für Muskelerhalt.",
        "unit": "g/day",
        "placeholder_type": PlaceholderType.INTERPRETED,
        "output_type": OutputType.NUMERIC,
    }
    caption = build_ai_placeholder_caption(PlaceholderMetadata(**metadata_fields))

    # Description comes first ...
    assert caption.startswith("Durchschn. Protein in g (30d)")
    # ... followed by the interpretation taken from business_meaning ...
    assert "Muskelerhalt" in caption
    # ... and the "Technischer Bezug" unit sentence is not appended.
    assert "Technischer Bezug" not in caption
def test_build_ai_caption_score_adds_scale(): def test_build_ai_caption_score_adds_scale():
m = PlaceholderMetadata( m = PlaceholderMetadata(
key="test_score", key="test_score",
@ -54,3 +74,19 @@ def test_placeholder_token_regex_optional_modifier():
def test_get_unknown_placeholders_strips_modifier(): def test_get_unknown_placeholders_strips_modifier():
unk = pr.get_unknown_placeholders("{{not_a_real_key|d}}") unk = pr.get_unknown_placeholders("{{not_a_real_key|d}}")
assert set(unk) == {"not_a_real_key"} assert set(unk) == {"not_a_real_key"}
def test_format_value_with_d_modifier_matches_prompt_executor():
    """When the row has an ai_caption, the output is the value, a spaced em dash, then that caption."""
    catalog_row = dict(
        key="protein_avg",
        description="Durchschn. Protein in g (30d)",
        example="119g/Tag",
        ai_caption="Durchschn. Protein in g (30d). Zentral für Muskelerhalt.",
    )
    expected = "119g/Tag — Durchschn. Protein in g (30d). Zentral für Muskelerhalt."
    assert format_value_with_d_modifier("119g/Tag", catalog_row) == expected
def test_format_value_with_d_modifier_falls_back_to_description():
    """Without an ai_caption in the row, the description is used as the context text."""
    row_without_caption = dict(description="Nur Beschreibung", key="x")
    formatted = format_value_with_d_modifier("42", row_without_caption)
    assert formatted == "42 — Nur Beschreibung"