From a9a414b956b557f4f7a29b521cbbed9a934d7d1a Mon Sep 17 00:00:00 2001
From: Lars <Lars@stommer.de>
Date: Sat, 11 Apr 2026 21:47:08 +0200
Subject: [PATCH] feat: Enhance placeholder caption generation and formatting

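- Updated `build_ai_placeholder_caption` in `placeholder_registry.py` so the AI context caption starts with the placeholder's description, followed by its business meaning (or a truncated semantic contract), and skips interpretation text or units that the caption already contains.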
- Introduced `format_value_with_d_modifier` in `placeholder_resolver.py`, which renders a value as `value — context` (the catalog's `ai_caption`, falling back to `description`), mirroring the `{{key|d}}` prompt substitution.
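- Modified `export_placeholder_values` and `export_placeholder_values_extended` in `routers/prompts.py` to call `format_value_with_d_modifier`: `value` keeps the raw resolved value, while `example` now holds the `{{key|d}}`-style preview (value plus context) instead of the catalog's static example.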
- Added tests for the new formatting function and updated existing tests to ensure accurate caption generation.

These changes improve the contextual relevance of placeholder data and enhance the user experience when interacting with exported values.
---
 backend/placeholder_registry.py              | 46 +++++++++++++++-----
 backend/placeholder_resolver.py              | 13 +++++-
 backend/routers/prompts.py                   | 15 ++++---
 backend/tests/test_placeholder_modifier_d.py | 36 +++++++++++++++
 4 files changed, 91 insertions(+), 19 deletions(-)

diff --git a/backend/placeholder_registry.py b/backend/placeholder_registry.py
index 0571abc..b02fe96 100644
--- a/backend/placeholder_registry.py
+++ b/backend/placeholder_registry.py
@@ -260,21 +260,34 @@ class PlaceholderRegistry:
 
 def build_ai_placeholder_caption(metadata: PlaceholderMetadata, max_len: int = 400) -> str:
     """
-    Kurztext für KI-Kontext (z. B. Modifier |d): Bedeutung/Skala, ohne die Rohausgabe zu ersetzen.
-    Nutzt business_meaning / semantic_contract; bei Scores explizite 0–100-Erläuterung.
+    Text für |d und Exportfeld ai_caption: zuerst **was** der Platzhalter misst (description),
+    dann **Einordnung** (business_meaning oder gekürzter semantic_contract).
+    So ist klar, worauf sich der konkrete Wert bezieht — nicht nur eine „Meta-Bedeutung“.
     """
-    chunks: List[str] = []
+    desc = (metadata.description or "").strip()
     bm = (metadata.business_meaning or "").strip()
     sc = (metadata.semantic_contract or "").strip()
-    desc = (metadata.description or "").strip()
 
-    if bm:
-        chunks.append(bm)
-    elif sc:
-        chunks.append(sc if len(sc) <= max_len else sc[: max_len - 1] + "…")
-    elif desc:
+    chunks: List[str] = []
+    if desc:
         chunks.append(desc)
 
+    interpret = bm
+    if not interpret and sc:
+        interpret = sc if len(sc) <= max_len else sc[: max_len - 1] + "…"
+
+    if interpret:
+        blob = " ".join(chunks).lower()
+        il = interpret.lower()
+        # Keine Dublette: gleicher Text oder lange Description bereits in der Interpretation
+        redundant = il in blob or (
+            desc
+            and len(desc) >= 10
+            and desc.lower() in il
+        )
+        if not redundant:
+            chunks.append(interpret)
+
     if metadata.placeholder_type == PlaceholderType.SCORE:
         chunks.append("Skala 0–100: höher = im Modell günstiger / besser abgestimmt.")
 
@@ -282,9 +295,18 @@ def build_ai_placeholder_caption(metadata: PlaceholderMetadata, max_len: int = 4
     if unit and metadata.placeholder_type != PlaceholderType.SCORE:
         blob = " ".join(chunks).lower()
         u_low = unit.lower()
-        if u_low not in blob and u_low.replace(" ", "") not in blob.replace(" ", ""):
-            if u_low not in ("score (0-100)", "0-100", "0–100", "dimensionless"):
-                chunks.append(f"Technischer Bezug: {unit}.")
+        # Einheit oft schon in description („… in g (30d)“, „Kalorien“) — nicht doppeln
+        compact_blob = blob.replace(" ", "").replace("/", "")
+        compact_u = u_low.replace(" ", "").replace("/", "")
+        unit_redundant = compact_u in compact_blob or (
+            "g/day" in u_low and ("g/" in blob or "gramm" in blob or " protein" in blob or " fett" in blob or " kh" in blob)
+        ) or ("kcal" in u_low and ("kcal" in blob or "kalorien" in blob))
+
+        if (
+            not unit_redundant
+            and u_low not in ("score (0-100)", "0-100", "0–100", "dimensionless")
+        ):
+            chunks.append(f"Technischer Bezug: {unit}.")
 
     out = " ".join(c for c in chunks if c).strip()
     if len(out) > max_len + 120:
diff --git a/backend/placeholder_resolver.py b/backend/placeholder_resolver.py
index db4f0b5..40aae40 100644
--- a/backend/placeholder_resolver.py
+++ b/backend/placeholder_resolver.py
@@ -10,7 +10,7 @@ This module now focuses on FORMATTING for AI consumption.
 import json
 import re
 from datetime import datetime, timedelta
-from typing import Dict, List, Optional, Callable, Tuple
+from typing import Any, Dict, List, Optional, Callable, Tuple
 from db import get_db, get_cursor, r2d
 
 # Phase 0c: Import data layer
@@ -62,6 +62,17 @@ def _ai_caption_for_placeholder_key(key: str) -> Optional[str]:
     return None
 
 
+def format_value_with_d_modifier(value: str, catalog_row: Dict[str, Any]) -> str:
+    """
+    Entspricht der Prompt-Ersetzung bei {{key|d}}: „Wert — Kontext“.
+    Kontext: ai_caption aus dem Katalog, sonst description (wie prompt_executor).
+    """
+    cap = (catalog_row.get("ai_caption") or catalog_row.get("description") or "").strip()
+    if cap:
+        return f"{value} — {cap}"
+    return str(value)
+
+
 # ── Helper Functions ──────────────────────────────────────────────────────────
 
 def get_profile_data(profile_id: str) -> Dict:
diff --git a/backend/routers/prompts.py b/backend/routers/prompts.py
index 41f5172..43fc9ba 100644
--- a/backend/routers/prompts.py
+++ b/backend/routers/prompts.py
@@ -21,6 +21,7 @@ from placeholder_resolver import (
     resolve_placeholders,
     get_unknown_placeholders,
     get_placeholder_example_values,
+    format_value_with_d_modifier,
     get_available_placeholders,
     get_placeholder_catalog
 )
@@ -457,8 +458,8 @@ def export_placeholder_values(session: dict = Depends(require_auth)):
     """
     Export all available placeholders with their current resolved values.
 
-    Returns JSON export suitable for download with all placeholders
-    resolved for the current user's profile.
+    Pro Zeile: value = Rohwert wie bei {{key}}, example = Vorschau wie bei {{key|d}}
+    (Wert — ai_caption bzw. description). JSON-Download für das aktive Profil.
     """
     from datetime import datetime
     profile_id = session['profile_id']
@@ -486,11 +487,12 @@ def export_placeholder_values(session: dict = Depends(require_auth)):
         export_data['placeholders_by_category'][category] = []
         for item in items:
             key = item['key'].replace('{{', '').replace('}}', '')
+            raw_val = cleaned_values.get(key, 'nicht verfügbar')
             row = {
                 'key': item['key'],
                 'description': item['description'],
-                'value': cleaned_values.get(key, 'nicht verfügbar'),
-                'example': item.get('example'),
+                'value': raw_val,
+                'example': format_value_with_d_modifier(str(raw_val), item),
             }
             if item.get('ai_caption'):
                 row['ai_caption'] = item['ai_caption']
@@ -662,11 +664,12 @@ def export_placeholder_values_extended(
         export_data['legacy']['placeholders_by_category'][category] = []
         for item in items:
             key = item['key'].replace('{{', '').replace('}}', '')
+            raw_val = cleaned_values.get(key, 'nicht verfügbar')
             export_data['legacy']['placeholders_by_category'][category].append({
                 'key': item['key'],
                 'description': item['description'],
-                'value': cleaned_values.get(key, 'nicht verfügbar'),
-                'example': item.get('example')
+                'value': raw_val,
+                'example': format_value_with_d_modifier(str(raw_val), item),
             })
 
     # Fill metadata flat
diff --git a/backend/tests/test_placeholder_modifier_d.py b/backend/tests/test_placeholder_modifier_d.py
index b3149bf..bf55111 100644
--- a/backend/tests/test_placeholder_modifier_d.py
+++ b/backend/tests/test_placeholder_modifier_d.py
@@ -6,6 +6,7 @@ from placeholder_registry import (
     build_ai_placeholder_caption,
 )
 import placeholder_resolver as pr
+from placeholder_resolver import format_value_with_d_modifier
 
 
 def test_build_ai_caption_prefers_business_meaning():
@@ -22,9 +23,28 @@ def test_build_ai_caption_prefers_business_meaning():
         output_type=OutputType.NUMERIC,
     )
     cap = build_ai_placeholder_caption(m)
+    assert cap.startswith("Kurzbeschreibung")
     assert "Kernbedeutung" in cap
 
 
+def test_build_ai_caption_description_then_meaning_like_protein_avg():
+    m = PlaceholderMetadata(
+        key="protein_avg",
+        category="Ernährung",
+        description="Durchschn. Protein in g (30d)",
+        resolver_module="m",
+        resolver_function="f",
+        business_meaning="Zentraler Placeholder für Muskelerhalt.",
+        unit="g/day",
+        placeholder_type=PlaceholderType.INTERPRETED,
+        output_type=OutputType.NUMERIC,
+    )
+    cap = build_ai_placeholder_caption(m)
+    assert cap.startswith("Durchschn. Protein in g (30d)")
+    assert "Muskelerhalt" in cap
+    assert "Technischer Bezug" not in cap
+
+
 def test_build_ai_caption_score_adds_scale():
     m = PlaceholderMetadata(
         key="test_score",
@@ -54,3 +74,19 @@ def test_placeholder_token_regex_optional_modifier():
 def test_get_unknown_placeholders_strips_modifier():
     unk = pr.get_unknown_placeholders("{{not_a_real_key|d}}")
     assert set(unk) == {"not_a_real_key"}
+
+
+def test_format_value_with_d_modifier_matches_prompt_executor():
+    row = {
+        "key": "protein_avg",
+        "description": "Durchschn. Protein in g (30d)",
+        "example": "119g/Tag",
+        "ai_caption": "Durchschn. Protein in g (30d). Zentral für Muskelerhalt.",
+    }
+    out = format_value_with_d_modifier("119g/Tag", row)
+    assert out == "119g/Tag — Durchschn. Protein in g (30d). Zentral für Muskelerhalt."
+
+
+def test_format_value_with_d_modifier_falls_back_to_description():
+    row = {"description": "Nur Beschreibung", "key": "x"}
+    assert format_value_with_d_modifier("42", row) == "42 — Nur Beschreibung"