From 7226e04e9c0146f6d6f25c185b9c6f336bd1ff1c Mon Sep 17 00:00:00 2001
From: Lars <Lars@stommer.de>
Date: Sat, 18 Apr 2026 10:12:33 +0200
Subject: [PATCH 1/4] feat: implement effective CSV delimiter resolution for
 imports

- Added `resolve_effective_csv_delimiter` function to determine the correct delimiter based on the uploaded file and template.
- Updated CSV import logic to utilize the new delimiter resolution method, ensuring accurate parsing of CSV files with varying delimiters.
- Enhanced documentation to reflect changes in delimiter handling.
- Added unit tests for the new delimiter resolution functionality.
---
 .../UNIVERSAL_CSV_IMPORT_AGENT_GUIDE.md       |  1 +
 backend/csv_parser/core.py                    | 40 +++++++++++++++++++
 backend/csv_parser/executor.py                |  5 ++-
 backend/routers/csv_import.py                 |  5 ++-
 backend/tests/test_csv_parser_core.py         | 15 +++++++
 5 files changed, 63 insertions(+), 3 deletions(-)

diff --git a/.claude/docs/technical/UNIVERSAL_CSV_IMPORT_AGENT_GUIDE.md b/.claude/docs/technical/UNIVERSAL_CSV_IMPORT_AGENT_GUIDE.md
index 5bdcea0..e47174c 100644
--- a/.claude/docs/technical/UNIVERSAL_CSV_IMPORT_AGENT_GUIDE.md
+++ b/.claude/docs/technical/UNIVERSAL_CSV_IMPORT_AGENT_GUIDE.md
@@ -18,6 +18,7 @@ Dieses Dokument ist **normativ für Agenten**, die ein neues Import-Zielmodul an
 | Admin-Systemvorlagen | `backend/routers/admin_csv_templates.py` |
 | Nutzer-Import (Profil-Mappings) | `backend/routers/csv_import.py` |
 | Vorlagen-Validierung (strukturell + Sample) | `backend/csv_parser/template_validator.py` (`validate_csv_template`) |
+| Effektives Listentrennzeichen | `backend/csv_parser/core.py` (`resolve_effective_csv_delimiter`) — Datei kann `;` (z. B. Apple DE) haben, Vorlage `,` (EN); Import/Diagnose **nicht** nur das gespeicherte Trennzeichen blind nutzen. |
 
 **Single Source of Truth** für erlaubte Zielfelder, Typen und Duplikat-Keys ist **`module_registry.py`**. Keine parallele Feldliste in Routern duplizieren.
 
diff --git a/backend/csv_parser/core.py b/backend/csv_parser/core.py
index eb23a9f..444c9a1 100644
--- a/backend/csv_parser/core.py
+++ b/backend/csv_parser/core.py
@@ -47,6 +47,46 @@ def sniff_delimiter(sample_line: str) -> str:
     return best
 
 
+def _csv_field_count(line: str, delimiter: str) -> int:
+    """Count the fields csv.reader yields for ``line`` (quote-aware); 0 if blank or unparsable."""
+    if not line or not line.strip():
+        return 0
+    try:
+        row = next(csv.reader(io.StringIO(line), delimiter=delimiter), ())
+    except csv.Error:  # e.g. a field exceeding csv.field_size_limit(); score this delimiter 0
+        return 0
+    return len(row)
+
+
+def resolve_effective_csv_delimiter(text: str, template_delimiter: str | None = None) -> str:
+    """Return the candidate delimiter that splits the header of ``text`` into the most fields.
+
+    Stored templates often say "," while regional exports use ";"; the wrong one collapses
+    the header into one column. ``template_delimiter`` is honoured only if it is in
+    ``_DEFAULT_DELIMS``, and then just as a tie-breaker or fallback.
+    """
+    raw = template_delimiter or ""
+    # Keep a whitespace delimiter such as "\t" intact; a blanket .strip() would erase it.
+    tpl = raw if raw in _DEFAULT_DELIMS else raw.strip()
+    if tpl not in _DEFAULT_DELIMS:
+        tpl = None
+
+    lines = _split_first_lines(text, max_lines=5)
+    if not lines:
+        return tpl or ","
+
+    header = lines[0]
+    scores = [(_csv_field_count(header, d), d) for d in _DEFAULT_DELIMS]
+    max_n = max(n for n, _ in scores)
+    if max_n <= 1:
+        # No candidate splits the header: trust the template, else sniff.
+        return tpl or sniff_delimiter(header)
+
+    at_max = [d for n, d in scores if n == max_n]
+    # On a tie prefer the template's delimiter, otherwise the first candidate in order.
+    return tpl if tpl in at_max else at_max[0]
+
+
 def _split_first_lines(text: str, max_lines: int = 5) -> List[str]:
     lines: List[str] = []
     for line in text.splitlines():
diff --git a/backend/csv_parser/executor.py b/backend/csv_parser/executor.py
index 67c78c7..6a63874 100644
--- a/backend/csv_parser/executor.py
+++ b/backend/csv_parser/executor.py
@@ -11,7 +11,7 @@ from typing import Any
 
 import logging
 
-from csv_parser.core import iter_csv_dict_rows
+from csv_parser.core import iter_csv_dict_rows, resolve_effective_csv_delimiter
 from csv_parser.import_row_processing import (
     aggregate_mapped_rows,
     resolve_import_row_processing,
@@ -97,7 +97,8 @@ def run_universal_csv_import(
     if tc is not None and not isinstance(tc, dict):
         tc = None
 
-    delim = mapping.get("delimiter") or ","
+    tpl_delim = str(mapping.get("delimiter") or ",").strip() or ","
+    delim = resolve_effective_csv_delimiter(text, tpl_delim)
     has_header = mapping.get("has_header", True)
 
     if module == "nutrition":
diff --git a/backend/routers/csv_import.py b/backend/routers/csv_import.py
index b3ab67a..c6dc535 100644
--- a/backend/routers/csv_import.py
+++ b/backend/routers/csv_import.py
@@ -29,6 +29,7 @@ from csv_parser.core import (
     iter_csv_dict_rows,
     normalize_header_for_signature,
     parse_csv_sample,
+    resolve_effective_csv_delimiter,
 )
 from csv_parser.type_converter import build_row_after_mapping, diagnose_row_mapping
 from csv_parser.field_units import source_unit_choices_for_field
@@ -393,7 +394,8 @@ async def csv_import_diagnose(
     tc = m.get("type_conversions")
     if not isinstance(tc, dict):
         tc = {}
-    delim = str(m.get("delimiter") or ",")
+    tpl_delim = str(m.get("delimiter") or ",").strip() or ","
+    delim = resolve_effective_csv_delimiter(text, tpl_delim)
     exec_module = str(m["module"])
 
     rows_out: list[dict[str, Any]] = []
@@ -418,6 +420,7 @@ async def csv_import_diagnose(
         "mapping_id": mapping_id,
         "mapping_name": m.get("mapping_name"),
         "module": exec_module,
+        "delimiter_template": tpl_delim,
         "delimiter_used": delim,
         "has_header": bool(m.get("has_header", True)),
         "rows_diagnosed": len(rows_out),
diff --git a/backend/tests/test_csv_parser_core.py b/backend/tests/test_csv_parser_core.py
index 3e27673..917510f 100644
--- a/backend/tests/test_csv_parser_core.py
+++ b/backend/tests/test_csv_parser_core.py
@@ -11,6 +11,7 @@ from csv_parser.core import (
     headers_signature_rank_metrics,
     get_csv_import_limits,
     iter_csv_dict_rows,
+    resolve_effective_csv_delimiter,
 )
 from csv_parser.field_units import source_unit_choices_for_field
 from csv_parser.mapping_suggest import build_type_conversions_for_mapping
@@ -29,6 +30,20 @@ def test_sniff_delimiter():
     assert sniff_delimiter("a,b,c") == ","
 
 
+def test_resolve_effective_csv_delimiter_semicolon_file_comma_template():
+    """German Apple export: ";" in the file while the English template stores ","."""
+    header = "Workout Type;Start;End;Duration;Aktive Energie (kJ)"
+    row = "Laufen;2026-04-17 16:25;2026-04-17 17:00;00:30:00;500"
+    text = header + "\n" + row + "\n"
+    assert resolve_effective_csv_delimiter(text, ",") == ";"
+    assert resolve_effective_csv_delimiter(text, None) == ";"
+
+
+def test_resolve_effective_csv_delimiter_comma_file_keeps_template():
+    text = "Workout Type,Start,End\nWalk,2026-04-17 16:25,2026-04-17 17:00\n"
+    assert resolve_effective_csv_delimiter(text, ",") == ","
+
+
 def test_parse_csv_sample_header():
     text = "Date;kcal\n2024-01-01;2000\n"
     headers, rows, delim = parse_csv_sample(text, delimiter=";", max_data_rows=3)
-- 
2.43.0


From 6756dc60f37d94e4f6ccf810cc692a2a101b1aa9 Mon Sep 17 00:00:00 2001
From: Lars <Lars@stommer.de>
Date: Sat, 18 Apr 2026 10:24:44 +0200
Subject: [PATCH 2/4] feat: enhance session metrics handling in activity
 summaries

- Integrated compact JSON payload generation for session metrics in `get_training_sessions_recent_weeks_data`.
- Updated the registration of activity session insights to reflect the new compact format for session metrics.
- Improved documentation to clarify the structure and semantics of the session metrics in the JSON output.
- Added normalization for prompt numbers to ensure consistent formatting in the metrics.
---
 backend/data_layer/activity_metrics.py        |  19 +++-
 backend/data_layer/prompt_output_compact.py   | 102 ++++++++++++++++++
 .../activity_session_insights.py              |  17 ++-
 backend/placeholder_resolver.py               |   6 +-
 backend/tests/test_prompt_output_compact.py   |  59 ++++++++++
 5 files changed, 188 insertions(+), 15 deletions(-)
 create mode 100644 backend/data_layer/prompt_output_compact.py
 create mode 100644 backend/tests/test_prompt_output_compact.py

diff --git a/backend/data_layer/activity_metrics.py b/backend/data_layer/activity_metrics.py
index 9c27451..ebb1731 100644
--- a/backend/data_layer/activity_metrics.py
+++ b/backend/data_layer/activity_metrics.py
@@ -25,6 +25,10 @@ import statistics
 from db import get_db, get_cursor, r2d
 from data_layer.activity_session_metrics import enrich_sessions_with_metrics
 from data_layer.utils import calculate_confidence, safe_float, safe_int, serialize_dates
+from data_layer.prompt_output_compact import (
+    normalize_prompt_number,
+    session_metrics_list_to_key_value_compact,
+)
 
 
 def get_activity_summary_data(
@@ -1094,6 +1098,10 @@ def get_training_sessions_recent_weeks_data(
     Letzte Wochen mit Einzeltrainings für KI-Kontext (Dauer, kcal, HF, Typ).
 
     weeks: Anzahl zurückliegender ISO-Kalenderwochen (Default 4).
+
+    session_metrics pro Einheit: kompaktes Objekt ``{key: Wert}`` (keine wiederholten
+    Namen/Beschreibungen). Bedeutung der Keys: Platzhalter ``{{training_parameters_glossary_md}}``.
+    Zahlen werden für Prompt-Token kompakt gerundet.
     """
     days = max(weeks * 7, 7)
     with get_db() as conn:
@@ -1131,6 +1139,8 @@ def get_training_sessions_recent_weeks_data(
                 "days_loaded": days,
                 "session_count": 0,
                 "confidence": "insufficient",
+                "session_metrics_shape": "key_value",
+                "metric_semantics_placeholder": "{{training_parameters_glossary_md}}",
             },
         }
 
@@ -1149,6 +1159,7 @@ def get_training_sessions_recent_weeks_data(
         kcal_f = float(kcal) if kcal is not None else None
         hr_a = r.get("hr_avg")
         hr_m = r.get("hr_max")
+        sm_compact = session_metrics_list_to_key_value_compact(r.get("session_metrics"))
         by_week[wk].append(
             {
                 "id": str(r["id"]),
@@ -1157,12 +1168,12 @@ def get_training_sessions_recent_weeks_data(
                 "activity_type": r.get("activity_type"),
                 "training_category": r.get("training_category"),
                 "training_type_name": r.get("training_type_name"),
-                "duration_min": dur_f,
-                "kcal_active": kcal_f,
+                "duration_min": normalize_prompt_number(dur_f) if dur_f is not None else None,
+                "kcal_active": normalize_prompt_number(kcal_f) if kcal_f is not None else None,
                 "hr_avg": int(hr_a) if hr_a is not None else None,
                 "hr_max": int(hr_m) if hr_m is not None else None,
                 "rpe": int(r["rpe"]) if r.get("rpe") is not None else None,
-                "session_metrics": r.get("session_metrics", []),
+                "session_metrics": sm_compact,
             }
         )
 
@@ -1177,6 +1188,8 @@ def get_training_sessions_recent_weeks_data(
                 "days_loaded": days,
                 "session_count": len(rows),
                 "confidence": confidence,
+                "session_metrics_shape": "key_value",
+                "metric_semantics_placeholder": "{{training_parameters_glossary_md}}",
             },
         }
     )
diff --git a/backend/data_layer/prompt_output_compact.py b/backend/data_layer/prompt_output_compact.py
new file mode 100644
index 0000000..d74994a
--- /dev/null
+++ b/backend/data_layer/prompt_output_compact.py
@@ -0,0 +1,102 @@
+"""
+Kompakte Zahlen- und JSON-Aufbereitung für KI-Platzhalter (Token sparen).
+
+- Floats: sinnvolle Nachkommastellen je nach Größenordnung (kleine Werte <0,1 mehr Präzision).
+- ≥10 meist ganzzahlig; Prozent/Verhältnisse über denselben Mechanismus lesbar.
+- Rekursiv auf dict/list-Strukturen vor json.dumps in _safe_json anwendbar.
+"""
+from __future__ import annotations
+
+import math
+from decimal import Decimal
+from typing import Any
+
+
+def compact_float_for_prompt(x: float) -> float | int:
+    """Trim decimals by magnitude so numbers in prompts stay short.
+
+    |x| >= 10 becomes a rounded int; 0.1 <= |x| < 10 keeps two decimals (int if whole
+    after rounding); 0.01 <= |x| < 0.1 keeps three; smaller non-zero values keep four.
+    Zero is returned as int 0; NaN and infinities are passed through unchanged.
+    """
+    if not math.isfinite(x):
+        return x
+    magnitude = abs(x)
+    if magnitude == 0.0:
+        return 0
+    if magnitude >= 10.0:
+        return int(round(x))
+    if magnitude < 0.01:
+        return round(x, 4)
+    if magnitude < 0.1:
+        return round(x, 3)
+    two_dp = round(x, 2)
+    # A value like 0.999 rounds to 1.0; emit it as int 1 rather than 1.0.
+    is_whole = abs(two_dp - int(round(two_dp))) < 1e-6
+    return int(two_dp) if is_whole else two_dp
+
+
+def normalize_prompt_number(x: Any) -> Any:
+    """int/Decimal/float kompakt; Rest unverändert."""
+    if x is None:
+        return None
+    if isinstance(x, bool):
+        return x
+    if isinstance(x, int) and not isinstance(x, bool):
+        return x
+    if isinstance(x, Decimal):
+        try:
+            xf = float(x)
+        except Exception:
+            return x
+        return compact_float_for_prompt(xf)
+    if isinstance(x, float):
+        return compact_float_for_prompt(x)
+    return x
+
+
+def compact_json_payload_for_prompts(obj: Any) -> Any:
+    """
+    Tiefe Kopie mit kompakten Zahlen (dicts/list/tuples rekursiv).
+    Strings und dict-Keys werden nicht verändert.
+    """
+    if obj is None:
+        return None
+    if isinstance(obj, dict):
+        return {k: compact_json_payload_for_prompts(v) for k, v in obj.items()}
+    if isinstance(obj, (list, tuple)):
+        t = [compact_json_payload_for_prompts(v) for v in obj]
+        return tuple(t) if isinstance(obj, tuple) else t
+    return normalize_prompt_number(obj)
+
+
+def session_metrics_list_to_key_value_compact(metrics: list[Any] | None) -> dict[str, Any]:
+    """
+    Session-Metriken für KI-JSON: nur key → Wert (keine wiederholten Namen/Beschreibungen).
+
+    Semantik: {{training_parameters_glossary_md}} im Prompt ergänzen.
+    """
+    out: dict[str, Any] = {}
+    for m in metrics or []:
+        if not isinstance(m, dict):
+            continue
+        k = m.get("key")
+        if not k:
+            continue
+        v = m.get("value")
+        dt = (m.get("data_type") or "").lower()
+        if v is None:
+            out[str(k)] = None
+            continue
+        if dt == "integer":
+            try:
+                out[str(k)] = int(v)
+            except (TypeError, ValueError):
+                out[str(k)] = normalize_prompt_number(v)
+        elif dt == "boolean":
+            out[str(k)] = bool(v)
+        elif dt == "string":
+            out[str(k)] = str(v)
+        else:
+            out[str(k)] = normalize_prompt_number(v)
+    return out
diff --git a/backend/placeholder_registrations/activity_session_insights.py b/backend/placeholder_registrations/activity_session_insights.py
index 0e49eb9..5bbab48 100644
--- a/backend/placeholder_registrations/activity_session_insights.py
+++ b/backend/placeholder_registrations/activity_session_insights.py
@@ -130,8 +130,8 @@ def register_activity_session_insights():
         key="training_sessions_recent_json",
         category="Aktivität",
         description=(
-            "JSON: ISO-Wochen mit Sessions (activity_log-Kopf) plus session_metrics[] — gemergte Profil-Metriken "
-            "(dynamische Keys)"
+            "JSON: ISO-Wochen mit Sessions (activity_log-Kopf) plus session_metrics als kompaktes "
+            "{key: Wert}-Objekt; Zahlen für Prompts gekürzt. Semantik: {{training_parameters_glossary_md}}."
         ),
         resolver_module="backend/placeholder_resolver.py",
         resolver_function="_safe_json",
@@ -141,13 +141,10 @@ def register_activity_session_insights():
         semantic_contract=(
             "Root: weeks[] mit week_iso; sessions[] pro Einheit u. a. id, date, activity_type, "
             "duration_min, kcal_active, hr_avg, hr_max, rpe, training_category, training_type_name, "
-            "session_metrics[]. "
-            "session_metrics: effektive Liste nach merge_column_backed_and_eav_metrics — Einträge mit "
-            "training_parameter_id, key, data_type, unit, value, name_de/name_en, description_de/description_en; "
-            "nur Parameter aus Attributschema "
-            "(training_category_parameter + training_type_parameter Overrides), keys sortiert. "
-            "Kanon Lesen: activity_log-Spalte vor EAV bei Konflikt. "
-            "meta: weeks_requested, days_loaded, session_count, confidence. "
+            "session_metrics (Objekt key→Wert, keine wiederholten Labels). "
+            "Merge wie merge_column_backed_and_eav_metrics; nur Keys aus Attributschema. "
+            "meta.session_metrics_shape=key_value, meta.metric_semantics_placeholder verweist auf Glossary-Platzhalter. "
+            "Alle JSON-Platzhalter mit _safe_json: Zahlen rekursiv kompakt gerundet. "
             "Default ca. 4 ISO-Wochen (28 Tage Rohdatenfenster)."
         ),
         business_meaning="Rohkontext für wochenweise Auswertung (Erholung, Intensität) in der KI",
@@ -171,7 +168,7 @@ def register_activity_session_insights():
             "session_metrics oft [] (kein Typ, kein Profil, keine gespeicherten Werte). "
             "Anzahl und Namen der Metrik-Keys sind instanz-/adminabhängig — JSON nicht als festes Schema "
             "für Downstream-Parsing harter Logik verwenden. "
-            "Für KI-Semantik zusätzlich {{training_parameters_glossary_md}} (gesamter aktiver Katalog) in den Prompt legen. "
+            "Pflicht für Metrik-Bedeutung: {{training_parameters_glossary_md}} (Katalog); im JSON keine Namen/Beschreibungen pro Session. "
             "Composite-Parameter (JSON in EAV) noch nicht im MVP expandiert; ggf. Roh-value_text in späterer Phase."
         ),
         layer_1_decision="activity_metrics.get_training_sessions_recent_weeks_data",
diff --git a/backend/placeholder_resolver.py b/backend/placeholder_resolver.py
index 6f635c2..bdb248f 100644
--- a/backend/placeholder_resolver.py
+++ b/backend/placeholder_resolver.py
@@ -48,6 +48,8 @@ from data_layer.health_metrics import (
     get_vo2_max_data
 )
 
+from data_layer.prompt_output_compact import compact_json_payload_for_prompts
+
 from placeholder_registry import build_ai_placeholder_caption, get_registry
 
 # {{key|d}} — nur description anhängen; {{key|x}} — nur Erklärung (ai_caption / Registry)
@@ -1028,8 +1030,8 @@ def _safe_json(func_name: str, profile_id: str) -> str:
         # If already string, return it; otherwise convert to JSON
         if isinstance(result, str):
             return result
-        else:
-            return json.dumps(result, ensure_ascii=False, default=str)
+        compacted = compact_json_payload_for_prompts(result)
+        return json.dumps(compacted, ensure_ascii=False, default=str)
     except Exception as e:
         print(f"[ERROR] _safe_json({func_name}, {profile_id}): {type(e).__name__}: {e}")
         traceback.print_exc()
diff --git a/backend/tests/test_prompt_output_compact.py b/backend/tests/test_prompt_output_compact.py
new file mode 100644
index 0000000..cefae36
--- /dev/null
+++ b/backend/tests/test_prompt_output_compact.py
@@ -0,0 +1,59 @@
+"""Tests für data_layer.prompt_output_compact (KI-Platzhalter, Token)."""
+
+import pytest
+
+from data_layer.prompt_output_compact import (
+    compact_float_for_prompt,
+    compact_json_payload_for_prompts,
+    normalize_prompt_number,
+    session_metrics_list_to_key_value_compact,
+)
+
+
+@pytest.mark.parametrize(
+    "x,expected",
+    [
+        (0.0, 0),
+        (123.456, 123),
+        (45.67, 46),
+        (9.876, 9.88),
+        (0.99, 0.99),
+        (0.055, 0.055),
+        (0.01234, 0.012),
+    ],
+)
+def test_compact_float_for_prompt(x, expected):
+    out = compact_float_for_prompt(x)
+    if isinstance(expected, float):
+        assert abs(float(out) - expected) < 0.0001
+    else:
+        assert out == expected
+
+
+def test_compact_json_nested():
+    raw = {"a": 12.345678, "b": {"c": 0.0666}, "d": [1.111, 2.0]}
+    out = compact_json_payload_for_prompts(raw)
+    assert out["a"] == 12
+    assert abs(out["b"]["c"] - 0.067) < 0.001
+    assert out["d"][0] == 1.11
+
+
+def test_session_metrics_key_value_only():
+    sm = [
+        {
+            "key": "rpe",
+            "data_type": "integer",
+            "value": 7,
+            "name_de": "RPE",
+            "description_de": "lang",
+        },
+        {
+            "key": "watts",
+            "data_type": "float",
+            "value": 199.999,
+            "unit": "W",
+        },
+    ]
+    out = session_metrics_list_to_key_value_compact(sm)
+    assert out == {"rpe": 7, "watts": 200}
+    assert "name_de" not in str(out)
-- 
2.43.0


From 178534e9eb791541e4f93ff868da4963b3293698 Mon Sep 17 00:00:00 2001
From: Lars <Lars@stommer.de>
Date: Sat, 18 Apr 2026 10:32:29 +0200
Subject: [PATCH 3/4] feat: enhance formatting and normalization of activity
 metrics

- Introduced `format_scalar_for_prompt_text` function to standardize the representation of scalar values in activity summaries and details.
- Updated `get_activity_summary` and `get_activity_detail` functions to utilize the new formatting for improved readability.
- Added normalization for float values in session metrics to prevent excessively long representations.
- Enhanced unit tests to verify the new formatting and normalization behavior.
---
 .../data_layer/activity_session_metrics.py    | 24 ++++++++++++++
 backend/data_layer/prompt_output_compact.py   | 25 +++++++++++++++
 backend/placeholder_resolver.py               | 19 ++++++++---
 .../tests/test_activity_session_metrics.py    | 32 +++++++++++++++++++
 backend/tests/test_prompt_output_compact.py   |  7 ++++
 5 files changed, 103 insertions(+), 4 deletions(-)

diff --git a/backend/data_layer/activity_session_metrics.py b/backend/data_layer/activity_session_metrics.py
index 6894559..2681b68 100644
--- a/backend/data_layer/activity_session_metrics.py
+++ b/backend/data_layer/activity_session_metrics.py
@@ -13,9 +13,31 @@ from data_layer.activity_data_canon import (
     ACTIVITY_LOG_LEGACY_COLUMN_FOR_EAV_PRIMARY_PARAM,
     ACTIVITY_MODULE_REGISTRY_FIELD_KEYS,
 )
+from data_layer.prompt_output_compact import normalize_prompt_number
 
 logger = logging.getLogger(__name__)
 
+
+def _normalize_metric_value_for_read(data_type: str, val: Any) -> Any:
+    """Lesepfad (Layer 1): keine unnötig langen Float-Strings für KI/UI (Issue 53 / Platzhalter)."""
+    if val is None:
+        return None
+    dt = (data_type or "").strip().lower()
+    if dt == "string":
+        return val
+    if dt == "boolean":
+        return bool(val)
+    if dt == "integer":
+        try:
+            if isinstance(val, bool):
+                return int(val)
+            return int(val)
+        except (TypeError, ValueError):
+            return normalize_prompt_number(val)
+    if dt == "float":
+        return normalize_prompt_number(val)
+    return normalize_prompt_number(val)
+
 # Diese Spalten nicht aus CSV-Parameter-Zuordnung überschreiben (kommen aus Typ-Mapping / System).
 ACTIVITY_LOG_PATCH_FORBIDDEN = frozenset(
     {
@@ -430,6 +452,8 @@ def merge_column_backed_and_eav_metrics(
             keys_handled.add(k)
 
     merged.sort(key=lambda x: x["key"])
+    for m in merged:
+        m["value"] = _normalize_metric_value_for_read(m.get("data_type") or "", m.get("value"))
     return merged
 
 
diff --git a/backend/data_layer/prompt_output_compact.py b/backend/data_layer/prompt_output_compact.py
index d74994a..7949c6d 100644
--- a/backend/data_layer/prompt_output_compact.py
+++ b/backend/data_layer/prompt_output_compact.py
@@ -70,6 +70,31 @@ def compact_json_payload_for_prompts(obj: Any) -> Any:
     return normalize_prompt_number(obj)
 
 
+def format_scalar_for_prompt_text(x: Any) -> str:
+    """
+    Kurzdarstellung für Text-Platzhalter (activity_detail, Tabellen, …).
+    Nutzt dieselbe Komprimierung wie JSON (normalize_prompt_number).
+    """
+    if x is None:
+        return "—"
+    if isinstance(x, bool):
+        return "ja" if x else "nein"
+    if isinstance(x, str):
+        return x
+    n = normalize_prompt_number(x)
+    if isinstance(n, bool):
+        return "ja" if n else "nein"
+    if isinstance(n, int) and not isinstance(n, bool):
+        return str(n)
+    if isinstance(n, float):
+        if not math.isfinite(n):
+            return str(n)
+        if abs(n - round(n)) < 1e-9:
+            return str(int(round(n)))
+        return str(n)
+    return str(n)
+
+
 def session_metrics_list_to_key_value_compact(metrics: list[Any] | None) -> dict[str, Any]:
     """
     Session-Metriken für KI-JSON: nur key → Wert (keine wiederholten Namen/Beschreibungen).
diff --git a/backend/placeholder_resolver.py b/backend/placeholder_resolver.py
index bdb248f..8f8973d 100644
--- a/backend/placeholder_resolver.py
+++ b/backend/placeholder_resolver.py
@@ -28,6 +28,8 @@ from data_layer.nutrition_metrics import (
     get_nutrition_days_data,
     get_protein_targets_data
 )
+from data_layer.prompt_output_compact import format_scalar_for_prompt_text
+
 from data_layer.activity_metrics import (
     get_activity_summary_data,
     get_activity_detail_data,
@@ -350,7 +352,11 @@ def get_activity_summary(profile_id: str, days: int = 14) -> str:
     if data['confidence'] == 'insufficient':
         return f"Keine Aktivitäten in den letzten {days} Tagen"
 
-    return f"{data['activity_count']} Einheiten in {days} Tagen (Ø {data['avg_duration_min']} min/Einheit, {data['total_kcal']} kcal gesamt)"
+    return (
+        f"{data['activity_count']} Einheiten in {days} Tagen (Ø "
+        f"{format_scalar_for_prompt_text(data['avg_duration_min'])} min/Einheit, "
+        f"{format_scalar_for_prompt_text(data['total_kcal'])} kcal gesamt)"
+    )
 
 
 def calculate_age(dob) -> str:
@@ -423,18 +429,23 @@ def get_activity_detail(profile_id: str, days: int = 14) -> str:
     # Format as readable list (max 20 entries to avoid token bloat)
     lines = []
     for activity in data["activities"][:20]:
-        hr_str = f", HF={activity['hr_avg']}" if activity.get("hr_avg") else ""
+        hr_str = (
+            f", HF={format_scalar_for_prompt_text(activity['hr_avg'])}"
+            if activity.get("hr_avg") is not None
+            else ""
+        )
         eav_parts = []
         for m in activity.get("session_metrics") or []:
             k, v = m.get("key"), m.get("value")
             if k is None or v is None:
                 continue
             label = m.get("name_de") or m.get("name_en") or k
-            eav_parts.append(f"{label} ({k})={v}")
+            eav_parts.append(f"{label} ({k})={format_scalar_for_prompt_text(v)}")
         eav_str = f" | EAV: {'; '.join(eav_parts)}" if eav_parts else ""
         lines.append(
             f"{activity['date']}: {activity['activity_type']} "
-            f"({activity['duration_min']}min, {activity['kcal_active']}kcal{hr_str}{eav_str})"
+            f"({format_scalar_for_prompt_text(activity['duration_min'])}min, "
+            f"{format_scalar_for_prompt_text(activity['kcal_active'])}kcal{hr_str}{eav_str})"
         )
 
     return "\n".join(lines)
diff --git a/backend/tests/test_activity_session_metrics.py b/backend/tests/test_activity_session_metrics.py
index a2bc11a..0de2bdf 100644
--- a/backend/tests/test_activity_session_metrics.py
+++ b/backend/tests/test_activity_session_metrics.py
@@ -121,6 +121,38 @@ def test_merge_parameter_schema_includes_descriptions():
     assert merged[0]["description_en"] == "5 min average power"
 
 
+def test_merge_eav_float_value_normalized_no_long_tail():
+    """Layer 1: lange Floats (z. B. kcal_per_km) für Lesepfad kompakt."""
+    schema = [
+        {
+            "training_parameter_id": 1,
+            "key": "kcal_per_km",
+            "data_type": "float",
+            "unit": "kcal/km",
+            "validation_rules": {},
+            "source_field": None,
+            "name_de": "Kcal/km",
+            "name_en": "kcal/km",
+            "description_de": None,
+            "description_en": None,
+            "param_category": "performance",
+        }
+    ]
+    eav = [
+        {
+            "training_parameter_id": 1,
+            "key": "kcal_per_km",
+            "data_type": "float",
+            "unit": "kcal/km",
+            "value": 51.5818181818181818,
+        }
+    ]
+    out = merge_column_backed_and_eav_metrics({}, schema, eav)
+    assert len(out) == 1
+    v = out[0]["value"]
+    assert "581818" not in repr(v)
+
+
 def test_merge_column_backed_includes_human_labels_from_schema():
     schema = [
         {
diff --git a/backend/tests/test_prompt_output_compact.py b/backend/tests/test_prompt_output_compact.py
index cefae36..f00b627 100644
--- a/backend/tests/test_prompt_output_compact.py
+++ b/backend/tests/test_prompt_output_compact.py
@@ -5,6 +5,7 @@ import pytest
 from data_layer.prompt_output_compact import (
     compact_float_for_prompt,
     compact_json_payload_for_prompts,
+    format_scalar_for_prompt_text,
     normalize_prompt_number,
     session_metrics_list_to_key_value_compact,
 )
@@ -38,6 +39,12 @@ def test_compact_json_nested():
     assert out["d"][0] == 1.11
 
 
+def test_format_scalar_no_long_float_tail():
+    s = format_scalar_for_prompt_text(51.5818181818181818)
+    assert "181818" not in s
+    assert len(s) <= 8
+
+
 def test_session_metrics_key_value_only():
     sm = [
         {
-- 
2.43.0


From 7676897fda1f4c604fa638028d14f2214ec9c653 Mon Sep 17 00:00:00 2001
From: Lars <Lars@stommer.de>
Date: Sat, 18 Apr 2026 10:43:21 +0200
Subject: [PATCH 4/4] feat: enhance normalization of metric values for improved
 handling

- Updated `_normalize_metric_value_for_read` to compact numeric strings and ensure consistent formatting for string data types.
- Enhanced `normalize_prompt_number` to handle numeric strings and non-finite float values effectively.
- Improved unit tests to validate the new normalization behavior for session metrics and scalar formatting.
---
 .../data_layer/activity_session_metrics.py    |  2 +-
 backend/data_layer/prompt_output_compact.py   | 43 +++++++++++++++----
 .../tests/test_activity_session_metrics.py    |  6 +++
 backend/tests/test_prompt_output_compact.py   | 23 ++++++++++
 4 files changed, 64 insertions(+), 10 deletions(-)

diff --git a/backend/data_layer/activity_session_metrics.py b/backend/data_layer/activity_session_metrics.py
index 2681b68..0726ff3 100644
--- a/backend/data_layer/activity_session_metrics.py
+++ b/backend/data_layer/activity_session_metrics.py
@@ -24,7 +24,7 @@ def _normalize_metric_value_for_read(data_type: str, val: Any) -> Any:
         return None
     dt = (data_type or "").strip().lower()
     if dt == "string":
-        return val
+        return normalize_prompt_number(val)
     if dt == "boolean":
         return bool(val)
     if dt == "integer":
diff --git a/backend/data_layer/prompt_output_compact.py b/backend/data_layer/prompt_output_compact.py
index 7949c6d..8afa08c 100644
--- a/backend/data_layer/prompt_output_compact.py
+++ b/backend/data_layer/prompt_output_compact.py
@@ -4,10 +4,14 @@ Kompakte Zahlen- und JSON-Aufbereitung für KI-Platzhalter (Token sparen).
 - Floats: sinnvolle Nachkommastellen je nach Größenordnung (kleine Werte <0,1 mehr Präzision).
 - ≥10 meist ganzzahlig; Prozent/Verhältnisse über denselben Mechanismus lesbar.
 - Rekursiv auf dict/list-Strukturen vor json.dumps in _safe_json anwendbar.
+
+Hinweis: numpy.float32/float16 (anders als numpy.float64) und numerische Strings (DB/API)
+sind keine ``float``-Instanzen — diese werden explizit mit float() normalisiert.
 """
 from __future__ import annotations
 
 import math
+import re
 from decimal import Decimal
 from typing import Any
 
@@ -37,22 +41,45 @@ def compact_float_for_prompt(x: float) -> float | int:
 
 
 def normalize_prompt_number(x: Any) -> Any:
-    """int/Decimal/float kompakt; Rest unverändert."""
+    """int/Decimal/float kompakt; numpy-Scalars; numerische Strings; sonst unverändert."""
     if x is None:
         return None
     if isinstance(x, bool):
         return x
     if isinstance(x, int) and not isinstance(x, bool):
         return x
+    if isinstance(x, str):
+        s = x.strip()
+        if not s:
+            return x
+        try:
+            if re.fullmatch(r"-?\d+", s):
+                return int(s)
+            xf = float(s)
+        except ValueError:
+            return x
+        if not math.isfinite(xf):
+            return x
+        return compact_float_for_prompt(xf)
     if isinstance(x, Decimal):
         try:
             xf = float(x)
         except Exception:
             return x
+        if not math.isfinite(xf):
+            return x
         return compact_float_for_prompt(xf)
     if isinstance(x, float):
+        if not math.isfinite(x):
+            return x
         return compact_float_for_prompt(x)
-    return x
+    try:
+        xf = float(x)
+    except (TypeError, ValueError):
+        return x
+    if not math.isfinite(xf):
+        return x
+    return compact_float_for_prompt(xf)
 
 
 def compact_json_payload_for_prompts(obj: Any) -> Any:
@@ -73,25 +100,23 @@ def compact_json_payload_for_prompts(obj: Any) -> Any:
 def format_scalar_for_prompt_text(x: Any) -> str:
     """
     Kurzdarstellung für Text-Platzhalter (activity_detail, Tabellen, …).
-    Nutzt dieselbe Komprimierung wie JSON (normalize_prompt_number).
+    Alle Zahlenpfade über normalize_prompt_number; Ausgabe kurz (%g, keine Float-Schweife).
     """
     if x is None:
         return "—"
     if isinstance(x, bool):
         return "ja" if x else "nein"
-    if isinstance(x, str):
-        return x
     n = normalize_prompt_number(x)
     if isinstance(n, bool):
         return "ja" if n else "nein"
+    if isinstance(n, str):
+        return n
     if isinstance(n, int) and not isinstance(n, bool):
         return str(n)
     if isinstance(n, float):
         if not math.isfinite(n):
             return str(n)
-        if abs(n - round(n)) < 1e-9:
-            return str(int(round(n)))
-        return str(n)
+        return "%g" % n
     return str(n)
 
 
@@ -121,7 +146,7 @@ def session_metrics_list_to_key_value_compact(metrics: list[Any] | None) -> dict
         elif dt == "boolean":
             out[str(k)] = bool(v)
         elif dt == "string":
-            out[str(k)] = str(v)
+            out[str(k)] = normalize_prompt_number(v)
         else:
             out[str(k)] = normalize_prompt_number(v)
     return out
diff --git a/backend/tests/test_activity_session_metrics.py b/backend/tests/test_activity_session_metrics.py
index 0de2bdf..dacb44d 100644
--- a/backend/tests/test_activity_session_metrics.py
+++ b/backend/tests/test_activity_session_metrics.py
@@ -6,6 +6,7 @@ from unittest.mock import patch
 import pytest
 
 from data_layer.activity_session_metrics import (
+    _normalize_metric_value_for_read,
     ActivitySessionMetricsError,
     enrich_sessions_with_metrics,
     merge_column_backed_and_eav_metrics,
@@ -206,6 +207,11 @@ def test_row_value_tuple_mapping():
     assert _row_value_tuple("boolean", True) == (None, None, None, True)
 
 
+def test_normalize_metric_string_dtype_compacts_numeric_strings():
+    assert _normalize_metric_value_for_read("string", "51.58181818181818") == 52
+    assert _normalize_metric_value_for_read("string", "Freitext") == "Freitext"
+
+
 class _FakeCursor:
     """Sequences fetchone/fetchall for resolve_activity_attribute_schema."""
 
diff --git a/backend/tests/test_prompt_output_compact.py b/backend/tests/test_prompt_output_compact.py
index f00b627..7789ce8 100644
--- a/backend/tests/test_prompt_output_compact.py
+++ b/backend/tests/test_prompt_output_compact.py
@@ -45,6 +45,29 @@ def test_format_scalar_no_long_float_tail():
     assert len(s) <= 8
 
 
+def test_format_scalar_numeric_string_no_long_tail():
+    s = format_scalar_for_prompt_text("51.581818181818181818")
+    assert "181818" not in s
+
+
+def test_session_metrics_string_dtype_compacts_numeric_strings():
+    sm = [
+        {
+            "key": "temp_c",
+            "data_type": "string",
+            "value": "22.333333333333336",
+        },
+        {
+            "key": "kcal_per_km",
+            "data_type": "string",
+            "value": "51.581818181818181818",
+        },
+    ]
+    out = session_metrics_list_to_key_value_compact(sm)
+    assert out["temp_c"] == 22
+    assert out["kcal_per_km"] == 52
+
+
 def test_session_metrics_key_value_only():
     sm = [
         {
-- 
2.43.0