feat: enhance normalization of metric values for improved handling

- Updated `_normalize_metric_value_for_read` to compact numeric strings and ensure consistent formatting for string data types.
- Enhanced `normalize_prompt_number` to handle numeric strings and non-finite float values effectively.
- Improved unit tests to validate the new normalization behavior for session metrics and scalar formatting.
2026-04-18 10:43:21 +02:00 · 2026-04-18 10:43:21 +02:00 · 7676897fda
commit 7676897fda
parent 178534e9eb
4 changed files with 64 additions and 10 deletions
--- a/backend/data_layer/activity_session_metrics.py
+++ b/backend/data_layer/activity_session_metrics.py
@@ -24,7 +24,7 @@ def _normalize_metric_value_for_read(data_type: str, val: Any) -> Any:
        return None
    dt = (data_type or "").strip().lower()
    if dt == "string":
-        return val
+        return normalize_prompt_number(val)
    if dt == "boolean":
        return bool(val)
    if dt == "integer":
--- a/backend/data_layer/prompt_output_compact.py
+++ b/backend/data_layer/prompt_output_compact.py
@@ -4,10 +4,14 @@ Kompakte Zahlen- und JSON-Aufbereitung für KI-Platzhalter (Token sparen).
 - Floats: sinnvolle Nachkommastellen je nach Größenordnung (kleine Werte <0,1 mehr Präzision).
 - ≥10 meist ganzzahlig; Prozent/Verhältnisse über denselben Mechanismus lesbar.
 - Rekursiv auf dict/list-Strukturen vor json.dumps in _safe_json anwendbar.
+
+Hinweis: numpy.float64 und numerische Strings (DB/API) sind keine ``float``-Instanzen —
+diese werden explizit mit float() normalisiert.
 """
 from __future__ import annotations

 import math
+import re
 from decimal import Decimal
 from typing import Any

@@ -37,22 +41,45 @@ def compact_float_for_prompt(x: float) -> float | int:


 def normalize_prompt_number(x: Any) -> Any:
-    """int/Decimal/float kompakt; Rest unverändert."""
+    """int/Decimal/float kompakt; numpy-Scalars; numerische Strings; sonst unverändert."""
    if x is None:
        return None
    if isinstance(x, bool):
        return x
    if isinstance(x, int) and not isinstance(x, bool):
        return x
+    if isinstance(x, str):
+        s = x.strip()
+        if not s:
+            return x
+        try:
+            if re.fullmatch(r"-?\d+", s):
+                return int(s)
+            xf = float(s)
+        except ValueError:
+            return x
+        if not math.isfinite(xf):
+            return x
+        return compact_float_for_prompt(xf)
    if isinstance(x, Decimal):
        try:
            xf = float(x)
        except Exception:
            return x
+        if not math.isfinite(xf):
+            return x
        return compact_float_for_prompt(xf)
    if isinstance(x, float):
+        if not math.isfinite(x):
+            return x
        return compact_float_for_prompt(x)
-    return x
+    try:
+        xf = float(x)
+    except (TypeError, ValueError):
+        return x
+    if not math.isfinite(xf):
+        return x
+    return compact_float_for_prompt(xf)


 def compact_json_payload_for_prompts(obj: Any) -> Any:
@@ -73,25 +100,23 @@ def compact_json_payload_for_prompts(obj: Any) -> Any:
 def format_scalar_for_prompt_text(x: Any) -> str:
    """
    Kurzdarstellung für Text-Platzhalter (activity_detail, Tabellen, …).
-    Nutzt dieselbe Komprimierung wie JSON (normalize_prompt_number).
+    Alle Zahlenpfade über normalize_prompt_number; Ausgabe kurz (%g, keine Float-Schweife).
    """
    if x is None:
        return "—"
    if isinstance(x, bool):
        return "ja" if x else "nein"
-    if isinstance(x, str):
-        return x
    n = normalize_prompt_number(x)
    if isinstance(n, bool):
        return "ja" if n else "nein"
+    if isinstance(n, str):
+        return n
    if isinstance(n, int) and not isinstance(n, bool):
        return str(n)
    if isinstance(n, float):
        if not math.isfinite(n):
            return str(n)
-        if abs(n - round(n)) < 1e-9:
-            return str(int(round(n)))
-        return str(n)
+        return "%g" % n
    return str(n)


@@ -121,7 +146,7 @@ def session_metrics_list_to_key_value_compact(metrics: list[Any] | None) -> dict
        elif dt == "boolean":
            out[str(k)] = bool(v)
        elif dt == "string":
-            out[str(k)] = str(v)
+            out[str(k)] = normalize_prompt_number(v)
        else:
            out[str(k)] = normalize_prompt_number(v)
    return out
--- a/backend/tests/test_activity_session_metrics.py
+++ b/backend/tests/test_activity_session_metrics.py
@@ -6,6 +6,7 @@ from unittest.mock import patch
 import pytest

 from data_layer.activity_session_metrics import (
+    _normalize_metric_value_for_read,
    ActivitySessionMetricsError,
    enrich_sessions_with_metrics,
    merge_column_backed_and_eav_metrics,
@@ -206,6 +207,11 @@ def test_row_value_tuple_mapping():
    assert _row_value_tuple("boolean", True) == (None, None, None, True)


+def test_normalize_metric_string_dtype_compacts_numeric_strings():
+    assert _normalize_metric_value_for_read("string", "51.58181818181818") == 52
+    assert _normalize_metric_value_for_read("string", "Freitext") == "Freitext"
+
+
 class _FakeCursor:
    """Sequences fetchone/fetchall for resolve_activity_attribute_schema."""

--- a/backend/tests/test_prompt_output_compact.py
+++ b/backend/tests/test_prompt_output_compact.py
@@ -45,6 +45,29 @@ def test_format_scalar_no_long_float_tail():
    assert len(s) <= 8


+def test_format_scalar_numeric_string_no_long_tail():
+    s = format_scalar_for_prompt_text("51.581818181818181818")
+    assert "181818" not in s
+
+
+def test_session_metrics_string_dtype_compacts_numeric_strings():
+    sm = [
+        {
+            "key": "temp_c",
+            "data_type": "string",
+            "value": "22.333333333333336",
+        },
+        {
+            "key": "kcal_per_km",
+            "data_type": "string",
+            "value": "51.581818181818181818",
+        },
+    ]
+    out = session_metrics_list_to_key_value_compact(sm)
+    assert out["temp_c"] == 22
+    assert out["kcal_per_km"] == 52
+
+
 def test_session_metrics_key_value_only():
    sm = [
        {