feat: enhance normalization of metric values for improved handling
All checks were successful
Deploy Development / deploy (push) Successful in 50s
Build Test / pytest-backend (push) Successful in 4s
Build Test / lint-backend (push) Successful in 0s
Build Test / build-frontend (push) Successful in 17s

- Updated `_normalize_metric_value_for_read` to compact numeric strings and ensure consistent formatting for string data types.
- Enhanced `normalize_prompt_number` to handle numeric strings and non-finite float values effectively.
- Improved unit tests to validate the new normalization behavior for session metrics and scalar formatting.
This commit is contained in:
Lars 2026-04-18 10:43:21 +02:00
parent 178534e9eb
commit 7676897fda
4 changed files with 64 additions and 10 deletions

View File

@ -24,7 +24,7 @@ def _normalize_metric_value_for_read(data_type: str, val: Any) -> Any:
return None return None
dt = (data_type or "").strip().lower() dt = (data_type or "").strip().lower()
if dt == "string": if dt == "string":
return val return normalize_prompt_number(val)
if dt == "boolean": if dt == "boolean":
return bool(val) return bool(val)
if dt == "integer": if dt == "integer":

View File

@ -4,10 +4,14 @@ Kompakte Zahlen- und JSON-Aufbereitung für KI-Platzhalter (Token sparen).
- Floats: sinnvolle Nachkommastellen je nach Größenordnung (kleine Werte <0,1 mehr Präzision). - Floats: sinnvolle Nachkommastellen je nach Größenordnung (kleine Werte <0,1 mehr Präzision).
- 10 meist ganzzahlig; Prozent/Verhältnisse über denselben Mechanismus lesbar. - 10 meist ganzzahlig; Prozent/Verhältnisse über denselben Mechanismus lesbar.
- Rekursiv auf dict/list-Strukturen vor json.dumps in _safe_json anwendbar. - Rekursiv auf dict/list-Strukturen vor json.dumps in _safe_json anwendbar.
Hinweis: NumPy-Skalare wie numpy.float32 und numerische Strings (DB/API) sind keine ``float``-Instanzen –
diese werden explizit mit float() normalisiert (numpy.float64 ist bereits eine ``float``-Unterklasse).
""" """
from __future__ import annotations from __future__ import annotations
import math import math
import re
from decimal import Decimal from decimal import Decimal
from typing import Any from typing import Any
@ -37,22 +41,45 @@ def compact_float_for_prompt(x: float) -> float | int:
def normalize_prompt_number(x: Any) -> Any:
    """Return a compact numeric form of ``x`` for prompts, or ``x`` unchanged.

    Handling by input type:

    * ``None``, ``bool`` and ``int`` are returned as they are.
    * ``str``: surrounding whitespace is ignored for parsing. A pure digit
      string (optional leading ``-``) becomes an ``int``; any other string
      that ``float()`` accepts is compacted; everything else (empty,
      free text, ``"nan"``/``"inf"``) is returned as the original string.
    * ``float``: finite values are compacted, non-finite values are returned
      unchanged.
    * Anything else (``Decimal``, objects implementing ``__float__`` such as
      scalar types that do not subclass ``float``) is converted with
      ``float()``; if the conversion fails or yields a non-finite value the
      original object is returned.

    Returns:
        The result of ``compact_float_for_prompt`` for finite numeric input,
        an ``int`` for integer-like strings, otherwise ``x`` itself.
    """
    # bool is a subclass of int, so a single check covers both.
    if x is None or isinstance(x, int):
        return x

    if isinstance(x, str):
        s = x.strip()
        if not s:
            return x
        try:
            # NOTE(review): digit strings with leading zeros such as "007"
            # are converted as well (to 7) - confirm that no identifier-like
            # values are routed through this function.
            if re.fullmatch(r"-?\d+", s):
                return int(s)
            xf = float(s)
        except ValueError:
            # Not a number (or an integer literal beyond the interpreter's
            # digit limit): keep the caller's string untouched.
            return x
    elif isinstance(x, float):
        xf = x
    else:
        try:
            xf = float(x)
        except (ArithmeticError, TypeError, ValueError):
            # ArithmeticError covers OverflowError, which float() raises for
            # values too large for a double (e.g. a huge Fraction); such
            # values are passed through instead of crashing the caller.
            return x

    if not math.isfinite(xf):
        # NaN / +-inf cannot be compacted meaningfully; hand back the input.
        return x
    return compact_float_for_prompt(xf)
def compact_json_payload_for_prompts(obj: Any) -> Any: def compact_json_payload_for_prompts(obj: Any) -> Any:
@ -73,25 +100,23 @@ def compact_json_payload_for_prompts(obj: Any) -> Any:
def format_scalar_for_prompt_text(x: Any) -> str:
    """Render a scalar as short text for prompt placeholders.

    ``None`` becomes an empty string and booleans become ``"ja"``/``"nein"``.
    Every other value is first passed through ``normalize_prompt_number``;
    a string result is returned as is, a finite float is formatted with
    ``%g`` and anything else with ``str()``.

    Note: ``%g`` keeps six significant digits and switches to exponent
    notation for magnitudes below 1e-4 or from 1e6 upwards.
    """
    if x is None:
        return ""

    # Booleans bypass the numeric normalisation entirely.
    value = x if isinstance(x, bool) else normalize_prompt_number(x)

    if isinstance(value, bool):
        return "ja" if value else "nein"
    if isinstance(value, str):
        return value
    if isinstance(value, float) and math.isfinite(value):
        return "%g" % value
    # Integers, non-finite floats and any remaining object use plain str().
    return str(value)
@ -121,7 +146,7 @@ def session_metrics_list_to_key_value_compact(metrics: list[Any] | None) -> dict
elif dt == "boolean": elif dt == "boolean":
out[str(k)] = bool(v) out[str(k)] = bool(v)
elif dt == "string": elif dt == "string":
out[str(k)] = str(v) out[str(k)] = normalize_prompt_number(v)
else: else:
out[str(k)] = normalize_prompt_number(v) out[str(k)] = normalize_prompt_number(v)
return out return out

View File

@ -6,6 +6,7 @@ from unittest.mock import patch
import pytest import pytest
from data_layer.activity_session_metrics import ( from data_layer.activity_session_metrics import (
_normalize_metric_value_for_read,
ActivitySessionMetricsError, ActivitySessionMetricsError,
enrich_sessions_with_metrics, enrich_sessions_with_metrics,
merge_column_backed_and_eav_metrics, merge_column_backed_and_eav_metrics,
@ -206,6 +207,11 @@ def test_row_value_tuple_mapping():
assert _row_value_tuple("boolean", True) == (None, None, None, True) assert _row_value_tuple("boolean", True) == (None, None, None, True)
def test_normalize_metric_string_dtype_compacts_numeric_strings():
    """A "string" metric is compacted when numeric and left alone otherwise."""
    compacted = _normalize_metric_value_for_read("string", "51.58181818181818")
    untouched = _normalize_metric_value_for_read("string", "Freitext")

    assert compacted == 52
    assert untouched == "Freitext"
class _FakeCursor: class _FakeCursor:
"""Sequences fetchone/fetchall for resolve_activity_attribute_schema.""" """Sequences fetchone/fetchall for resolve_activity_attribute_schema."""

View File

@ -45,6 +45,29 @@ def test_format_scalar_no_long_float_tail():
assert len(s) <= 8 assert len(s) <= 8
def test_format_scalar_numeric_string_no_long_tail():
    """A numeric string with a long repeating tail is rendered compactly."""
    s = format_scalar_for_prompt_text("51.581818181818181818")
    assert "181818" not in s
    # Pin the exact text as well: the negative check alone would also pass
    # for an empty or otherwise wrong result.
    assert s == "52"
def test_session_metrics_string_dtype_compacts_numeric_strings():
    """Numeric values of "string"-typed session metrics come back compacted."""
    raw_values = {
        "temp_c": "22.333333333333336",
        "kcal_per_km": "51.581818181818181818",
    }
    sm = [
        {"key": key, "data_type": "string", "value": value}
        for key, value in raw_values.items()
    ]

    out = session_metrics_list_to_key_value_compact(sm)

    assert out["temp_c"] == 22
    assert out["kcal_per_km"] == 52
def test_session_metrics_key_value_only(): def test_session_metrics_key_value_only():
sm = [ sm = [
{ {