feat: enhance normalization of metric values for improved handling
- Updated `_normalize_metric_value_for_read` so that values with the `string` data type are passed through `normalize_prompt_number`, which compacts numeric strings and keeps string metrics consistently formatted.
- Extended `normalize_prompt_number` to parse numeric strings and to return non-finite float values (NaN, ±inf) unchanged.
- Added unit tests that validate the new normalization behavior for session metrics and scalar formatting.
This commit is contained in:
parent
178534e9eb
commit
7676897fda
|
|
@ -24,7 +24,7 @@ def _normalize_metric_value_for_read(data_type: str, val: Any) -> Any:
|
|||
return None
|
||||
dt = (data_type or "").strip().lower()
|
||||
if dt == "string":
|
||||
return val
|
||||
return normalize_prompt_number(val)
|
||||
if dt == "boolean":
|
||||
return bool(val)
|
||||
if dt == "integer":
|
||||
|
|
|
|||
|
|
@ -4,10 +4,14 @@ Kompakte Zahlen- und JSON-Aufbereitung für KI-Platzhalter (Token sparen).
|
|||
- Floats: sinnvolle Nachkommastellen je nach Größenordnung (kleine Werte <0,1 mehr Präzision).
|
||||
- ≥10 meist ganzzahlig; Prozent/Verhältnisse über denselben Mechanismus lesbar.
|
||||
- Rekursiv auf dict/list-Strukturen vor json.dumps in _safe_json anwendbar.
|
||||
|
||||
Hinweis: numpy.float64 und numerische Strings (DB/API) sind keine ``float``-Instanzen —
|
||||
diese werden explizit mit float() normalisiert.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import math
|
||||
import re
|
||||
from decimal import Decimal
|
||||
from typing import Any
|
||||
|
||||
|
|
@ -37,22 +41,45 @@ def compact_float_for_prompt(x: float) -> float | int:
|
|||
|
||||
|
||||
def normalize_prompt_number(x: Any) -> Any:
    """Return a compact numeric form of ``x`` for prompt output.

    Handling by input type:

    * ``None``, ``bool`` and ``int`` are returned unchanged.
    * ``str``: surrounding whitespace is ignored for parsing. Text matching
      ``-?\\d+`` becomes an ``int`` (leading zeros are dropped, e.g.
      ``"007"`` -> ``7``); other text that ``float()`` accepts is compacted
      via ``compact_float_for_prompt``. Empty, non-numeric and non-finite
      text (``"nan"``, ``"inf"``) is returned as the original string.
    * ``float``: finite values are compacted, NaN/inf are returned unchanged.
    * Anything else (``Decimal``, numpy scalars, ...): converted with
      ``float()``; if the conversion fails or the result is not finite, the
      original object is returned unchanged.

    Args:
        x: Arbitrary value read from a DB/API payload.

    Returns:
        The compacted number, or ``x`` itself when it is not a finite number.
    """
    # bool is a subclass of int, so this single check covers both.
    if x is None or isinstance(x, int):
        return x

    if isinstance(x, str):
        text = x.strip()
        if not text:
            return x
        try:
            # Keep integer-looking text exact instead of routing it via float.
            if re.fullmatch(r"-?\d+", text):
                return int(text)
            xf = float(text)
        except ValueError:
            return x
    elif isinstance(x, float):
        xf = x
    else:
        # Decimal, numpy scalars and similar are not ``float`` instances.
        # ArithmeticError covers OverflowError (e.g. huge Fraction values)
        # and decimal.InvalidOperation.
        try:
            xf = float(x)
        except (TypeError, ValueError, ArithmeticError):
            return x

    # NaN/inf cannot be compacted meaningfully; hand back the input as is.
    if not math.isfinite(xf):
        return x
    return compact_float_for_prompt(xf)
|
||||
|
||||
|
||||
def compact_json_payload_for_prompts(obj: Any) -> Any:
|
||||
|
|
@ -73,25 +100,23 @@ def compact_json_payload_for_prompts(obj: Any) -> Any:
|
|||
def format_scalar_for_prompt_text(x: Any) -> str:
    """Render a scalar as short text for text placeholders.

    Used for text placeholders such as activity_detail and tables. Every
    non-``None``, non-``bool`` value is first passed through
    ``normalize_prompt_number`` so that numeric strings take the same path
    as real numbers.

    Args:
        x: Value to render.

    Returns:
        ``"—"`` for ``None``, ``"ja"``/``"nein"`` for booleans, the string
        itself when normalization leaves a string, and otherwise a short
        numeric rendering without a long floating-point tail.
    """
    if x is None:
        return "—"
    if isinstance(x, bool):
        return "ja" if x else "nein"

    # Strings are deliberately NOT returned early: numeric text must be
    # normalized like any other number.
    n = normalize_prompt_number(x)

    # Defensive: bool is an int subclass and must not be printed as 0/1.
    if isinstance(n, bool):
        return "ja" if n else "nein"
    if isinstance(n, str):
        return n
    if isinstance(n, int):
        return str(n)
    if isinstance(n, float):
        if not math.isfinite(n):
            return str(n)
        # Integral values print as plain integers; "%g" alone would switch to
        # exponent notation from 1e6 upwards. The magnitude cap keeps very
        # large values from expanding into long digit strings.
        if abs(n) < 1e15 and abs(n - round(n)) < 1e-9:
            return str(int(round(n)))
        return "%g" % n
    return str(n)
|
||||
|
||||
|
||||
|
|
@ -121,7 +146,7 @@ def session_metrics_list_to_key_value_compact(metrics: list[Any] | None) -> dict
|
|||
elif dt == "boolean":
|
||||
out[str(k)] = bool(v)
|
||||
elif dt == "string":
|
||||
out[str(k)] = str(v)
|
||||
out[str(k)] = normalize_prompt_number(v)
|
||||
else:
|
||||
out[str(k)] = normalize_prompt_number(v)
|
||||
return out
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@ from unittest.mock import patch
|
|||
import pytest
|
||||
|
||||
from data_layer.activity_session_metrics import (
|
||||
_normalize_metric_value_for_read,
|
||||
ActivitySessionMetricsError,
|
||||
enrich_sessions_with_metrics,
|
||||
merge_column_backed_and_eav_metrics,
|
||||
|
|
@ -206,6 +207,11 @@ def test_row_value_tuple_mapping():
|
|||
assert _row_value_tuple("boolean", True) == (None, None, None, True)
|
||||
|
||||
|
||||
def test_normalize_metric_string_dtype_compacts_numeric_strings():
    """String-typed metrics: numeric text is compacted, free text is kept."""
    expectations = (
        ("51.58181818181818", 52),
        ("Freitext", "Freitext"),
    )
    for raw_value, expected in expectations:
        assert _normalize_metric_value_for_read("string", raw_value) == expected
|
||||
|
||||
|
||||
class _FakeCursor:
|
||||
"""Sequences fetchone/fetchall for resolve_activity_attribute_schema."""
|
||||
|
||||
|
|
|
|||
|
|
@ -45,6 +45,29 @@ def test_format_scalar_no_long_float_tail():
|
|||
assert len(s) <= 8
|
||||
|
||||
|
||||
def test_format_scalar_numeric_string_no_long_tail():
    """A numeric string must not be rendered with its repeating decimal tail."""
    rendered = format_scalar_for_prompt_text("51.581818181818181818")
    assert rendered.find("181818") == -1
|
||||
|
||||
|
||||
def test_session_metrics_string_dtype_compacts_numeric_strings():
    """Session metrics declared as ``string`` still get numeric compaction."""
    raw_values = (
        ("temp_c", "22.333333333333336"),
        ("kcal_per_km", "51.581818181818181818"),
    )
    metrics = [
        {"key": key, "data_type": "string", "value": value}
        for key, value in raw_values
    ]

    compacted = session_metrics_list_to_key_value_compact(metrics)

    assert compacted["temp_c"] == 22
    assert compacted["kcal_per_km"] == 52
|
||||
|
||||
|
||||
def test_session_metrics_key_value_only():
|
||||
sm = [
|
||||
{
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user