feat: normalize numeric strings and non-finite floats in metric values
- Updated `_normalize_metric_value_for_read` to compact numeric strings and ensure consistent formatting for string data types.
- Enhanced `normalize_prompt_number` to handle numeric strings and non-finite float values effectively.
- Improved unit tests to validate the new normalization behavior for session metrics and scalar formatting.
This commit is contained in:
parent
178534e9eb
commit
7676897fda
|
|
@ -24,7 +24,7 @@ def _normalize_metric_value_for_read(data_type: str, val: Any) -> Any:
|
||||||
return None
|
return None
|
||||||
dt = (data_type or "").strip().lower()
|
dt = (data_type or "").strip().lower()
|
||||||
if dt == "string":
|
if dt == "string":
|
||||||
return val
|
return normalize_prompt_number(val)
|
||||||
if dt == "boolean":
|
if dt == "boolean":
|
||||||
return bool(val)
|
return bool(val)
|
||||||
if dt == "integer":
|
if dt == "integer":
|
||||||
|
|
|
||||||
|
|
@ -4,10 +4,14 @@ Kompakte Zahlen- und JSON-Aufbereitung für KI-Platzhalter (Token sparen).
|
||||||
- Floats: sinnvolle Nachkommastellen je nach Größenordnung (kleine Werte <0,1 mehr Präzision).
|
- Floats: sinnvolle Nachkommastellen je nach Größenordnung (kleine Werte <0,1 mehr Präzision).
|
||||||
- ≥10 meist ganzzahlig; Prozent/Verhältnisse über denselben Mechanismus lesbar.
|
- ≥10 meist ganzzahlig; Prozent/Verhältnisse über denselben Mechanismus lesbar.
|
||||||
- Rekursiv auf dict/list-Strukturen vor json.dumps in _safe_json anwendbar.
|
- Rekursiv auf dict/list-Strukturen vor json.dumps in _safe_json anwendbar.
|
||||||
|
|
||||||
|
Hinweis: numpy.float64 und numerische Strings (DB/API) sind keine ``float``-Instanzen —
|
||||||
|
diese werden explizit mit float() normalisiert.
|
||||||
"""
|
"""
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import math
|
import math
|
||||||
|
import re
|
||||||
from decimal import Decimal
|
from decimal import Decimal
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
|
|
@ -37,22 +41,45 @@ def compact_float_for_prompt(x: float) -> float | int:
|
||||||
|
|
||||||
|
|
||||||
def normalize_prompt_number(x: Any) -> Any:
|
def normalize_prompt_number(x: Any) -> Any:
|
||||||
"""int/Decimal/float kompakt; Rest unverändert."""
|
"""int/Decimal/float kompakt; numpy-Scalars; numerische Strings; sonst unverändert."""
|
||||||
if x is None:
|
if x is None:
|
||||||
return None
|
return None
|
||||||
if isinstance(x, bool):
|
if isinstance(x, bool):
|
||||||
return x
|
return x
|
||||||
if isinstance(x, int) and not isinstance(x, bool):
|
if isinstance(x, int) and not isinstance(x, bool):
|
||||||
return x
|
return x
|
||||||
|
if isinstance(x, str):
|
||||||
|
s = x.strip()
|
||||||
|
if not s:
|
||||||
|
return x
|
||||||
|
try:
|
||||||
|
if re.fullmatch(r"-?\d+", s):
|
||||||
|
return int(s)
|
||||||
|
xf = float(s)
|
||||||
|
except ValueError:
|
||||||
|
return x
|
||||||
|
if not math.isfinite(xf):
|
||||||
|
return x
|
||||||
|
return compact_float_for_prompt(xf)
|
||||||
if isinstance(x, Decimal):
|
if isinstance(x, Decimal):
|
||||||
try:
|
try:
|
||||||
xf = float(x)
|
xf = float(x)
|
||||||
except Exception:
|
except Exception:
|
||||||
return x
|
return x
|
||||||
|
if not math.isfinite(xf):
|
||||||
|
return x
|
||||||
return compact_float_for_prompt(xf)
|
return compact_float_for_prompt(xf)
|
||||||
if isinstance(x, float):
|
if isinstance(x, float):
|
||||||
|
if not math.isfinite(x):
|
||||||
|
return x
|
||||||
return compact_float_for_prompt(x)
|
return compact_float_for_prompt(x)
|
||||||
return x
|
try:
|
||||||
|
xf = float(x)
|
||||||
|
except (TypeError, ValueError):
|
||||||
|
return x
|
||||||
|
if not math.isfinite(xf):
|
||||||
|
return x
|
||||||
|
return compact_float_for_prompt(xf)
|
||||||
|
|
||||||
|
|
||||||
def compact_json_payload_for_prompts(obj: Any) -> Any:
|
def compact_json_payload_for_prompts(obj: Any) -> Any:
|
||||||
|
|
@ -73,25 +100,23 @@ def compact_json_payload_for_prompts(obj: Any) -> Any:
|
||||||
def format_scalar_for_prompt_text(x: Any) -> str:
|
def format_scalar_for_prompt_text(x: Any) -> str:
|
||||||
"""
|
"""
|
||||||
Kurzdarstellung für Text-Platzhalter (activity_detail, Tabellen, …).
|
Kurzdarstellung für Text-Platzhalter (activity_detail, Tabellen, …).
|
||||||
Nutzt dieselbe Komprimierung wie JSON (normalize_prompt_number).
|
Alle Zahlenpfade über normalize_prompt_number; Ausgabe kurz (%g, keine Float-Schweife).
|
||||||
"""
|
"""
|
||||||
if x is None:
|
if x is None:
|
||||||
return "—"
|
return "—"
|
||||||
if isinstance(x, bool):
|
if isinstance(x, bool):
|
||||||
return "ja" if x else "nein"
|
return "ja" if x else "nein"
|
||||||
if isinstance(x, str):
|
|
||||||
return x
|
|
||||||
n = normalize_prompt_number(x)
|
n = normalize_prompt_number(x)
|
||||||
if isinstance(n, bool):
|
if isinstance(n, bool):
|
||||||
return "ja" if n else "nein"
|
return "ja" if n else "nein"
|
||||||
|
if isinstance(n, str):
|
||||||
|
return n
|
||||||
if isinstance(n, int) and not isinstance(n, bool):
|
if isinstance(n, int) and not isinstance(n, bool):
|
||||||
return str(n)
|
return str(n)
|
||||||
if isinstance(n, float):
|
if isinstance(n, float):
|
||||||
if not math.isfinite(n):
|
if not math.isfinite(n):
|
||||||
return str(n)
|
return str(n)
|
||||||
if abs(n - round(n)) < 1e-9:
|
return "%g" % n
|
||||||
return str(int(round(n)))
|
|
||||||
return str(n)
|
|
||||||
return str(n)
|
return str(n)
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -121,7 +146,7 @@ def session_metrics_list_to_key_value_compact(metrics: list[Any] | None) -> dict
|
||||||
elif dt == "boolean":
|
elif dt == "boolean":
|
||||||
out[str(k)] = bool(v)
|
out[str(k)] = bool(v)
|
||||||
elif dt == "string":
|
elif dt == "string":
|
||||||
out[str(k)] = str(v)
|
out[str(k)] = normalize_prompt_number(v)
|
||||||
else:
|
else:
|
||||||
out[str(k)] = normalize_prompt_number(v)
|
out[str(k)] = normalize_prompt_number(v)
|
||||||
return out
|
return out
|
||||||
|
|
|
||||||
|
|
@ -6,6 +6,7 @@ from unittest.mock import patch
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from data_layer.activity_session_metrics import (
|
from data_layer.activity_session_metrics import (
|
||||||
|
_normalize_metric_value_for_read,
|
||||||
ActivitySessionMetricsError,
|
ActivitySessionMetricsError,
|
||||||
enrich_sessions_with_metrics,
|
enrich_sessions_with_metrics,
|
||||||
merge_column_backed_and_eav_metrics,
|
merge_column_backed_and_eav_metrics,
|
||||||
|
|
@ -206,6 +207,11 @@ def test_row_value_tuple_mapping():
|
||||||
assert _row_value_tuple("boolean", True) == (None, None, None, True)
|
assert _row_value_tuple("boolean", True) == (None, None, None, True)
|
||||||
|
|
||||||
|
|
||||||
|
def test_normalize_metric_string_dtype_compacts_numeric_strings():
|
||||||
|
assert _normalize_metric_value_for_read("string", "51.58181818181818") == 52
|
||||||
|
assert _normalize_metric_value_for_read("string", "Freitext") == "Freitext"
|
||||||
|
|
||||||
|
|
||||||
class _FakeCursor:
|
class _FakeCursor:
|
||||||
"""Sequences fetchone/fetchall for resolve_activity_attribute_schema."""
|
"""Sequences fetchone/fetchall for resolve_activity_attribute_schema."""
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -45,6 +45,29 @@ def test_format_scalar_no_long_float_tail():
|
||||||
assert len(s) <= 8
|
assert len(s) <= 8
|
||||||
|
|
||||||
|
|
||||||
|
def test_format_scalar_numeric_string_no_long_tail():
|
||||||
|
s = format_scalar_for_prompt_text("51.581818181818181818")
|
||||||
|
assert "181818" not in s
|
||||||
|
|
||||||
|
|
||||||
|
def test_session_metrics_string_dtype_compacts_numeric_strings():
|
||||||
|
sm = [
|
||||||
|
{
|
||||||
|
"key": "temp_c",
|
||||||
|
"data_type": "string",
|
||||||
|
"value": "22.333333333333336",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"key": "kcal_per_km",
|
||||||
|
"data_type": "string",
|
||||||
|
"value": "51.581818181818181818",
|
||||||
|
},
|
||||||
|
]
|
||||||
|
out = session_metrics_list_to_key_value_compact(sm)
|
||||||
|
assert out["temp_c"] == 22
|
||||||
|
assert out["kcal_per_km"] == 52
|
||||||
|
|
||||||
|
|
||||||
def test_session_metrics_key_value_only():
|
def test_session_metrics_key_value_only():
|
||||||
sm = [
|
sm = [
|
||||||
{
|
{
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user