mitai-jinkendo/backend/data_layer/prompt_output_compact.py
Lars 7676897fda
All checks were successful
Deploy Development / deploy (push) Successful in 50s
Build Test / pytest-backend (push) Successful in 4s
Build Test / lint-backend (push) Successful in 0s
Build Test / build-frontend (push) Successful in 17s
feat: enhance normalization of metric values for improved handling
- Updated `_normalize_metric_value_for_read` to compact numeric strings and ensure consistent formatting for string data types.
- Enhanced `normalize_prompt_number` to handle numeric strings and non-finite float values effectively.
- Improved unit tests to validate the new normalization behavior for session metrics and scalar formatting.
2026-04-18 10:43:21 +02:00

153 lines
4.5 KiB
Python

"""
Compact number and JSON preparation for AI prompt placeholders (saves tokens).
- Floats: a sensible number of decimals depending on magnitude (small values < 0.1 keep more precision).
- Values >= 10 are rounded to integers; percentages/ratios stay readable through the same mechanism.
- Can be applied recursively to dict/list structures before json.dumps in _safe_json.
Note: numeric strings (DB/API) and NumPy scalars such as numpy.float32 or numpy.int64 are not
``float`` instances (numpy.float64 is a ``float`` subclass and takes the regular float path) —
these values are normalized explicitly via float().
"""
from __future__ import annotations
import math
import re
from decimal import Decimal
from typing import Any
def compact_float_for_prompt(x: float) -> float | int:
    """
    Drop unnecessary decimals from *x* while keeping small magnitudes readable.

    Rounding depends on the magnitude of ``x``:

    - ``|x| >= 10``: rounded to the nearest integer (returned as ``int``)
    - ``0.1 <= |x| < 10``: two decimals
    - ``0.01 <= |x| < 0.1``: three decimals
    - ``|x| < 0.01``: four decimals

    Whenever the rounded value is integral it is returned as ``int``. This
    includes values that round to zero, so tiny inputs never surface as
    ``0.0`` or ``-0.0`` in the output.

    Non-finite inputs (NaN, +/-inf) are returned unchanged.
    """
    if not math.isfinite(x):
        return x

    magnitude = abs(x)
    if magnitude >= 10.0:
        return int(round(x))

    if magnitude >= 0.1:
        digits = 2
    elif magnitude >= 0.01:
        digits = 3
    else:
        digits = 4

    rounded = round(x, digits)
    nearest_int = int(round(rounded))
    # Collapse integral results (including +0.0 / -0.0) to a plain int so the
    # serialized form is "0" / "2" rather than "-0.0" / "2.0".
    if abs(rounded - nearest_int) < 1e-6:
        return nearest_int
    return rounded
def normalize_prompt_number(x: Any) -> Any:
    """
    Return a compact numeric form of *x* where possible, otherwise *x* itself.

    - ``None``, ``bool`` and ``int`` values pass through untouched.
    - Strings are stripped; pure integer strings (optional leading ``-``)
      become ``int``, other strings that ``float()`` accepts are compacted,
      and everything else (empty, non-numeric, non-finite) is returned as the
      original string.
    - ``Decimal``, ``float`` and any other object accepted by ``float()``
      (for example NumPy scalars) are compacted via
      ``compact_float_for_prompt``.
    - Values that cannot be converted, or that are not finite, are returned
      unchanged.
    """
    # bool is a subclass of int, so one check covers both pass-through types.
    if x is None or isinstance(x, (bool, int)):
        return x

    if isinstance(x, str):
        text = x.strip()
        if text == "":
            return x
        if re.fullmatch(r"-?\d+", text) is not None:
            try:
                return int(text)
            except ValueError:
                return x
        try:
            number = float(text)
        except ValueError:
            return x
    elif isinstance(x, Decimal):
        try:
            number = float(x)
        except Exception:
            return x
    elif isinstance(x, float):
        number = x
    else:
        try:
            number = float(x)
        except (TypeError, ValueError):
            return x

    # Shared tail: NaN/inf are handed back in their original form.
    if math.isfinite(number):
        return compact_float_for_prompt(number)
    return x
def compact_json_payload_for_prompts(obj: Any) -> Any:
    """
    Build a deep copy of *obj* in which every leaf value is compacted.

    Dicts, lists and tuples are walked recursively; the result is a plain
    ``dict``, ``list`` or ``tuple`` respectively. Dict keys are kept as they
    are. Every other value is passed through ``normalize_prompt_number``, so
    numeric leaf strings are converted to numbers while non-numeric strings
    stay unchanged.
    """
    if obj is None:
        return None

    if isinstance(obj, dict):
        compacted = {}
        for key, value in obj.items():
            compacted[key] = compact_json_payload_for_prompts(value)
        return compacted

    if isinstance(obj, tuple):
        return tuple(compact_json_payload_for_prompts(item) for item in obj)

    if isinstance(obj, list):
        return [compact_json_payload_for_prompts(item) for item in obj]

    return normalize_prompt_number(obj)
def format_scalar_for_prompt_text(x: Any) -> str:
    """
    Render a scalar as short text for text placeholders (activity details, tables, ...).

    ``None`` becomes an empty string and booleans become ``"ja"`` / ``"nein"``.
    Everything else is first passed through ``normalize_prompt_number``;
    strings are returned as they are, finite floats are formatted with
    ``%g`` (no trailing float noise), and all remaining values use ``str()``.
    """
    if x is None:
        return ""

    # Booleans skip normalization; any other value is compacted first.
    value = x if isinstance(x, bool) else normalize_prompt_number(x)

    if isinstance(value, bool):
        return "ja" if value else "nein"
    if isinstance(value, str):
        return value
    if isinstance(value, float) and math.isfinite(value):
        return "%g" % value
    # Integers, non-finite floats and any other object.
    return str(value)
def session_metrics_list_to_key_value_compact(metrics: list[Any] | None) -> dict[str, Any]:
    """
    Flatten session metrics into a ``key -> value`` dict for AI JSON payloads.

    Only the key and the value are kept (no repeated names/descriptions);
    the semantics are expected to be supplied separately via
    ``{{training_parameters_glossary_md}}`` in the prompt.

    Each entry must be a dict with a truthy ``"key"``; other entries are
    skipped. The value is converted according to ``"data_type"``
    (case-insensitive):

    - ``"integer"``: ``int(value)``; if that fails, the value is handed to
      ``normalize_prompt_number`` instead.
    - ``"boolean"``: string values are parsed textually, so ``"false"``,
      ``"0"``, ``"f"``, ``"no"``, ``"n"``, ``"nein"``, ``"off"`` and blank
      strings yield ``False``; any other value uses ``bool(value)``.
    - anything else (including ``"string"``): ``normalize_prompt_number``.

    ``None`` values are kept as ``None``. ``metrics=None`` yields ``{}``.
    """
    # Textual spellings of "false"; bool("false") would otherwise be True.
    false_tokens = {"", "0", "false", "f", "no", "n", "nein", "off"}

    out: dict[str, Any] = {}
    for metric in metrics or []:
        if not isinstance(metric, dict):
            continue
        key = metric.get("key")
        if not key:
            continue
        name = str(key)

        value = metric.get("value")
        if value is None:
            out[name] = None
            continue

        # str() guards against a non-string data_type breaking .lower().
        data_type = str(metric.get("data_type") or "").lower()
        if data_type == "integer":
            try:
                out[name] = int(value)
            except (TypeError, ValueError, OverflowError):
                # OverflowError: int() of +/-inf; fall back to the generic path.
                out[name] = normalize_prompt_number(value)
        elif data_type == "boolean":
            if isinstance(value, str):
                out[name] = value.strip().lower() not in false_tokens
            else:
                out[name] = bool(value)
        else:
            out[name] = normalize_prompt_number(value)
    return out