diff --git a/backend/data_layer/activity_metrics.py b/backend/data_layer/activity_metrics.py index 9c27451..ebb1731 100644 --- a/backend/data_layer/activity_metrics.py +++ b/backend/data_layer/activity_metrics.py @@ -25,6 +25,10 @@ import statistics from db import get_db, get_cursor, r2d from data_layer.activity_session_metrics import enrich_sessions_with_metrics from data_layer.utils import calculate_confidence, safe_float, safe_int, serialize_dates +from data_layer.prompt_output_compact import ( + normalize_prompt_number, + session_metrics_list_to_key_value_compact, +) def get_activity_summary_data( @@ -1094,6 +1098,10 @@ def get_training_sessions_recent_weeks_data( Letzte Wochen mit Einzeltrainings für KI-Kontext (Dauer, kcal, HF, Typ). weeks: Anzahl zurückliegender ISO-Kalenderwochen (Default 4). + + session_metrics pro Einheit: kompaktes Objekt ``{key: Wert}`` (keine wiederholten + Namen/Beschreibungen). Bedeutung der Keys: Platzhalter ``{{training_parameters_glossary_md}}``. + Zahlen werden für Prompt-Token kompakt gerundet. 
""" days = max(weeks * 7, 7) with get_db() as conn: @@ -1131,6 +1139,8 @@ def get_training_sessions_recent_weeks_data( "days_loaded": days, "session_count": 0, "confidence": "insufficient", + "session_metrics_shape": "key_value", + "metric_semantics_placeholder": "{{training_parameters_glossary_md}}", }, } @@ -1149,6 +1159,7 @@ def get_training_sessions_recent_weeks_data( kcal_f = float(kcal) if kcal is not None else None hr_a = r.get("hr_avg") hr_m = r.get("hr_max") + sm_compact = session_metrics_list_to_key_value_compact(r.get("session_metrics")) by_week[wk].append( { "id": str(r["id"]), @@ -1157,12 +1168,12 @@ def get_training_sessions_recent_weeks_data( "activity_type": r.get("activity_type"), "training_category": r.get("training_category"), "training_type_name": r.get("training_type_name"), - "duration_min": dur_f, - "kcal_active": kcal_f, + "duration_min": normalize_prompt_number(dur_f) if dur_f is not None else None, + "kcal_active": normalize_prompt_number(kcal_f) if kcal_f is not None else None, "hr_avg": int(hr_a) if hr_a is not None else None, "hr_max": int(hr_m) if hr_m is not None else None, "rpe": int(r["rpe"]) if r.get("rpe") is not None else None, - "session_metrics": r.get("session_metrics", []), + "session_metrics": sm_compact, } ) @@ -1177,6 +1188,8 @@ def get_training_sessions_recent_weeks_data( "days_loaded": days, "session_count": len(rows), "confidence": confidence, + "session_metrics_shape": "key_value", + "metric_semantics_placeholder": "{{training_parameters_glossary_md}}", }, } ) diff --git a/backend/data_layer/prompt_output_compact.py b/backend/data_layer/prompt_output_compact.py new file mode 100644 index 0000000..d74994a --- /dev/null +++ b/backend/data_layer/prompt_output_compact.py @@ -0,0 +1,102 @@ +""" +Kompakte Zahlen- und JSON-Aufbereitung für KI-Platzhalter (Token sparen). + +- Floats: sinnvolle Nachkommastellen je nach Größenordnung (kleine Werte <0,1 mehr Präzision). 
+- ≥10 meist ganzzahlig; Prozent/Verhältnisse über denselben Mechanismus lesbar. +- Rekursiv auf dict/list-Strukturen vor json.dumps in _safe_json anwendbar. +""" +from __future__ import annotations + +import math +from decimal import Decimal +from typing import Any + + +def compact_float_for_prompt(x: float) -> float | int: + """ + Reduziert unnötige Nachkommastellen; erhält kleine Beträge (<0,1) mit mehr Stellen. + """ + if not math.isfinite(x): + return x + ax = abs(x) + if ax == 0.0: + return 0 + if ax >= 100.0: + return int(round(x)) + if ax >= 10.0: + return int(round(x)) + if ax >= 1.0: + r = round(x, 2) + return int(r) if abs(r - int(round(r))) < 1e-6 else r + if ax >= 0.1: + r = round(x, 2) + return int(r) if abs(r - int(round(r))) < 1e-6 else r + if ax >= 0.01: + return round(x, 3) + return round(x, 4) + + +def normalize_prompt_number(x: Any) -> Any: + """int/Decimal/float kompakt; Rest unverändert.""" + if x is None: + return None + if isinstance(x, bool): + return x + if isinstance(x, int) and not isinstance(x, bool): + return x + if isinstance(x, Decimal): + try: + xf = float(x) + except Exception: + return x + return compact_float_for_prompt(xf) + if isinstance(x, float): + return compact_float_for_prompt(x) + return x + + +def compact_json_payload_for_prompts(obj: Any) -> Any: + """ + Tiefe Kopie mit kompakten Zahlen (dicts/list/tuples rekursiv). + Strings und dict-Keys werden nicht verändert. + """ + if obj is None: + return None + if isinstance(obj, dict): + return {k: compact_json_payload_for_prompts(v) for k, v in obj.items()} + if isinstance(obj, (list, tuple)): + t = [compact_json_payload_for_prompts(v) for v in obj] + return tuple(t) if isinstance(obj, tuple) else t + return normalize_prompt_number(obj) + + +def session_metrics_list_to_key_value_compact(metrics: list[Any] | None) -> dict[str, Any]: + """ + Session-Metriken für KI-JSON: nur key → Wert (keine wiederholten Namen/Beschreibungen). 
+ + Semantik: {{training_parameters_glossary_md}} im Prompt ergänzen. + """ + out: dict[str, Any] = {} + for m in metrics or []: + if not isinstance(m, dict): + continue + k = m.get("key") + if not k: + continue + v = m.get("value") + dt = (m.get("data_type") or "").lower() + if v is None: + out[str(k)] = None + continue + if dt == "integer": + try: + out[str(k)] = int(v) + except (TypeError, ValueError): + out[str(k)] = normalize_prompt_number(v) + elif dt == "boolean": + out[str(k)] = bool(v) + elif dt == "string": + out[str(k)] = str(v) + else: + out[str(k)] = normalize_prompt_number(v) + return out diff --git a/backend/placeholder_registrations/activity_session_insights.py b/backend/placeholder_registrations/activity_session_insights.py index 0e49eb9..5bbab48 100644 --- a/backend/placeholder_registrations/activity_session_insights.py +++ b/backend/placeholder_registrations/activity_session_insights.py @@ -130,8 +130,8 @@ def register_activity_session_insights(): key="training_sessions_recent_json", category="Aktivität", description=( - "JSON: ISO-Wochen mit Sessions (activity_log-Kopf) plus session_metrics[] — gemergte Profil-Metriken " - "(dynamische Keys)" + "JSON: ISO-Wochen mit Sessions (activity_log-Kopf) plus session_metrics als kompaktes " + "{key: Wert}-Objekt; Zahlen für Prompts gekürzt. Semantik: {{training_parameters_glossary_md}}." ), resolver_module="backend/placeholder_resolver.py", resolver_function="_safe_json", @@ -141,13 +141,10 @@ def register_activity_session_insights(): semantic_contract=( "Root: weeks[] mit week_iso; sessions[] pro Einheit u. a. id, date, activity_type, " "duration_min, kcal_active, hr_avg, hr_max, rpe, training_category, training_type_name, " - "session_metrics[]. 
" - "session_metrics: effektive Liste nach merge_column_backed_and_eav_metrics — Einträge mit " - "training_parameter_id, key, data_type, unit, value, name_de/name_en, description_de/description_en; " - "nur Parameter aus Attributschema " - "(training_category_parameter + training_type_parameter Overrides), keys sortiert. " - "Kanon Lesen: activity_log-Spalte vor EAV bei Konflikt. " - "meta: weeks_requested, days_loaded, session_count, confidence. " + "session_metrics (Objekt key→Wert, keine wiederholten Labels). " + "Merge wie merge_column_backed_and_eav_metrics; nur Keys aus Attributschema. " + "meta.session_metrics_shape=key_value, meta.metric_semantics_placeholder verweist auf Glossary-Platzhalter. " + "Alle JSON-Platzhalter mit _safe_json: Zahlen rekursiv kompakt gerundet. " "Default ca. 4 ISO-Wochen (28 Tage Rohdatenfenster)." ), business_meaning="Rohkontext für wochenweise Auswertung (Erholung, Intensität) in der KI", @@ -171,7 +168,7 @@ def register_activity_session_insights(): "session_metrics oft [] (kein Typ, kein Profil, keine gespeicherten Werte). " "Anzahl und Namen der Metrik-Keys sind instanz-/adminabhängig — JSON nicht als festes Schema " "für Downstream-Parsing harter Logik verwenden. " - "Für KI-Semantik zusätzlich {{training_parameters_glossary_md}} (gesamter aktiver Katalog) in den Prompt legen. " + "Pflicht für Metrik-Bedeutung: {{training_parameters_glossary_md}} (Katalog); im JSON keine Namen/Beschreibungen pro Session. " "Composite-Parameter (JSON in EAV) noch nicht im MVP expandiert; ggf. Roh-value_text in späterer Phase." 
@pytest.mark.parametrize(
    "x,expected",
    [
        (0.0, 0),
        (123.456, 123),
        (45.67, 46),
        (9.876, 9.88),
        (0.99, 0.99),
        (0.055, 0.055),
        (0.01234, 0.012),
    ],
)
def test_compact_float_for_prompt(x, expected):
    """Each magnitude band rounds to the expected number of decimals."""
    result = compact_float_for_prompt(x)
    if not isinstance(expected, float):
        # Integer expectations must match exactly.
        assert result == expected
        return
    assert abs(float(result) - expected) < 0.0001


def test_compact_json_nested():
    """Numbers nested in dicts and lists are compacted recursively."""
    payload = {"a": 12.345678, "b": {"c": 0.0666}, "d": [1.111, 2.0]}
    compacted = compact_json_payload_for_prompts(payload)
    assert compacted["a"] == 12
    inner = compacted["b"]["c"]
    assert abs(inner - 0.067) < 0.001
    first_item = compacted["d"][0]
    assert first_item == 1.11


def test_session_metrics_key_value_only():
    """Only key -> value pairs survive; labels and units are dropped."""
    rpe_metric = {
        "key": "rpe",
        "data_type": "integer",
        "value": 7,
        "name_de": "RPE",
        "description_de": "lang",
    }
    watts_metric = {
        "key": "watts",
        "data_type": "float",
        "value": 199.999,
        "unit": "W",
    }
    flattened = session_metrics_list_to_key_value_compact([rpe_metric, watts_metric])
    assert flattened == {"rpe": 7, "watts": 200}
    assert "name_de" not in str(flattened)