From 7226e04e9c0146f6d6f25c185b9c6f336bd1ff1c Mon Sep 17 00:00:00 2001 From: Lars Date: Sat, 18 Apr 2026 10:12:33 +0200 Subject: [PATCH 1/4] feat: implement effective CSV delimiter resolution for imports - Added `resolve_effective_csv_delimiter` function to determine the correct delimiter based on the uploaded file and template. - Updated CSV import logic to utilize the new delimiter resolution method, ensuring accurate parsing of CSV files with varying delimiters. - Enhanced documentation to reflect changes in delimiter handling. - Added unit tests for the new delimiter resolution functionality. --- .../UNIVERSAL_CSV_IMPORT_AGENT_GUIDE.md | 1 + backend/csv_parser/core.py | 40 +++++++++++++++++++ backend/csv_parser/executor.py | 5 ++- backend/routers/csv_import.py | 5 ++- backend/tests/test_csv_parser_core.py | 15 +++++++ 5 files changed, 63 insertions(+), 3 deletions(-) diff --git a/.claude/docs/technical/UNIVERSAL_CSV_IMPORT_AGENT_GUIDE.md b/.claude/docs/technical/UNIVERSAL_CSV_IMPORT_AGENT_GUIDE.md index 5bdcea0..e47174c 100644 --- a/.claude/docs/technical/UNIVERSAL_CSV_IMPORT_AGENT_GUIDE.md +++ b/.claude/docs/technical/UNIVERSAL_CSV_IMPORT_AGENT_GUIDE.md @@ -18,6 +18,7 @@ Dieses Dokument ist **normativ für Agenten**, die ein neues Import-Zielmodul an | Admin-Systemvorlagen | `backend/routers/admin_csv_templates.py` | | Nutzer-Import (Profil-Mappings) | `backend/routers/csv_import.py` | | Vorlagen-Validierung (strukturell + Sample) | `backend/csv_parser/template_validator.py` (`validate_csv_template`) | +| Effektives Listentrennzeichen | `backend/csv_parser/core.py` (`resolve_effective_csv_delimiter`) — Datei kann `;` (z. B. Apple DE) haben, Vorlage `,` (EN); Import/Diagnose **nicht** nur das gespeicherte Trennzeichen blind nutzen. | **Single Source of Truth** für erlaubte Zielfelder, Typen und Duplikat-Keys ist **`module_registry.py`**. Keine parallele Feldliste in Routern duplizieren. diff --git a/backend/csv_parser/core.py b/backend/csv_parser/core.py index eb23a9f..444c9a1 100644 --- a/backend/csv_parser/core.py +++ b/backend/csv_parser/core.py @@ -47,6 +47,46 @@ def sniff_delimiter(sample_line: str) -> str: return best +def _csv_field_count(line: str, delimiter: str) -> int: + """Anzahl Felder in einer Zeile (csv.reader, berücksichtigt Anführungszeichen).""" + if not line or not line.strip(): + return 0 + try: + row = next(csv.reader(io.StringIO(line), delimiter=delimiter)) + except StopIteration: + return 0 + return len(row) + + +def resolve_effective_csv_delimiter(text: str, template_delimiter: str | None = None) -> str: + """ + Trennzeichen für die hochgeladene Datei wählen. Gespeicherte Vorlagen haben oft «,» + (Apple EN), tatsächliche Exporte je nach Region «;» (Apple DE / Excel) — mit falschem + Zeichen wird die Kopfzeile zu **einer** Spalte und das Mapping bricht vollständig. + """ + tpl = (template_delimiter or "").strip() + if tpl not in _DEFAULT_DELIMS: + tpl = None + + lines = _split_first_lines(text, max_lines=5) + if not lines: + return tpl or "," + + header = lines[0] + scores: list[tuple[int, str]] = [] + for d in _DEFAULT_DELIMS: + scores.append((_csv_field_count(header, d), d)) + + max_n = max(n for n, _ in scores) + if max_n <= 1: + return tpl or sniff_delimiter(header) + + at_max = [d for n, d in scores if n == max_n] + if tpl and tpl in at_max: + return tpl + return at_max[0] + + def _split_first_lines(text: str, max_lines: int = 5) -> List[str]: lines: List[str] = [] for line in text.splitlines(): diff --git a/backend/csv_parser/executor.py b/backend/csv_parser/executor.py index 67c78c7..6a63874 100644 --- a/backend/csv_parser/executor.py +++ b/backend/csv_parser/executor.py @@ -11,7 +11,7 @@ from typing import Any import logging -from csv_parser.core import iter_csv_dict_rows +from csv_parser.core import iter_csv_dict_rows, resolve_effective_csv_delimiter from csv_parser.import_row_processing import ( aggregate_mapped_rows, resolve_import_row_processing, @@ -97,7 +97,8 @@ def run_universal_csv_import( if tc is not None and not isinstance(tc, dict): tc = None - delim = mapping.get("delimiter") or "," + tpl_delim = str(mapping.get("delimiter") or ",").strip() or "," + delim = resolve_effective_csv_delimiter(text, tpl_delim) has_header = mapping.get("has_header", True) if module == "nutrition": diff --git a/backend/routers/csv_import.py b/backend/routers/csv_import.py index b3ab67a..c6dc535 100644 --- a/backend/routers/csv_import.py +++ b/backend/routers/csv_import.py @@ -29,6 +29,7 @@ from csv_parser.core import ( iter_csv_dict_rows, normalize_header_for_signature, parse_csv_sample, + resolve_effective_csv_delimiter, ) from csv_parser.type_converter import build_row_after_mapping, diagnose_row_mapping from csv_parser.field_units import source_unit_choices_for_field @@ -393,7 +394,8 @@ async def csv_import_diagnose( tc = m.get("type_conversions") if not isinstance(tc, dict): tc = {} - delim = str(m.get("delimiter") or ",") + tpl_delim = str(m.get("delimiter") or ",").strip() or "," + delim = resolve_effective_csv_delimiter(text, tpl_delim) exec_module = str(m["module"]) rows_out: list[dict[str, Any]] = [] @@ -418,6 +420,7 @@ async def csv_import_diagnose( "mapping_id": mapping_id, "mapping_name": m.get("mapping_name"), "module": exec_module, + "delimiter_template": tpl_delim, "delimiter_used": delim, "has_header": bool(m.get("has_header", True)), "rows_diagnosed": len(rows_out), diff --git a/backend/tests/test_csv_parser_core.py b/backend/tests/test_csv_parser_core.py index 3e27673..917510f 100644 --- a/backend/tests/test_csv_parser_core.py +++ b/backend/tests/test_csv_parser_core.py @@ -11,6 +11,7 @@ from csv_parser.core import ( headers_signature_rank_metrics, get_csv_import_limits, iter_csv_dict_rows, + resolve_effective_csv_delimiter, ) from csv_parser.field_units import source_unit_choices_for_field from csv_parser.mapping_suggest import build_type_conversions_for_mapping @@ -29,6 +30,20 @@ def test_sniff_delimiter(): assert sniff_delimiter("a,b,c") == "," +def test_resolve_effective_csv_delimiter_semicolon_file_comma_template(): + """DE-Apple: «;» in der Datei, englische Vorlage speichert «,».""" + header = "Workout Type;Start;End;Duration;Aktive Energie (kJ)" + row = "Laufen;2026-04-17 16:25;2026-04-17 17:00;00:30:00;500" + text = header + "\n" + row + "\n" + assert resolve_effective_csv_delimiter(text, ",") == ";" + assert resolve_effective_csv_delimiter(text, None) == ";" + + +def test_resolve_effective_csv_delimiter_comma_file_keeps_template(): + text = "Workout Type,Start,End\nWalk,2026-04-17 16:25,2026-04-17 17:00\n" + assert resolve_effective_csv_delimiter(text, ",") == "," + + def test_parse_csv_sample_header(): text = "Date;kcal\n2024-01-01;2000\n" headers, rows, delim = parse_csv_sample(text, delimiter=";", max_data_rows=3) -- 2.43.0 From 6756dc60f37d94e4f6ccf810cc692a2a101b1aa9 Mon Sep 17 00:00:00 2001 From: Lars Date: Sat, 18 Apr 2026 10:24:44 +0200 Subject: [PATCH 2/4] feat: enhance session metrics handling in activity summaries - Integrated compact JSON payload generation for session metrics in `get_training_sessions_recent_weeks_data`. - Updated the registration of activity session insights to reflect the new compact format for session metrics. - Improved documentation to clarify the structure and semantics of the session metrics in the JSON output. - Added normalization for prompt numbers to ensure consistent formatting in the metrics. --- backend/data_layer/activity_metrics.py | 19 +++- backend/data_layer/prompt_output_compact.py | 102 ++++++++++++++++++ .../activity_session_insights.py | 17 ++- backend/placeholder_resolver.py | 6 +- backend/tests/test_prompt_output_compact.py | 59 ++++++++++ 5 files changed, 188 insertions(+), 15 deletions(-) create mode 100644 backend/data_layer/prompt_output_compact.py create mode 100644 backend/tests/test_prompt_output_compact.py diff --git a/backend/data_layer/activity_metrics.py b/backend/data_layer/activity_metrics.py index 9c27451..ebb1731 100644 --- a/backend/data_layer/activity_metrics.py +++ b/backend/data_layer/activity_metrics.py @@ -25,6 +25,10 @@ import statistics from db import get_db, get_cursor, r2d from data_layer.activity_session_metrics import enrich_sessions_with_metrics from data_layer.utils import calculate_confidence, safe_float, safe_int, serialize_dates +from data_layer.prompt_output_compact import ( + normalize_prompt_number, + session_metrics_list_to_key_value_compact, +) def get_activity_summary_data( @@ -1094,6 +1098,10 @@ def get_training_sessions_recent_weeks_data( Letzte Wochen mit Einzeltrainings für KI-Kontext (Dauer, kcal, HF, Typ). weeks: Anzahl zurückliegender ISO-Kalenderwochen (Default 4). + + session_metrics pro Einheit: kompaktes Objekt ``{key: Wert}`` (keine wiederholten + Namen/Beschreibungen). Bedeutung der Keys: Platzhalter ``{{training_parameters_glossary_md}}``. + Zahlen werden für Prompt-Token kompakt gerundet. """ days = max(weeks * 7, 7) with get_db() as conn: @@ -1131,6 +1139,8 @@ def get_training_sessions_recent_weeks_data( "days_loaded": days, "session_count": 0, "confidence": "insufficient", + "session_metrics_shape": "key_value", + "metric_semantics_placeholder": "{{training_parameters_glossary_md}}", }, } @@ -1149,6 +1159,7 @@ def get_training_sessions_recent_weeks_data( kcal_f = float(kcal) if kcal is not None else None hr_a = r.get("hr_avg") hr_m = r.get("hr_max") + sm_compact = session_metrics_list_to_key_value_compact(r.get("session_metrics")) by_week[wk].append( { "id": str(r["id"]), @@ -1157,12 +1168,12 @@ def get_training_sessions_recent_weeks_data( "activity_type": r.get("activity_type"), "training_category": r.get("training_category"), "training_type_name": r.get("training_type_name"), - "duration_min": dur_f, - "kcal_active": kcal_f, + "duration_min": normalize_prompt_number(dur_f) if dur_f is not None else None, + "kcal_active": normalize_prompt_number(kcal_f) if kcal_f is not None else None, "hr_avg": int(hr_a) if hr_a is not None else None, "hr_max": int(hr_m) if hr_m is not None else None, "rpe": int(r["rpe"]) if r.get("rpe") is not None else None, - "session_metrics": r.get("session_metrics", []), + "session_metrics": sm_compact, } ) @@ -1177,6 +1188,8 @@ def get_training_sessions_recent_weeks_data( "days_loaded": days, "session_count": len(rows), "confidence": confidence, + "session_metrics_shape": "key_value", + "metric_semantics_placeholder": "{{training_parameters_glossary_md}}", }, } ) diff --git a/backend/data_layer/prompt_output_compact.py b/backend/data_layer/prompt_output_compact.py new file mode 100644 index 0000000..d74994a --- /dev/null +++ b/backend/data_layer/prompt_output_compact.py @@ -0,0 +1,102 @@ +""" +Kompakte Zahlen- und JSON-Aufbereitung für KI-Platzhalter (Token sparen). + +- Floats: sinnvolle Nachkommastellen je nach Größenordnung (kleine Werte <0,1 mehr Präzision). +- ≥10 meist ganzzahlig; Prozent/Verhältnisse über denselben Mechanismus lesbar. +- Rekursiv auf dict/list-Strukturen vor json.dumps in _safe_json anwendbar. +""" +from __future__ import annotations + +import math +from decimal import Decimal +from typing import Any + + +def compact_float_for_prompt(x: float) -> float | int: + """ + Reduziert unnötige Nachkommastellen; erhält kleine Beträge (<0,1) mit mehr Stellen. + """ + if not math.isfinite(x): + return x + ax = abs(x) + if ax == 0.0: + return 0 + if ax >= 100.0: + return int(round(x)) + if ax >= 10.0: + return int(round(x)) + if ax >= 1.0: + r = round(x, 2) + return int(r) if abs(r - int(round(r))) < 1e-6 else r + if ax >= 0.1: + r = round(x, 2) + return int(r) if abs(r - int(round(r))) < 1e-6 else r + if ax >= 0.01: + return round(x, 3) + return round(x, 4) + + +def normalize_prompt_number(x: Any) -> Any: + """int/Decimal/float kompakt; Rest unverändert.""" + if x is None: + return None + if isinstance(x, bool): + return x + if isinstance(x, int) and not isinstance(x, bool): + return x + if isinstance(x, Decimal): + try: + xf = float(x) + except Exception: + return x + return compact_float_for_prompt(xf) + if isinstance(x, float): + return compact_float_for_prompt(x) + return x + + +def compact_json_payload_for_prompts(obj: Any) -> Any: + """ + Tiefe Kopie mit kompakten Zahlen (dicts/list/tuples rekursiv). + Strings und dict-Keys werden nicht verändert. + """ + if obj is None: + return None + if isinstance(obj, dict): + return {k: compact_json_payload_for_prompts(v) for k, v in obj.items()} + if isinstance(obj, (list, tuple)): + t = [compact_json_payload_for_prompts(v) for v in obj] + return tuple(t) if isinstance(obj, tuple) else t + return normalize_prompt_number(obj) + + +def session_metrics_list_to_key_value_compact(metrics: list[Any] | None) -> dict[str, Any]: + """ + Session-Metriken für KI-JSON: nur key → Wert (keine wiederholten Namen/Beschreibungen). + + Semantik: {{training_parameters_glossary_md}} im Prompt ergänzen. + """ + out: dict[str, Any] = {} + for m in metrics or []: + if not isinstance(m, dict): + continue + k = m.get("key") + if not k: + continue + v = m.get("value") + dt = (m.get("data_type") or "").lower() + if v is None: + out[str(k)] = None + continue + if dt == "integer": + try: + out[str(k)] = int(v) + except (TypeError, ValueError): + out[str(k)] = normalize_prompt_number(v) + elif dt == "boolean": + out[str(k)] = bool(v) + elif dt == "string": + out[str(k)] = str(v) + else: + out[str(k)] = normalize_prompt_number(v) + return out diff --git a/backend/placeholder_registrations/activity_session_insights.py b/backend/placeholder_registrations/activity_session_insights.py index 0e49eb9..5bbab48 100644 --- a/backend/placeholder_registrations/activity_session_insights.py +++ b/backend/placeholder_registrations/activity_session_insights.py @@ -130,8 +130,8 @@ def register_activity_session_insights(): key="training_sessions_recent_json", category="Aktivität", description=( - "JSON: ISO-Wochen mit Sessions (activity_log-Kopf) plus session_metrics[] — gemergte Profil-Metriken " - "(dynamische Keys)" + "JSON: ISO-Wochen mit Sessions (activity_log-Kopf) plus session_metrics als kompaktes " + "{key: Wert}-Objekt; Zahlen für Prompts gekürzt. Semantik: {{training_parameters_glossary_md}}." ), resolver_module="backend/placeholder_resolver.py", resolver_function="_safe_json", @@ -141,13 +141,10 @@ def register_activity_session_insights(): semantic_contract=( "Root: weeks[] mit week_iso; sessions[] pro Einheit u. a. id, date, activity_type, " "duration_min, kcal_active, hr_avg, hr_max, rpe, training_category, training_type_name, " - "session_metrics[]. " - "session_metrics: effektive Liste nach merge_column_backed_and_eav_metrics — Einträge mit " - "training_parameter_id, key, data_type, unit, value, name_de/name_en, description_de/description_en; " - "nur Parameter aus Attributschema " - "(training_category_parameter + training_type_parameter Overrides), keys sortiert. " - "Kanon Lesen: activity_log-Spalte vor EAV bei Konflikt. " - "meta: weeks_requested, days_loaded, session_count, confidence. " + "session_metrics (Objekt key→Wert, keine wiederholten Labels). " + "Merge wie merge_column_backed_and_eav_metrics; nur Keys aus Attributschema. " + "meta.session_metrics_shape=key_value, meta.metric_semantics_placeholder verweist auf Glossary-Platzhalter. " + "Alle JSON-Platzhalter mit _safe_json: Zahlen rekursiv kompakt gerundet. " "Default ca. 4 ISO-Wochen (28 Tage Rohdatenfenster)." ), business_meaning="Rohkontext für wochenweise Auswertung (Erholung, Intensität) in der KI", @@ -171,7 +168,7 @@ def register_activity_session_insights(): "session_metrics oft [] (kein Typ, kein Profil, keine gespeicherten Werte). " "Anzahl und Namen der Metrik-Keys sind instanz-/adminabhängig — JSON nicht als festes Schema " "für Downstream-Parsing harter Logik verwenden. " - "Für KI-Semantik zusätzlich {{training_parameters_glossary_md}} (gesamter aktiver Katalog) in den Prompt legen. " + "Pflicht für Metrik-Bedeutung: {{training_parameters_glossary_md}} (Katalog); im JSON keine Namen/Beschreibungen pro Session. " "Composite-Parameter (JSON in EAV) noch nicht im MVP expandiert; ggf. Roh-value_text in späterer Phase." ), layer_1_decision="activity_metrics.get_training_sessions_recent_weeks_data", diff --git a/backend/placeholder_resolver.py b/backend/placeholder_resolver.py index 6f635c2..bdb248f 100644 --- a/backend/placeholder_resolver.py +++ b/backend/placeholder_resolver.py @@ -48,6 +48,8 @@ from data_layer.health_metrics import ( get_vo2_max_data ) +from data_layer.prompt_output_compact import compact_json_payload_for_prompts + from placeholder_registry import build_ai_placeholder_caption, get_registry # {{key|d}} — nur description anhängen; {{key|x}} — nur Erklärung (ai_caption / Registry) @@ -1028,8 +1030,8 @@ def _safe_json(func_name: str, profile_id: str) -> str: # If already string, return it; otherwise convert to JSON if isinstance(result, str): return result - else: - return json.dumps(result, ensure_ascii=False, default=str) + compacted = compact_json_payload_for_prompts(result) + return json.dumps(compacted, ensure_ascii=False, default=str) except Exception as e: print(f"[ERROR] _safe_json({func_name}, {profile_id}): {type(e).__name__}: {e}") traceback.print_exc() diff --git a/backend/tests/test_prompt_output_compact.py b/backend/tests/test_prompt_output_compact.py new file mode 100644 index 0000000..cefae36 --- /dev/null +++ b/backend/tests/test_prompt_output_compact.py @@ -0,0 +1,59 @@ +"""Tests für data_layer.prompt_output_compact (KI-Platzhalter, Token).""" + +import pytest + +from data_layer.prompt_output_compact import ( + compact_float_for_prompt, + compact_json_payload_for_prompts, + normalize_prompt_number, + session_metrics_list_to_key_value_compact, +) + + +@pytest.mark.parametrize( + "x,expected", + [ + (0.0, 0), + (123.456, 123), + (45.67, 46), + (9.876, 9.88), + (0.99, 0.99), + (0.055, 0.055), + (0.01234, 0.012), + ], +) +def test_compact_float_for_prompt(x, expected): + out = compact_float_for_prompt(x) + if isinstance(expected, float): + assert abs(float(out) - expected) < 0.0001 + else: + assert out == expected + + +def test_compact_json_nested(): + raw = {"a": 12.345678, "b": {"c": 0.0666}, "d": [1.111, 2.0]} + out = compact_json_payload_for_prompts(raw) + assert out["a"] == 12 + assert abs(out["b"]["c"] - 0.067) < 0.001 + assert out["d"][0] == 1.11 + + +def test_session_metrics_key_value_only(): + sm = [ + { + "key": "rpe", + "data_type": "integer", + "value": 7, + "name_de": "RPE", + "description_de": "lang", + }, + { + "key": "watts", + "data_type": "float", + "value": 199.999, + "unit": "W", + }, + ] + out = session_metrics_list_to_key_value_compact(sm) + assert out == {"rpe": 7, "watts": 200} + assert "name_de" not in str(out) -- 2.43.0 From 178534e9eb791541e4f93ff868da4963b3293698 Mon Sep 17 00:00:00 2001 From: Lars Date: Sat, 18 Apr 2026 10:32:29 +0200 Subject: [PATCH 3/4] feat: enhance formatting and normalization of activity metrics - Introduced `format_scalar_for_prompt_text` function to standardize the representation of scalar values in activity summaries and details. - Updated `get_activity_summary` and `get_activity_detail` functions to utilize the new formatting for improved readability. - Added normalization for float values in session metrics to prevent excessively long representations. - Enhanced unit tests to verify the new formatting and normalization behavior. --- .../data_layer/activity_session_metrics.py | 24 ++++++++++++++ backend/data_layer/prompt_output_compact.py | 25 +++++++++++++++ backend/placeholder_resolver.py | 19 ++++++++--- .../tests/test_activity_session_metrics.py | 32 +++++++++++++++++++ backend/tests/test_prompt_output_compact.py | 7 ++++ 5 files changed, 103 insertions(+), 4 deletions(-) diff --git a/backend/data_layer/activity_session_metrics.py b/backend/data_layer/activity_session_metrics.py index 6894559..2681b68 100644 --- a/backend/data_layer/activity_session_metrics.py +++ b/backend/data_layer/activity_session_metrics.py @@ -13,9 +13,31 @@ from data_layer.activity_data_canon import ( ACTIVITY_LOG_LEGACY_COLUMN_FOR_EAV_PRIMARY_PARAM, ACTIVITY_MODULE_REGISTRY_FIELD_KEYS, ) +from data_layer.prompt_output_compact import normalize_prompt_number logger = logging.getLogger(__name__) + +def _normalize_metric_value_for_read(data_type: str, val: Any) -> Any: + """Lesepfad (Layer 1): keine unnötig langen Float-Strings für KI/UI (Issue 53 / Platzhalter).""" + if val is None: + return None + dt = (data_type or "").strip().lower() + if dt == "string": + return val + if dt == "boolean": + return bool(val) + if dt == "integer": + try: + if isinstance(val, bool): + return int(val) + return int(val) + except (TypeError, ValueError): + return normalize_prompt_number(val) + if dt == "float": + return normalize_prompt_number(val) + return normalize_prompt_number(val) + # Diese Spalten nicht aus CSV-Parameter-Zuordnung überschreiben (kommen aus Typ-Mapping / System). ACTIVITY_LOG_PATCH_FORBIDDEN = frozenset( { @@ -430,6 +452,8 @@ def merge_column_backed_and_eav_metrics( keys_handled.add(k) merged.sort(key=lambda x: x["key"]) + for m in merged: + m["value"] = _normalize_metric_value_for_read(m.get("data_type") or "", m.get("value")) return merged diff --git a/backend/data_layer/prompt_output_compact.py b/backend/data_layer/prompt_output_compact.py index d74994a..7949c6d 100644 --- a/backend/data_layer/prompt_output_compact.py +++ b/backend/data_layer/prompt_output_compact.py @@ -70,6 +70,31 @@ def compact_json_payload_for_prompts(obj: Any) -> Any: return normalize_prompt_number(obj) +def format_scalar_for_prompt_text(x: Any) -> str: + """ + Kurzdarstellung für Text-Platzhalter (activity_detail, Tabellen, …). + Nutzt dieselbe Komprimierung wie JSON (normalize_prompt_number). + """ + if x is None: + return "—" + if isinstance(x, bool): + return "ja" if x else "nein" + if isinstance(x, str): + return x + n = normalize_prompt_number(x) + if isinstance(n, bool): + return "ja" if n else "nein" + if isinstance(n, int) and not isinstance(n, bool): + return str(n) + if isinstance(n, float): + if not math.isfinite(n): + return str(n) + if abs(n - round(n)) < 1e-9: + return str(int(round(n))) + return str(n) + return str(n) + + def session_metrics_list_to_key_value_compact(metrics: list[Any] | None) -> dict[str, Any]: """ Session-Metriken für KI-JSON: nur key → Wert (keine wiederholten Namen/Beschreibungen). diff --git a/backend/placeholder_resolver.py b/backend/placeholder_resolver.py index bdb248f..8f8973d 100644 --- a/backend/placeholder_resolver.py +++ b/backend/placeholder_resolver.py @@ -28,6 +28,8 @@ from data_layer.nutrition_metrics import ( get_nutrition_days_data, get_protein_targets_data ) +from data_layer.prompt_output_compact import format_scalar_for_prompt_text + from data_layer.activity_metrics import ( get_activity_summary_data, get_activity_detail_data, @@ -350,7 +352,11 @@ def get_activity_summary(profile_id: str, days: int = 14) -> str: if data['confidence'] == 'insufficient': return f"Keine Aktivitäten in den letzten {days} Tagen" - return f"{data['activity_count']} Einheiten in {days} Tagen (Ø {data['avg_duration_min']} min/Einheit, {data['total_kcal']} kcal gesamt)" + return ( + f"{data['activity_count']} Einheiten in {days} Tagen (Ø " + f"{format_scalar_for_prompt_text(data['avg_duration_min'])} min/Einheit, " + f"{format_scalar_for_prompt_text(data['total_kcal'])} kcal gesamt)" + ) def calculate_age(dob) -> str: @@ -423,18 +429,23 @@ def get_activity_detail(profile_id: str, days: int = 14) -> str: # Format as readable list (max 20 entries to avoid token bloat) lines = [] for activity in data["activities"][:20]: - hr_str = f", HF={activity['hr_avg']}" if activity.get("hr_avg") else "" + hr_str = ( + f", HF={format_scalar_for_prompt_text(activity['hr_avg'])}" + if activity.get("hr_avg") is not None + else "" + ) eav_parts = [] for m in activity.get("session_metrics") or []: k, v = m.get("key"), m.get("value") if k is None or v is None: continue label = m.get("name_de") or m.get("name_en") or k - eav_parts.append(f"{label} ({k})={v}") + eav_parts.append(f"{label} ({k})={format_scalar_for_prompt_text(v)}") eav_str = f" | EAV: {'; '.join(eav_parts)}" if eav_parts else "" lines.append( f"{activity['date']}: {activity['activity_type']} " - f"({activity['duration_min']}min, {activity['kcal_active']}kcal{hr_str}{eav_str})" + f"({format_scalar_for_prompt_text(activity['duration_min'])}min, " + f"{format_scalar_for_prompt_text(activity['kcal_active'])}kcal{hr_str}{eav_str})" ) return "\n".join(lines) diff --git a/backend/tests/test_activity_session_metrics.py b/backend/tests/test_activity_session_metrics.py index a2bc11a..0de2bdf 100644 --- a/backend/tests/test_activity_session_metrics.py +++ b/backend/tests/test_activity_session_metrics.py @@ -121,6 +121,38 @@ def test_merge_parameter_schema_includes_descriptions(): assert merged[0]["description_en"] == "5 min average power" +def test_merge_eav_float_value_normalized_no_long_tail(): + """Layer 1: lange Floats (z. B. kcal_per_km) für Lesepfad kompakt.""" + schema = [ + { + "training_parameter_id": 1, + "key": "kcal_per_km", + "data_type": "float", + "unit": "kcal/km", + "validation_rules": {}, + "source_field": None, + "name_de": "Kcal/km", + "name_en": "kcal/km", + "description_de": None, + "description_en": None, + "param_category": "performance", + } + ] + eav = [ + { + "training_parameter_id": 1, + "key": "kcal_per_km", + "data_type": "float", + "unit": "kcal/km", + "value": 51.5818181818181818, + } + ] + out = merge_column_backed_and_eav_metrics({}, schema, eav) + assert len(out) == 1 + v = out[0]["value"] + assert "581818" not in repr(v) + + def test_merge_column_backed_includes_human_labels_from_schema(): schema = [ { diff --git a/backend/tests/test_prompt_output_compact.py b/backend/tests/test_prompt_output_compact.py index cefae36..f00b627 100644 --- a/backend/tests/test_prompt_output_compact.py +++ b/backend/tests/test_prompt_output_compact.py @@ -5,6 +5,7 @@ import pytest from data_layer.prompt_output_compact import ( compact_float_for_prompt, compact_json_payload_for_prompts, + format_scalar_for_prompt_text, normalize_prompt_number, session_metrics_list_to_key_value_compact, ) @@ -38,6 +39,12 @@ def test_compact_json_nested(): assert out["d"][0] == 1.11 +def test_format_scalar_no_long_float_tail(): + s = format_scalar_for_prompt_text(51.5818181818181818) + assert "181818" not in s + assert len(s) <= 8 + + def test_session_metrics_key_value_only(): sm = [ { -- 2.43.0 From 7676897fda1f4c604fa638028d14f2214ec9c653 Mon Sep 17 00:00:00 2001 From: Lars Date: Sat, 18 Apr 2026 10:43:21 +0200 Subject: [PATCH 4/4] feat: enhance normalization of metric values for improved handling - Updated `_normalize_metric_value_for_read` to compact numeric strings and ensure consistent formatting for string data types. - Enhanced `normalize_prompt_number` to handle numeric strings and non-finite float values effectively. - Improved unit tests to validate the new normalization behavior for session metrics and scalar formatting. --- .../data_layer/activity_session_metrics.py | 2 +- backend/data_layer/prompt_output_compact.py | 43 +++++++++++++++---- .../tests/test_activity_session_metrics.py | 6 +++ backend/tests/test_prompt_output_compact.py | 23 ++++++++++ 4 files changed, 64 insertions(+), 10 deletions(-) diff --git a/backend/data_layer/activity_session_metrics.py b/backend/data_layer/activity_session_metrics.py index 2681b68..0726ff3 100644 --- a/backend/data_layer/activity_session_metrics.py +++ b/backend/data_layer/activity_session_metrics.py @@ -24,7 +24,7 @@ def _normalize_metric_value_for_read(data_type: str, val: Any) -> Any: return None dt = (data_type or "").strip().lower() if dt == "string": - return val + return normalize_prompt_number(val) if dt == "boolean": return bool(val) if dt == "integer": diff --git a/backend/data_layer/prompt_output_compact.py b/backend/data_layer/prompt_output_compact.py index 7949c6d..8afa08c 100644 --- a/backend/data_layer/prompt_output_compact.py +++ b/backend/data_layer/prompt_output_compact.py @@ -4,10 +4,14 @@ Kompakte Zahlen- und JSON-Aufbereitung für KI-Platzhalter (Token sparen). - Floats: sinnvolle Nachkommastellen je nach Größenordnung (kleine Werte <0,1 mehr Präzision). - ≥10 meist ganzzahlig; Prozent/Verhältnisse über denselben Mechanismus lesbar. - Rekursiv auf dict/list-Strukturen vor json.dumps in _safe_json anwendbar. + +Hinweis: numpy.float64 und numerische Strings (DB/API) sind keine ``float``-Instanzen — +diese werden explizit mit float() normalisiert. """ from __future__ import annotations import math +import re from decimal import Decimal from typing import Any @@ -37,22 +41,45 @@ def compact_float_for_prompt(x: float) -> float | int: def normalize_prompt_number(x: Any) -> Any: - """int/Decimal/float kompakt; Rest unverändert.""" + """int/Decimal/float kompakt; numpy-Scalars; numerische Strings; sonst unverändert.""" if x is None: return None if isinstance(x, bool): return x if isinstance(x, int) and not isinstance(x, bool): return x + if isinstance(x, str): + s = x.strip() + if not s: + return x + try: + if re.fullmatch(r"-?\d+", s): + return int(s) + xf = float(s) + except ValueError: + return x + if not math.isfinite(xf): + return x + return compact_float_for_prompt(xf) if isinstance(x, Decimal): try: xf = float(x) except Exception: return x + if not math.isfinite(xf): + return x return compact_float_for_prompt(xf) if isinstance(x, float): + if not math.isfinite(x): + return x return compact_float_for_prompt(x) - return x + try: + xf = float(x) + except (TypeError, ValueError): + return x + if not math.isfinite(xf): + return x + return compact_float_for_prompt(xf) def compact_json_payload_for_prompts(obj: Any) -> Any: @@ -73,25 +100,23 @@ def compact_json_payload_for_prompts(obj: Any) -> Any: def format_scalar_for_prompt_text(x: Any) -> str: """ Kurzdarstellung für Text-Platzhalter (activity_detail, Tabellen, …). - Nutzt dieselbe Komprimierung wie JSON (normalize_prompt_number). + Alle Zahlenpfade über normalize_prompt_number; Ausgabe kurz (%g, keine Float-Schweife). """ if x is None: return "—" if isinstance(x, bool): return "ja" if x else "nein" - if isinstance(x, str): - return x n = normalize_prompt_number(x) if isinstance(n, bool): return "ja" if n else "nein" + if isinstance(n, str): + return n if isinstance(n, int) and not isinstance(n, bool): return str(n) if isinstance(n, float): if not math.isfinite(n): return str(n) - if abs(n - round(n)) < 1e-9: - return str(int(round(n))) - return str(n) + return "%g" % n return str(n) @@ -121,7 +146,7 @@ def session_metrics_list_to_key_value_compact(metrics: list[Any] | None) -> dict elif dt == "boolean": out[str(k)] = bool(v) elif dt == "string": - out[str(k)] = str(v) + out[str(k)] = normalize_prompt_number(v) else: out[str(k)] = normalize_prompt_number(v) return out diff --git a/backend/tests/test_activity_session_metrics.py b/backend/tests/test_activity_session_metrics.py index 0de2bdf..dacb44d 100644 --- a/backend/tests/test_activity_session_metrics.py +++ b/backend/tests/test_activity_session_metrics.py @@ -6,6 +6,7 @@ from unittest.mock import patch import pytest from data_layer.activity_session_metrics import ( + _normalize_metric_value_for_read, ActivitySessionMetricsError, enrich_sessions_with_metrics, merge_column_backed_and_eav_metrics, @@ -206,6 +207,11 @@ def test_row_value_tuple_mapping(): assert _row_value_tuple("boolean", True) == (None, None, None, True) +def test_normalize_metric_string_dtype_compacts_numeric_strings(): + assert _normalize_metric_value_for_read("string", "51.58181818181818") == 52 + assert _normalize_metric_value_for_read("string", "Freitext") == "Freitext" + + class _FakeCursor: """Sequences fetchone/fetchall for resolve_activity_attribute_schema.""" diff --git a/backend/tests/test_prompt_output_compact.py b/backend/tests/test_prompt_output_compact.py index f00b627..7789ce8 100644 --- a/backend/tests/test_prompt_output_compact.py +++ b/backend/tests/test_prompt_output_compact.py @@ -45,6 +45,29 @@ def test_format_scalar_no_long_float_tail(): assert len(s) <= 8 +def test_format_scalar_numeric_string_no_long_tail(): + s = format_scalar_for_prompt_text("51.581818181818181818") + assert "181818" not in s + + +def test_session_metrics_string_dtype_compacts_numeric_strings(): + sm = [ + { + "key": "temp_c", + "data_type": "string", + "value": "22.333333333333336", + }, + { + "key": "kcal_per_km", + "data_type": "string", + "value": "51.581818181818181818", + }, + ] + out = session_metrics_list_to_key_value_compact(sm) + assert out["temp_c"] == 22 + assert out["kcal_per_km"] == 52 + + def test_session_metrics_key_value_only(): sm = [ { -- 2.43.0