From 5b96bd4f75bae47a84f319e3eefb1c2ea6b9cae7 Mon Sep 17 00:00:00 2001 From: Lars Date: Fri, 10 Apr 2026 16:43:00 +0200 Subject: [PATCH] feat(csv-import): Add blood pressure and activity row diagnosis functionality - Introduced `diagnose_blood_pressure_row` and `diagnose_activity_row` functions to validate and analyze blood pressure and activity data from CSV imports. - Updated the CSV import logic to handle combined datetime columns for blood pressure and activity, improving data integrity during import. - Enhanced type conversion specifications to include `start_time` for blood pressure and activity, ensuring accurate data mapping. - Added tests to validate the new diagnosis functions and their integration with existing import processes, ensuring robustness and reliability. - Updated frontend messages to provide clearer guidance on blood pressure and activity data handling during CSV imports. --- backend/csv_parser/core.py | 7 +- backend/csv_parser/executor.py | 98 ++++++++++++++++ backend/csv_parser/mapping_suggest.py | 1 + backend/csv_parser/module_registry.py | 4 + backend/csv_parser/type_converter.py | 109 +++++++++++++++++- backend/routers/csv_import.py | 11 +- backend/tests/test_csv_import_executor.py | 44 +++++++ backend/tests/test_csv_parser_core.py | 23 ++++ frontend/src/pages/UniversalCsvImportPage.jsx | 7 +- 9 files changed, 295 insertions(+), 9 deletions(-) diff --git a/backend/csv_parser/core.py b/backend/csv_parser/core.py index 3387bd1..ed449b8 100644 --- a/backend/csv_parser/core.py +++ b/backend/csv_parser/core.py @@ -190,7 +190,12 @@ def iter_csv_dict_rows( *, has_header: bool = True, ) -> Iterator[Dict[str, str]]: - """Vollständige Datei zeilenweise als Dict (Header = Keys).""" + """ + Vollständige Datei zeilenweise als Dict (Header = Keys). + Spaltenreihenfolge ist egal; zusätzliche Spalten werden ignoriert, wenn sie nicht + in field_mappings vorkommen. Keine Obergrenze für die Spaltenanzahl (nur Zeilenlimits + kommen aus system_config / Import-Router). + """ if not has_header: raise ValueError("CSV ohne Kopfzeile wird für Import noch nicht unterstützt") normalized = text.replace("\r\n", "\n").replace("\r", "\n") diff --git a/backend/csv_parser/executor.py b/backend/csv_parser/executor.py index 6d7d4d1..540c0ba 100644 --- a/backend/csv_parser/executor.py +++ b/backend/csv_parser/executor.py @@ -397,6 +397,87 @@ def _import_weight( } +def diagnose_blood_pressure_row(mapped_typed: dict[str, Any]) -> dict[str, Any]: + """Zeigt, ob Datum/Zeit nach Vorlage + Alias + Apple-Start-Spalte erkannt werden.""" + md = coerce_date(mapped_typed.get("measured_date")) + mt = mapped_typed.get("measured_time") + st_combined = mapped_typed.get("start_time") + if isinstance(st_combined, dt.datetime): + if md is None: + md = st_combined.date() + if mt is None: + mt = st_combined.time() + elif isinstance(st_combined, str) and st_combined.strip() and (md is None or mt is None): + try: + from dateutil import parser as du_parser + + dtp = du_parser.parse(st_combined.strip()) + if md is None: + md = dtp.date() + if mt is None: + mt = dtp.time() + except (ValueError, TypeError, OverflowError): + pass + sys_v = mapped_typed.get("systolic") + dia_v = mapped_typed.get("diastolic") + try: + int(sys_v) + int(dia_v) + ok_bp = True + except (TypeError, ValueError): + ok_bp = False + return { + "measured_date_iso": md.isoformat() if md else None, + "has_measured_time": mt is not None, + "start_time_raw_type": type(st_combined).__name__ if st_combined is not None else None, + "systolic_ok": ok_bp, + "would_reach_insert_check": md is not None and mt is not None, + } + + +def diagnose_activity_row(mapped_typed: dict[str, Any]) -> dict[str, Any]: + activity_type = mapped_typed.get("activity_type") + start_raw = mapped_typed.get("start_time") + date_d = coerce_date(mapped_typed.get("date")) + start_key: str | None = None + fail_hint: str | None = None + if isinstance(start_raw, dt.datetime): + start_key = start_raw.strftime("%Y-%m-%d %H:%M:%S") + if date_d is None: + date_d = start_raw.date() + elif isinstance(start_raw, dt.time): + if date_d is None: + fail_hint = "startzeit_ohne_datum" + else: + start_key = f"{date_d.isoformat()} {start_raw.strftime('%H:%M:%S')}" + elif isinstance(start_raw, str) and start_raw.strip(): + s = start_raw.strip() + if date_d is not None and _looks_like_time_only(s): + start_key = f"{date_d.isoformat()} {s}" + else: + start_key = s + if date_d is None and len(start_key) >= 10: + for fmt in ("%Y-%m-%d", "%d.%m.%Y"): + try: + date_d = dt.datetime.strptime(start_key[:10], fmt).date() + break + except ValueError: + continue + has_type = bool(activity_type and str(activity_type).strip()) + ok = has_type and date_d is not None and bool(start_key) + if fail_hint is None and not has_type: + fail_hint = "trainingsart_fehlt" + elif fail_hint is None and not ok: + fail_hint = "datum_start_fehlt" + return { + "activity_type_preview": (str(activity_type).strip()[:80] if activity_type else None), + "date_iso": date_d.isoformat() if date_d else None, + "start_key_preview": (start_key[:80] if start_key else None), + "would_pass_row_gate": ok, + "fail_hint": fail_hint, + } + + def _import_blood_pressure( cur, profile_id: str, @@ -417,6 +498,23 @@ def _import_blood_pressure( mapped = build_row_after_mapping(csv_row, fm, tc, module="blood_pressure") md = coerce_date(mapped.get("measured_date")) mt = mapped.get("measured_time") + st_combined = mapped.get("start_time") + if isinstance(st_combined, dt.datetime): + if md is None: + md = st_combined.date() + if mt is None: + mt = st_combined.time() + elif isinstance(st_combined, str) and st_combined.strip() and (md is None or mt is None): + try: + from dateutil import parser as du_parser + + dtp = du_parser.parse(st_combined.strip()) + if md is None: + md = dtp.date() + if mt is None: + mt = dtp.time() + except (ValueError, TypeError, OverflowError): + pass if md is None: error_details.append({"row": rows_total, "error": "Datum fehlt"}) continue diff --git a/backend/csv_parser/mapping_suggest.py b/backend/csv_parser/mapping_suggest.py index 20304b0..57f42f7 100644 --- a/backend/csv_parser/mapping_suggest.py +++ b/backend/csv_parser/mapping_suggest.py @@ -71,6 +71,7 @@ _DEFAULT_TYPE_CONVERSIONS: dict[str, dict[str, dict[str, Any]]] = { "blood_pressure": { "measured_date": {"type": "date", "format": "dd.mm.yyyy", "flexible": True}, "measured_time": {"type": "time", "format": "HH:MM", "flexible": True}, + "start_time": {"type": "datetime", "format": "yyyy-mm-dd HH:MM:SS", "flexible": True}, "systolic": {"type": "int", "flexible": True}, "diastolic": {"type": "int", "flexible": True}, "pulse": {"type": "int", "flexible": True}, diff --git a/backend/csv_parser/module_registry.py b/backend/csv_parser/module_registry.py index 20a20e7..2a07fee 100644 --- a/backend/csv_parser/module_registry.py +++ b/backend/csv_parser/module_registry.py @@ -86,6 +86,8 @@ MODULE_DEFINITIONS: Dict[str, Dict[str, Any]] = { "fields": { "measured_date": {"type": "date", "required": True}, "measured_time": {"type": "time", "required": True}, + # Apple Health: eine Spalte „Start“ / „Datum/Uhrzeit“ (Datetime); Executor splittet. + "start_time": {"type": "datetime", "required": False}, "systolic": {"type": "int", "required": True}, "diastolic": {"type": "int", "required": True}, "pulse": {"type": "int", "required": False}, @@ -152,6 +154,8 @@ def validate_required_field_targets(module: str, field_mappings: dict) -> None: raise ValueError(f"Unbekanntes Modul: {module}") field_defs = cast(dict, mod["fields"]) targets = {v for v in field_mappings.values() if v and v not in ("-", "_skip")} + if module == "blood_pressure" and "start_time" in targets: + targets = set(targets) | {"measured_date", "measured_time"} for fname, finfo in field_defs.items(): if finfo.get("required") and fname not in targets: raise ValueError(f"Pflicht-Zielfeld nicht zugeordnet: {fname}") diff --git a/backend/csv_parser/type_converter.py b/backend/csv_parser/type_converter.py index d5ebaeb..4d02d91 100644 --- a/backend/csv_parser/type_converter.py +++ b/backend/csv_parser/type_converter.py @@ -284,6 +284,15 @@ def _parse_int(raw: str, spec: Mapping[str, Any]) -> int: s = raw.strip() if bool(spec.get("flexible")) or spec.get("thousands_separator") == "auto": s2 = _normalize_num_token(s) + if not s2 or s2 in ("-", "—", "–"): + raise ValueError("leer") + # EU-Dezimal (z. B. Apple DE «37,26» für HRV) — nicht alle Ziffern konkatenieren (würde 3726 → CHECK). + if "," in s2 or "." in s2: + try: + fv = _parse_float_auto(s2) + return int(round(fv)) + except (ValueError, InvalidOperation): + pass neg = s2.startswith("-") body = s2[1:] if neg else s2 digits = re.sub(r"\D", "", body) @@ -425,6 +434,79 @@ def _vitals_baseline_alias_db_field(csv_col: str) -> str | None: return None +def _blood_pressure_alias_db_field(csv_col: str) -> str | None: + """ + Omron (schmal) vs. Apple-Gesundheit (Breitexport): unterschiedliche Spaltennamen; + kombinierte Messzeit oft als „Start“ oder „Datum/Uhrzeit“. + """ + n = normalize_header_for_signature(str(csv_col)) + low = str(csv_col).lower() + if n in ("datum_uhrzeit", "datetime", "date_time", "messzeitpunkt"): + return "start_time" + if n in ("start", "beginn"): + return "start_time" + if n in ("datum", "date", "messdatum"): + return "measured_date" + if n in ("zeit", "time", "uhrzeit"): + return "measured_time" + if "systolisch" in n or ("blutdruck" in n and "systol" in low) or n.startswith("systolic"): + return "systolic" + if "diastolisch" in n or ("blutdruck" in n and "diastol" in low) or n.startswith("diastolic"): + return "diastolic" + if n.startswith("puls") or n.startswith("pulse") or "puls_" in n: + return "pulse" + return None + + +def _activity_alias_db_field(csv_col: str) -> str | None: + """ + Apple-Workout schmal vs. Breitexport (viele Spalten): Trainingsart/Dauer/Strecke + trotzdem zuverlässig erkennen. + """ + n = normalize_header_for_signature(str(csv_col)) + low = str(csv_col).lower() + if n in ("trainingsart", "workout_type", "activity_type", "workouttype"): + return "activity_type" + if ("trainings" in n and "art" in n) or ("workout" in low and "type" in low): + return "activity_type" + if n in ("datum_uhrzeit", "start", "beginn", "startzeit", "von"): + return "start_time" + if n in ("ende", "end", "endzeit", "bis"): + return "end_time" + if n in ("date", "datum"): + return "date" + if "dauer" in n or n == "duration" or n.startswith("duration_"): + return "duration_min" + if ("strecke" in n or "distance" in low) and ("km" in low or "(km" in low or " km" in low): + return "distance_km" + if "aktive_energie" in n or "active_energy" in n: + return "kcal_active" + if "ruheenergie" in n or "resting_energy" in n: + return "kcal_resting" + if ("herzfrequenz" in n or "heart_rate" in n) and ("max" in low or "max" in n): + return "hr_max" + if ( + "durchschnittliche_herzfrequenz" in n + or "heart_rate_average" in n + or ("herzfrequenz" in n and ("durchschn" in n or "avg" in low or "average" in low)) + or ("heart_rate" in n and ("avg" in low or "average" in low)) + ): + return "hr_avg" + return None + + +def _effective_conversion_spec( + db_field: str, + spec: Mapping[str, Any] | None, + module: str | None, +) -> Mapping[str, Any] | None: + if spec is not None: + return spec + if module == "blood_pressure" and db_field == "start_time": + return {"type": "datetime", "format": "yyyy-mm-dd HH:MM:SS", "flexible": True} + return None + + def build_row_after_mapping( csv_row: Mapping[str, str], field_mappings: Mapping[str, str], @@ -434,6 +516,9 @@ def build_row_after_mapping( """ Wendet Zuordnung csv_spalte → db_feld und Typkonvertierung an. Unzugeordnete oder „—“ werden übersprungen. + Die Reihenfolge der Spalten in der CSV spielt keine Rolle (Dict-Zugriff nach Name). + Falls mehrere Spalten auf dasselbe db_field abbilden, gewinnt die zuletzt verarbeitete + (iterreihenfolge = Kopfzeilen-Reihenfolge in der Datei) — in der Praxis selten. """ out: dict[str, Any] = {} tc = type_conversions or {} @@ -441,9 +526,16 @@ def build_row_after_mapping( db_field = _lookup_db_field(str(csv_col), field_mappings) if not db_field and module == "vitals_baseline": db_field = _vitals_baseline_alias_db_field(csv_col) + elif not db_field and module == "blood_pressure": + db_field = _blood_pressure_alias_db_field(csv_col) + elif not db_field and module == "activity": + db_field = _activity_alias_db_field(csv_col) if not db_field: continue - spec = tc.get(db_field) + raw_spec = tc.get(db_field) if isinstance(tc, dict) else None + if not isinstance(raw_spec, dict): + raw_spec = None + spec = _effective_conversion_spec(db_field, raw_spec, module) try: out[db_field] = convert_value( raw, db_field, spec if isinstance(spec, dict) else None, module=module @@ -460,11 +552,13 @@ def diagnose_row_mapping( module: str | None = None, *, mapped_typed: Mapping[str, Any] | None = None, - max_columns: int = 96, + max_columns: int = 512, ) -> dict[str, Any]: """ - Nur für Diagnose-Endpunkt: Quelle (Vorlage vs. Vital-Alias), Konvertierung pro Spalte, + Nur für Diagnose-Endpunkt: Quelle (Vorlage vs. Alias), Konvertierung pro Spalte, Ergebnis wie build_row_after_mapping (json-freundliche Vorschau). + max_columns begrenzt nur die Länge der Liste „per_column“ in der Antwort — der echte + Import verarbeitet alle Spalten (siehe iter_csv_dict_rows / build_row_after_mapping). """ tc = type_conversions or {} per_column: list[dict[str, Any]] = [] @@ -478,9 +572,16 @@ def diagnose_row_mapping( via_a = None if not via_t and module == "vitals_baseline": via_a = _vitals_baseline_alias_db_field(sc) + elif not via_t and module == "blood_pressure": + via_a = _blood_pressure_alias_db_field(sc) + elif not via_t and module == "activity": + via_a = _activity_alias_db_field(sc) target = via_t or via_a src = "template" if via_t else ("alias" if via_a else "none") - spec = tc.get(target) if target else None + raw_spec = tc.get(target) if isinstance(tc, dict) and target else None + if not isinstance(raw_spec, dict): + raw_spec = None + spec = _effective_conversion_spec(target, raw_spec, module) if target else None conv_err: str | None = None conv_preview: Any = None if target: diff --git a/backend/routers/csv_import.py b/backend/routers/csv_import.py index 5b49b6f..2a4b7d4 100644 --- a/backend/routers/csv_import.py +++ b/backend/routers/csv_import.py @@ -15,7 +15,12 @@ from auth import require_auth, check_feature_access, increment_feature_usage from feature_logger import log_feature_usage from db import get_db, get_cursor, r2d from routers.profiles import get_pid -from csv_parser.executor import diagnose_vitals_row, run_universal_csv_import +from csv_parser.executor import ( + diagnose_activity_row, + diagnose_blood_pressure_row, + diagnose_vitals_row, + run_universal_csv_import, +) from csv_parser.core import ( decode_raw_bytes, column_signature, @@ -392,6 +397,10 @@ async def csv_import_diagnose( } if exec_module == "vitals_baseline": entry["vitals"] = diagnose_vitals_row(typed) + elif exec_module == "blood_pressure": + entry["blood_pressure"] = diagnose_blood_pressure_row(typed) + elif exec_module == "activity": + entry["activity"] = diagnose_activity_row(typed) rows_out.append(entry) return { diff --git a/backend/tests/test_csv_import_executor.py b/backend/tests/test_csv_import_executor.py index 63978ce..f5b10cd 100644 --- a/backend/tests/test_csv_import_executor.py +++ b/backend/tests/test_csv_import_executor.py @@ -7,6 +7,7 @@ pytest-Lauf mitlaufen. from __future__ import annotations +import datetime as dt import uuid import pytest @@ -317,6 +318,49 @@ def test_run_universal_import_weight_two_rows_same_day_last_value(): assert params[3] == 83.5 +def test_activity_alias_maps_german_workout_wide_columns(): + from csv_parser.type_converter import build_row_after_mapping + + row = { + "Datum/Uhrzeit": "2026-04-03 08:00:00", + "Trainingsart": "Laufen", + "Dauer": "0:45:00", + } + fm = {"Workout Type": "activity_type"} + tc = { + "activity_type": {"type": "string"}, + "start_time": {"type": "datetime", "format": "yyyy-mm-dd HH:MM:SS", "flexible": True}, + "duration_min": { + "type": "duration", + "format": "HH:MM:SS", + "target_unit": "minutes", + "flexible": True, + }, + } + out = build_row_after_mapping(row, fm, tc, module="activity") + assert str(out.get("activity_type")) == "Laufen" + assert isinstance(out.get("start_time"), dt.datetime) + assert out.get("duration_min") == 45.0 + + +def test_blood_pressure_alias_combined_datetime_column(): + from csv_parser.type_converter import build_row_after_mapping + + row = { + "Datum/Uhrzeit": "2026-04-03 10:30:00", + "Systolisch (mmHg)": "120", + "Diastolisch (mmHg)": "80", + } + fm = {"Date": "measured_date"} + tc = { + "systolic": {"type": "int", "flexible": True}, + "diastolic": {"type": "int", "flexible": True}, + } + out = build_row_after_mapping(row, fm, tc, module="blood_pressure") + assert isinstance(out.get("start_time"), dt.datetime) + assert int(out.get("systolic")) == 120 + + def test_diagnose_vitals_row_and_mapping_smoke(): fm = { "Datum/Uhrzeit": "date", diff --git a/backend/tests/test_csv_parser_core.py b/backend/tests/test_csv_parser_core.py index 4a5c9b0..54efa13 100644 --- a/backend/tests/test_csv_parser_core.py +++ b/backend/tests/test_csv_parser_core.py @@ -210,6 +210,29 @@ def test_int_flexible_thousands(): assert convert_value("1.234", "n", {"type": "int", "flexible": True}) == 1234 +def test_build_row_after_mapping_column_order_independent(): + fm = {"Spalte B": "resting_hr", "Spalte A": "date"} + tc = { + "date": {"type": "date", "format": "yyyy-mm-dd", "flexible": True}, + "resting_hr": {"type": "int", "flexible": True}, + } + r1 = build_row_after_mapping( + {"Spalte A": "2026-01-15", "Spalte B": "58"}, fm, tc, module="vitals_baseline" + ) + r2 = build_row_after_mapping( + {"Spalte B": "58", "Spalte A": "2026-01-15"}, fm, tc, module="vitals_baseline" + ) + assert r1 == r2 + assert r1["resting_hr"] == 58 + + +def test_int_flexible_german_decimal_rounds(): + """Apple-DE: HRV/SpO2 als «37,26» / «95,22» — nicht 3726 aus Ziffern konkatenieren.""" + spec = {"type": "int", "flexible": True} + assert convert_value("37,26", "hrv", spec) == 37 + assert convert_value("95,22", "spo2", spec) == 95 + + def test_datetime_flexible(): spec = {"type": "datetime", "format": "yyyy-mm-dd HH:MM:SS", "flexible": True} dtv = convert_value("15.01.2024 14:30:00", "t", spec) diff --git a/frontend/src/pages/UniversalCsvImportPage.jsx b/frontend/src/pages/UniversalCsvImportPage.jsx index 383c260..554180c 100644 --- a/frontend/src/pages/UniversalCsvImportPage.jsx +++ b/frontend/src/pages/UniversalCsvImportPage.jsx @@ -556,9 +556,10 @@ export default function UniversalCsvImportPage() {

Vorlage #{diagnoseResult.mapping_id} · {diagnoseResult.mapping_name} · Modul{' '} - {MODULE_LABEL[diagnoseResult.module] || diagnoseResult.module}. Bei Vitalwerten: pro Zeile{' '} - vitals.would_pass_prefilter und{' '} - prefilter_fail_reason prüfen (z. B. datum_fehlt). + {MODULE_LABEL[diagnoseResult.module] || diagnoseResult.module}. Hinweise: Vitalwerte{' '} + vitals.*, Blutdruck blood_pressure.*, Workouts{' '} + activity.* (z. B. would_pass_row_gate /{' '} + prefilter_fail_reason).