feat(csv-import): Add blood pressure and activity row diagnosis functionality

- Introduced `diagnose_blood_pressure_row` and `diagnose_activity_row` functions to validate and analyze blood pressure and activity data from CSV imports.
- Updated the CSV import logic to handle combined datetime columns for blood pressure and activity, improving data integrity during import.
- Enhanced type conversion specifications to include `start_time` for blood pressure and activity, ensuring accurate data mapping.
- Added tests to validate the new diagnosis functions and their integration with existing import processes, ensuring robustness and reliability.
- Updated frontend messages to provide clearer guidance on blood pressure and activity data handling during CSV imports.
2026-04-10 16:43:00 +02:00 · 2026-04-10 16:43:00 +02:00 · 5b96bd4f75
commit 5b96bd4f75
parent c5b0540b11
9 changed files with 295 additions and 9 deletions
--- a/backend/csv_parser/core.py
+++ b/backend/csv_parser/core.py
@ -190,7 +190,12 @@ def iter_csv_dict_rows(
    *,
    has_header: bool = True,
 ) -> Iterator[Dict[str, str]]:
-    """Vollständige Datei zeilenweise als Dict (Header = Keys)."""
+    """
+    Vollständige Datei zeilenweise als Dict (Header = Keys).
+    Spaltenreihenfolge ist egal; zusätzliche Spalten werden ignoriert, wenn sie nicht
+    in field_mappings vorkommen. Keine Obergrenze für die Spaltenanzahl (nur Zeilenlimits
+    kommen aus system_config / Import-Router).
+    """
    if not has_header:
        raise ValueError("CSV ohne Kopfzeile wird für Import noch nicht unterstützt")
    normalized = text.replace("\r\n", "\n").replace("\r", "\n")
--- a/backend/csv_parser/executor.py
+++ b/backend/csv_parser/executor.py
@ -397,6 +397,87 @@ def _import_weight(
    }


+def diagnose_blood_pressure_row(mapped_typed: dict[str, Any]) -> dict[str, Any]:
+    """Zeigt, ob Datum/Zeit nach Vorlage + Alias + Apple-Start-Spalte erkannt werden."""
+    md = coerce_date(mapped_typed.get("measured_date"))
+    mt = mapped_typed.get("measured_time")
+    st_combined = mapped_typed.get("start_time")
+    if isinstance(st_combined, dt.datetime):
+        if md is None:
+            md = st_combined.date()
+        if mt is None:
+            mt = st_combined.time()
+    elif isinstance(st_combined, str) and st_combined.strip() and (md is None or mt is None):
+        try:
+            from dateutil import parser as du_parser
+
+            dtp = du_parser.parse(st_combined.strip())
+            if md is None:
+                md = dtp.date()
+            if mt is None:
+                mt = dtp.time()
+        except (ValueError, TypeError, OverflowError):
+            pass
+    sys_v = mapped_typed.get("systolic")
+    dia_v = mapped_typed.get("diastolic")
+    try:
+        int(sys_v)
+        int(dia_v)
+        ok_bp = True
+    except (TypeError, ValueError):
+        ok_bp = False
+    return {
+        "measured_date_iso": md.isoformat() if md else None,
+        "has_measured_time": mt is not None,
+        "start_time_raw_type": type(st_combined).__name__ if st_combined is not None else None,
+        "systolic_ok": ok_bp,
+        "would_reach_insert_check": md is not None and mt is not None,
+    }
+
+
+def diagnose_activity_row(mapped_typed: dict[str, Any]) -> dict[str, Any]:
+    activity_type = mapped_typed.get("activity_type")
+    start_raw = mapped_typed.get("start_time")
+    date_d = coerce_date(mapped_typed.get("date"))
+    start_key: str | None = None
+    fail_hint: str | None = None
+    if isinstance(start_raw, dt.datetime):
+        start_key = start_raw.strftime("%Y-%m-%d %H:%M:%S")
+        if date_d is None:
+            date_d = start_raw.date()
+    elif isinstance(start_raw, dt.time):
+        if date_d is None:
+            fail_hint = "startzeit_ohne_datum"
+        else:
+            start_key = f"{date_d.isoformat()} {start_raw.strftime('%H:%M:%S')}"
+    elif isinstance(start_raw, str) and start_raw.strip():
+        s = start_raw.strip()
+        if date_d is not None and _looks_like_time_only(s):
+            start_key = f"{date_d.isoformat()} {s}"
+        else:
+            start_key = s
+            if date_d is None and len(start_key) >= 10:
+                for fmt in ("%Y-%m-%d", "%d.%m.%Y"):
+                    try:
+                        date_d = dt.datetime.strptime(start_key[:10], fmt).date()
+                        break
+                    except ValueError:
+                        continue
+    has_type = bool(activity_type and str(activity_type).strip())
+    ok = has_type and date_d is not None and bool(start_key)
+    if fail_hint is None and not has_type:
+        fail_hint = "trainingsart_fehlt"
+    elif fail_hint is None and not ok:
+        fail_hint = "datum_start_fehlt"
+    return {
+        "activity_type_preview": (str(activity_type).strip()[:80] if activity_type else None),
+        "date_iso": date_d.isoformat() if date_d else None,
+        "start_key_preview": (start_key[:80] if start_key else None),
+        "would_pass_row_gate": ok,
+        "fail_hint": fail_hint,
+    }
+
+
 def _import_blood_pressure(
    cur,
    profile_id: str,
@ -417,6 +498,23 @@ def _import_blood_pressure(
        mapped = build_row_after_mapping(csv_row, fm, tc, module="blood_pressure")
        md = coerce_date(mapped.get("measured_date"))
        mt = mapped.get("measured_time")
+        st_combined = mapped.get("start_time")
+        if isinstance(st_combined, dt.datetime):
+            if md is None:
+                md = st_combined.date()
+            if mt is None:
+                mt = st_combined.time()
+        elif isinstance(st_combined, str) and st_combined.strip() and (md is None or mt is None):
+            try:
+                from dateutil import parser as du_parser
+
+                dtp = du_parser.parse(st_combined.strip())
+                if md is None:
+                    md = dtp.date()
+                if mt is None:
+                    mt = dtp.time()
+            except (ValueError, TypeError, OverflowError):
+                pass
        if md is None:
            error_details.append({"row": rows_total, "error": "Datum fehlt"})
            continue
--- a/backend/csv_parser/mapping_suggest.py
+++ b/backend/csv_parser/mapping_suggest.py
@ -71,6 +71,7 @@ _DEFAULT_TYPE_CONVERSIONS: dict[str, dict[str, dict[str, Any]]] = {
    "blood_pressure": {
        "measured_date": {"type": "date", "format": "dd.mm.yyyy", "flexible": True},
        "measured_time": {"type": "time", "format": "HH:MM", "flexible": True},
+        "start_time": {"type": "datetime", "format": "yyyy-mm-dd HH:MM:SS", "flexible": True},
        "systolic": {"type": "int", "flexible": True},
        "diastolic": {"type": "int", "flexible": True},
        "pulse": {"type": "int", "flexible": True},
--- a/backend/csv_parser/module_registry.py
+++ b/backend/csv_parser/module_registry.py
@ -86,6 +86,8 @@ MODULE_DEFINITIONS: Dict[str, Dict[str, Any]] = {
        "fields": {
            "measured_date": {"type": "date", "required": True},
            "measured_time": {"type": "time", "required": True},
+            # Apple Health: eine Spalte „Start“ / „Datum/Uhrzeit“ (Datetime); Executor splittet.
+            "start_time": {"type": "datetime", "required": False},
            "systolic": {"type": "int", "required": True},
            "diastolic": {"type": "int", "required": True},
            "pulse": {"type": "int", "required": False},
@ -152,6 +154,8 @@ def validate_required_field_targets(module: str, field_mappings: dict) -> None:
        raise ValueError(f"Unbekanntes Modul: {module}")
    field_defs = cast(dict, mod["fields"])
    targets = {v for v in field_mappings.values() if v and v not in ("-", "_skip")}
+    if module == "blood_pressure" and "start_time" in targets:
+        targets = set(targets) | {"measured_date", "measured_time"}
    for fname, finfo in field_defs.items():
        if finfo.get("required") and fname not in targets:
            raise ValueError(f"Pflicht-Zielfeld nicht zugeordnet: {fname}")
--- a/backend/csv_parser/type_converter.py
+++ b/backend/csv_parser/type_converter.py
@ -284,6 +284,15 @@ def _parse_int(raw: str, spec: Mapping[str, Any]) -> int:
    s = raw.strip()
    if bool(spec.get("flexible")) or spec.get("thousands_separator") == "auto":
        s2 = _normalize_num_token(s)
+        if not s2 or s2 in ("-", "—", "–"):
+            raise ValueError("leer")
+        # EU-Dezimal (z. B. Apple DE «37,26» für HRV) — nicht alle Ziffern konkatenieren (würde 3726 → CHECK).
+        if "," in s2 or "." in s2:
+            try:
+                fv = _parse_float_auto(s2)
+                return int(round(fv))
+            except (ValueError, InvalidOperation):
+                pass
        neg = s2.startswith("-")
        body = s2[1:] if neg else s2
        digits = re.sub(r"\D", "", body)
@ -425,6 +434,79 @@ def _vitals_baseline_alias_db_field(csv_col: str) -> str | None:
    return None


+def _blood_pressure_alias_db_field(csv_col: str) -> str | None:
+    """
+    Omron (schmal) vs. Apple-Gesundheit (Breitexport): unterschiedliche Spaltennamen;
+    kombinierte Messzeit oft als „Start“ oder „Datum/Uhrzeit“.
+    """
+    n = normalize_header_for_signature(str(csv_col))
+    low = str(csv_col).lower()
+    if n in ("datum_uhrzeit", "datetime", "date_time", "messzeitpunkt"):
+        return "start_time"
+    if n in ("start", "beginn"):
+        return "start_time"
+    if n in ("datum", "date", "messdatum"):
+        return "measured_date"
+    if n in ("zeit", "time", "uhrzeit"):
+        return "measured_time"
+    if "systolisch" in n or ("blutdruck" in n and "systol" in low) or n.startswith("systolic"):
+        return "systolic"
+    if "diastolisch" in n or ("blutdruck" in n and "diastol" in low) or n.startswith("diastolic"):
+        return "diastolic"
+    if n.startswith("puls") or n.startswith("pulse") or "puls_" in n:
+        return "pulse"
+    return None
+
+
+def _activity_alias_db_field(csv_col: str) -> str | None:
+    """
+    Apple-Workout schmal vs. Breitexport (viele Spalten): Trainingsart/Dauer/Strecke
+    trotzdem zuverlässig erkennen.
+    """
+    n = normalize_header_for_signature(str(csv_col))
+    low = str(csv_col).lower()
+    if n in ("trainingsart", "workout_type", "activity_type", "workouttype"):
+        return "activity_type"
+    if ("trainings" in n and "art" in n) or ("workout" in low and "type" in low):
+        return "activity_type"
+    if n in ("datum_uhrzeit", "start", "beginn", "startzeit", "von"):
+        return "start_time"
+    if n in ("ende", "end", "endzeit", "bis"):
+        return "end_time"
+    if n in ("date", "datum"):
+        return "date"
+    if "dauer" in n or n == "duration" or n.startswith("duration_"):
+        return "duration_min"
+    if ("strecke" in n or "distance" in low) and ("km" in low or "(km" in low or " km" in low):
+        return "distance_km"
+    if "aktive_energie" in n or "active_energy" in n:
+        return "kcal_active"
+    if "ruheenergie" in n or "resting_energy" in n:
+        return "kcal_resting"
+    if ("herzfrequenz" in n or "heart_rate" in n) and ("max" in low or "max" in n):
+        return "hr_max"
+    if (
+        "durchschnittliche_herzfrequenz" in n
+        or "heart_rate_average" in n
+        or ("herzfrequenz" in n and ("durchschn" in n or "avg" in low or "average" in low))
+        or ("heart_rate" in n and ("avg" in low or "average" in low))
+    ):
+        return "hr_avg"
+    return None
+
+
+def _effective_conversion_spec(
+    db_field: str,
+    spec: Mapping[str, Any] | None,
+    module: str | None,
+) -> Mapping[str, Any] | None:
+    if spec is not None:
+        return spec
+    if module == "blood_pressure" and db_field == "start_time":
+        return {"type": "datetime", "format": "yyyy-mm-dd HH:MM:SS", "flexible": True}
+    return None
+
+
 def build_row_after_mapping(
    csv_row: Mapping[str, str],
    field_mappings: Mapping[str, str],
@ -434,6 +516,9 @@ def build_row_after_mapping(
    """
    Wendet Zuordnung csv_spalte → db_feld und Typkonvertierung an.
    Unzugeordnete oder „—“ werden übersprungen.
+    Die Reihenfolge der Spalten in der CSV spielt keine Rolle (Dict-Zugriff nach Name).
+    Falls mehrere Spalten auf dasselbe db_field abbilden, gewinnt die zuletzt verarbeitete
+    (iterreihenfolge = Kopfzeilen-Reihenfolge in der Datei) — in der Praxis selten.
    """
    out: dict[str, Any] = {}
    tc = type_conversions or {}
@ -441,9 +526,16 @@ def build_row_after_mapping(
        db_field = _lookup_db_field(str(csv_col), field_mappings)
        if not db_field and module == "vitals_baseline":
            db_field = _vitals_baseline_alias_db_field(csv_col)
+        elif not db_field and module == "blood_pressure":
+            db_field = _blood_pressure_alias_db_field(csv_col)
+        elif not db_field and module == "activity":
+            db_field = _activity_alias_db_field(csv_col)
        if not db_field:
            continue
-        spec = tc.get(db_field)
+        raw_spec = tc.get(db_field) if isinstance(tc, dict) else None
+        if not isinstance(raw_spec, dict):
+            raw_spec = None
+        spec = _effective_conversion_spec(db_field, raw_spec, module)
        try:
            out[db_field] = convert_value(
                raw, db_field, spec if isinstance(spec, dict) else None, module=module
@ -460,11 +552,13 @@ def diagnose_row_mapping(
    module: str | None = None,
    *,
    mapped_typed: Mapping[str, Any] | None = None,
-    max_columns: int = 96,
+    max_columns: int = 512,
 ) -> dict[str, Any]:
    """
-    Nur für Diagnose-Endpunkt: Quelle (Vorlage vs. Vital-Alias), Konvertierung pro Spalte,
+    Nur für Diagnose-Endpunkt: Quelle (Vorlage vs. Alias), Konvertierung pro Spalte,
    Ergebnis wie build_row_after_mapping (json-freundliche Vorschau).
+    max_columns begrenzt nur die Länge der Liste „per_column“ in der Antwort — der echte
+    Import verarbeitet alle Spalten (siehe iter_csv_dict_rows / build_row_after_mapping).
    """
    tc = type_conversions or {}
    per_column: list[dict[str, Any]] = []
@ -478,9 +572,16 @@ def diagnose_row_mapping(
        via_a = None
        if not via_t and module == "vitals_baseline":
            via_a = _vitals_baseline_alias_db_field(sc)
+        elif not via_t and module == "blood_pressure":
+            via_a = _blood_pressure_alias_db_field(sc)
+        elif not via_t and module == "activity":
+            via_a = _activity_alias_db_field(sc)
        target = via_t or via_a
        src = "template" if via_t else ("alias" if via_a else "none")
-        spec = tc.get(target) if target else None
+        raw_spec = tc.get(target) if isinstance(tc, dict) and target else None
+        if not isinstance(raw_spec, dict):
+            raw_spec = None
+        spec = _effective_conversion_spec(target, raw_spec, module) if target else None
        conv_err: str | None = None
        conv_preview: Any = None
        if target:
--- a/backend/routers/csv_import.py
+++ b/backend/routers/csv_import.py
@ -15,7 +15,12 @@ from auth import require_auth, check_feature_access, increment_feature_usage
 from feature_logger import log_feature_usage
 from db import get_db, get_cursor, r2d
 from routers.profiles import get_pid
-from csv_parser.executor import diagnose_vitals_row, run_universal_csv_import
+from csv_parser.executor import (
+    diagnose_activity_row,
+    diagnose_blood_pressure_row,
+    diagnose_vitals_row,
+    run_universal_csv_import,
+)
 from csv_parser.core import (
    decode_raw_bytes,
    column_signature,
@ -392,6 +397,10 @@ async def csv_import_diagnose(
        }
        if exec_module == "vitals_baseline":
            entry["vitals"] = diagnose_vitals_row(typed)
+        elif exec_module == "blood_pressure":
+            entry["blood_pressure"] = diagnose_blood_pressure_row(typed)
+        elif exec_module == "activity":
+            entry["activity"] = diagnose_activity_row(typed)
        rows_out.append(entry)

    return {
--- a/backend/tests/test_csv_import_executor.py
+++ b/backend/tests/test_csv_import_executor.py
@ -7,6 +7,7 @@ pytest-Lauf mitlaufen.

 from __future__ import annotations

+import datetime as dt
 import uuid

 import pytest
@ -317,6 +318,49 @@ def test_run_universal_import_weight_two_rows_same_day_last_value():
    assert params[3] == 83.5


+def test_activity_alias_maps_german_workout_wide_columns():
+    from csv_parser.type_converter import build_row_after_mapping
+
+    row = {
+        "Datum/Uhrzeit": "2026-04-03 08:00:00",
+        "Trainingsart": "Laufen",
+        "Dauer": "0:45:00",
+    }
+    fm = {"Workout Type": "activity_type"}
+    tc = {
+        "activity_type": {"type": "string"},
+        "start_time": {"type": "datetime", "format": "yyyy-mm-dd HH:MM:SS", "flexible": True},
+        "duration_min": {
+            "type": "duration",
+            "format": "HH:MM:SS",
+            "target_unit": "minutes",
+            "flexible": True,
+        },
+    }
+    out = build_row_after_mapping(row, fm, tc, module="activity")
+    assert str(out.get("activity_type")) == "Laufen"
+    assert isinstance(out.get("start_time"), dt.datetime)
+    assert out.get("duration_min") == 45.0
+
+
+def test_blood_pressure_alias_combined_datetime_column():
+    from csv_parser.type_converter import build_row_after_mapping
+
+    row = {
+        "Datum/Uhrzeit": "2026-04-03 10:30:00",
+        "Systolisch (mmHg)": "120",
+        "Diastolisch (mmHg)": "80",
+    }
+    fm = {"Date": "measured_date"}
+    tc = {
+        "systolic": {"type": "int", "flexible": True},
+        "diastolic": {"type": "int", "flexible": True},
+    }
+    out = build_row_after_mapping(row, fm, tc, module="blood_pressure")
+    assert isinstance(out.get("start_time"), dt.datetime)
+    assert int(out.get("systolic")) == 120
+
+
 def test_diagnose_vitals_row_and_mapping_smoke():
    fm = {
        "Datum/Uhrzeit": "date",
--- a/backend/tests/test_csv_parser_core.py
+++ b/backend/tests/test_csv_parser_core.py
@ -210,6 +210,29 @@ def test_int_flexible_thousands():
    assert convert_value("1.234", "n", {"type": "int", "flexible": True}) == 1234


+def test_build_row_after_mapping_column_order_independent():
+    fm = {"Spalte B": "resting_hr", "Spalte A": "date"}
+    tc = {
+        "date": {"type": "date", "format": "yyyy-mm-dd", "flexible": True},
+        "resting_hr": {"type": "int", "flexible": True},
+    }
+    r1 = build_row_after_mapping(
+        {"Spalte A": "2026-01-15", "Spalte B": "58"}, fm, tc, module="vitals_baseline"
+    )
+    r2 = build_row_after_mapping(
+        {"Spalte B": "58", "Spalte A": "2026-01-15"}, fm, tc, module="vitals_baseline"
+    )
+    assert r1 == r2
+    assert r1["resting_hr"] == 58
+
+
+def test_int_flexible_german_decimal_rounds():
+    """Apple-DE: HRV/SpO2 als «37,26» / «95,22» — nicht 3726 aus Ziffern konkatenieren."""
+    spec = {"type": "int", "flexible": True}
+    assert convert_value("37,26", "hrv", spec) == 37
+    assert convert_value("95,22", "spo2", spec) == 95
+
+
 def test_datetime_flexible():
    spec = {"type": "datetime", "format": "yyyy-mm-dd HH:MM:SS", "flexible": True}
    dtv = convert_value("15.01.2024 14:30:00", "t", spec)
--- a/frontend/src/pages/UniversalCsvImportPage.jsx
+++ b/frontend/src/pages/UniversalCsvImportPage.jsx
@ -556,9 +556,10 @@ export default function UniversalCsvImportPage() {
              </summary>
              <p style={{ fontSize: 13, color: 'var(--text3)', marginTop: 8, lineHeight: 1.5 }}>
                Vorlage #{diagnoseResult.mapping_id} · {diagnoseResult.mapping_name} · Modul{' '}
-                {MODULE_LABEL[diagnoseResult.module] || diagnoseResult.module}. Bei Vitalwerten: pro Zeile{' '}
-                <code>vitals.would_pass_prefilter</code> und{' '}
-                <code>prefilter_fail_reason</code> prüfen (z. B. <code>datum_fehlt</code>).
+                {MODULE_LABEL[diagnoseResult.module] || diagnoseResult.module}. Hinweise: Vitalwerte{' '}
+                <code>vitals.*</code>, Blutdruck <code>blood_pressure.*</code>, Workouts{' '}
+                <code>activity.*</code> (z. B. <code>would_pass_row_gate</code> /{' '}
+                <code>prefilter_fail_reason</code>).
              </p>
              <pre
                style={{