feat(csv-import): Add blood pressure and activity row diagnosis functionality
All checks were successful
Deploy Development / deploy (push) Successful in 55s
Build Test / pytest-backend (push) Successful in 4s
Build Test / lint-backend (push) Successful in 0s
Build Test / build-frontend (push) Successful in 17s

- Introduced `diagnose_blood_pressure_row` and `diagnose_activity_row` functions to validate and analyze blood pressure and activity data from CSV imports.
- Updated the CSV import logic to handle combined datetime columns for blood pressure and activity, improving data integrity during import.
- Enhanced type conversion specifications to include `start_time` for blood pressure and activity, ensuring accurate data mapping.
- Added tests to validate the new diagnosis functions and their integration with existing import processes, ensuring robustness and reliability.
- Updated frontend messages to provide clearer guidance on blood pressure and activity data handling during CSV imports.
This commit is contained in:
Lars 2026-04-10 16:43:00 +02:00
parent c5b0540b11
commit 5b96bd4f75
9 changed files with 295 additions and 9 deletions

View File

@ -190,7 +190,12 @@ def iter_csv_dict_rows(
*,
has_header: bool = True,
) -> Iterator[Dict[str, str]]:
"""Vollständige Datei zeilenweise als Dict (Header = Keys)."""
"""
Vollständige Datei zeilenweise als Dict (Header = Keys).
Spaltenreihenfolge ist egal; zusätzliche Spalten werden ignoriert, wenn sie nicht
in field_mappings vorkommen. Keine Obergrenze für die Spaltenanzahl (nur Zeilenlimits
kommen aus system_config / Import-Router).
"""
if not has_header:
raise ValueError("CSV ohne Kopfzeile wird für Import noch nicht unterstützt")
normalized = text.replace("\r\n", "\n").replace("\r", "\n")

View File

@ -397,6 +397,87 @@ def _import_weight(
}
def diagnose_blood_pressure_row(mapped_typed: dict[str, Any]) -> dict[str, Any]:
    """
    Report whether date and time of a mapped blood-pressure row are recognised.

    The date is taken from ``measured_date`` (passed through ``coerce_date``)
    and the time from ``measured_time``. Whichever of the two is still missing
    is filled from a combined ``start_time`` value, which may be a
    ``datetime`` or a string that ``dateutil`` can parse.

    Args:
        mapped_typed: Row after field mapping and type conversion.

    Returns:
        A dict with the keys
        ``measured_date_iso`` (ISO date string or ``None``),
        ``has_measured_time`` (a time value is present),
        ``start_time_raw_type`` (type name of ``start_time`` or ``None``),
        ``systolic_ok`` (``True`` only if BOTH systolic and diastolic can be
        converted with ``int()``), and
        ``would_reach_insert_check`` (date and time are both present).
    """
    md = coerce_date(mapped_typed.get("measured_date"))
    mt = mapped_typed.get("measured_time")
    st_combined = mapped_typed.get("start_time")

    if isinstance(st_combined, dt.datetime):
        if md is None:
            md = st_combined.date()
        if mt is None:
            mt = st_combined.time()
    elif isinstance(st_combined, str) and st_combined.strip() and (md is None or mt is None):
        # Only parse the string when it can actually contribute a missing part.
        try:
            from dateutil import parser as du_parser

            dtp = du_parser.parse(st_combined.strip())
        except (ValueError, TypeError, OverflowError):
            # Unparseable combined column: leave date/time as they are.
            dtp = None
        if dtp is not None:
            if md is None:
                md = dtp.date()
            if mt is None:
                mt = dtp.time()

    sys_v = mapped_typed.get("systolic")
    dia_v = mapped_typed.get("diastolic")
    try:
        int(sys_v)
        int(dia_v)
    except (TypeError, ValueError, OverflowError):
        # OverflowError: int() of a non-finite float (inf) must not crash the
        # diagnosis; it simply means the reading is unusable.
        ok_bp = False
    else:
        ok_bp = True

    return {
        "measured_date_iso": md.isoformat() if md else None,
        "has_measured_time": mt is not None,
        "start_time_raw_type": type(st_combined).__name__ if st_combined is not None else None,
        "systolic_ok": ok_bp,
        "would_reach_insert_check": md is not None and mt is not None,
    }
def diagnose_activity_row(mapped_typed: dict[str, Any]) -> dict[str, Any]:
    """
    Report whether a mapped activity row has a type, a date and a start key.

    ``start_time`` may be a ``datetime``, a ``time`` or a non-empty string.
    A ``datetime`` also supplies the date when ``date`` is missing; a bare
    ``time`` needs an existing date; a string is combined with the date when
    it looks like a time only, otherwise it is used as-is and its first ten
    characters are tried as ``YYYY-MM-DD`` or ``DD.MM.YYYY`` to recover a date.

    Args:
        mapped_typed: Row after field mapping and type conversion.

    Returns:
        A dict with ``activity_type_preview``, ``date_iso``,
        ``start_key_preview`` (previews are cut to 80 characters),
        ``would_pass_row_gate`` and ``fail_hint``.
    """
    workout_kind = mapped_typed.get("activity_type")
    raw_start = mapped_typed.get("start_time")
    day = coerce_date(mapped_typed.get("date"))
    start_key: str | None = None
    fail_hint: str | None = None

    if isinstance(raw_start, dt.datetime):
        start_key = raw_start.strftime("%Y-%m-%d %H:%M:%S")
        day = raw_start.date() if day is None else day
    elif isinstance(raw_start, dt.time):
        if day is not None:
            clock = raw_start.strftime("%H:%M:%S")
            start_key = f"{day.isoformat()} {clock}"
        else:
            # A time of day alone cannot be anchored to a calendar date.
            fail_hint = "startzeit_ohne_datum"
    elif isinstance(raw_start, str) and raw_start.strip():
        text = raw_start.strip()
        if day is not None and _looks_like_time_only(text):
            start_key = f"{day.isoformat()} {text}"
        else:
            start_key = text
        if day is None and len(start_key) >= 10:
            # Try to recover the date from the leading ten characters.
            head = start_key[:10]
            for pattern in ("%Y-%m-%d", "%d.%m.%Y"):
                try:
                    day = dt.datetime.strptime(head, pattern).date()
                except ValueError:
                    continue
                break

    has_type = bool(workout_kind and str(workout_kind).strip())
    ok = has_type and day is not None and bool(start_key)

    # An earlier, more specific hint is never overwritten.
    if fail_hint is None:
        if not has_type:
            fail_hint = "trainingsart_fehlt"
        elif not ok:
            fail_hint = "datum_start_fehlt"

    return {
        "activity_type_preview": (str(workout_kind).strip()[:80] if workout_kind else None),
        "date_iso": day.isoformat() if day else None,
        "start_key_preview": (start_key[:80] if start_key else None),
        "would_pass_row_gate": ok,
        "fail_hint": fail_hint,
    }
def _import_blood_pressure(
cur,
profile_id: str,
@ -417,6 +498,23 @@ def _import_blood_pressure(
mapped = build_row_after_mapping(csv_row, fm, tc, module="blood_pressure")
md = coerce_date(mapped.get("measured_date"))
mt = mapped.get("measured_time")
st_combined = mapped.get("start_time")
if isinstance(st_combined, dt.datetime):
if md is None:
md = st_combined.date()
if mt is None:
mt = st_combined.time()
elif isinstance(st_combined, str) and st_combined.strip() and (md is None or mt is None):
try:
from dateutil import parser as du_parser
dtp = du_parser.parse(st_combined.strip())
if md is None:
md = dtp.date()
if mt is None:
mt = dtp.time()
except (ValueError, TypeError, OverflowError):
pass
if md is None:
error_details.append({"row": rows_total, "error": "Datum fehlt"})
continue

View File

@ -71,6 +71,7 @@ _DEFAULT_TYPE_CONVERSIONS: dict[str, dict[str, dict[str, Any]]] = {
"blood_pressure": {
"measured_date": {"type": "date", "format": "dd.mm.yyyy", "flexible": True},
"measured_time": {"type": "time", "format": "HH:MM", "flexible": True},
"start_time": {"type": "datetime", "format": "yyyy-mm-dd HH:MM:SS", "flexible": True},
"systolic": {"type": "int", "flexible": True},
"diastolic": {"type": "int", "flexible": True},
"pulse": {"type": "int", "flexible": True},

View File

@ -86,6 +86,8 @@ MODULE_DEFINITIONS: Dict[str, Dict[str, Any]] = {
"fields": {
"measured_date": {"type": "date", "required": True},
"measured_time": {"type": "time", "required": True},
# Apple Health: eine Spalte „Start“ / „Datum/Uhrzeit“ (Datetime); Executor splittet.
"start_time": {"type": "datetime", "required": False},
"systolic": {"type": "int", "required": True},
"diastolic": {"type": "int", "required": True},
"pulse": {"type": "int", "required": False},
@ -152,6 +154,8 @@ def validate_required_field_targets(module: str, field_mappings: dict) -> None:
raise ValueError(f"Unbekanntes Modul: {module}")
field_defs = cast(dict, mod["fields"])
targets = {v for v in field_mappings.values() if v and v not in ("-", "_skip")}
if module == "blood_pressure" and "start_time" in targets:
targets = set(targets) | {"measured_date", "measured_time"}
for fname, finfo in field_defs.items():
if finfo.get("required") and fname not in targets:
raise ValueError(f"Pflicht-Zielfeld nicht zugeordnet: {fname}")

View File

@ -284,6 +284,15 @@ def _parse_int(raw: str, spec: Mapping[str, Any]) -> int:
s = raw.strip()
if bool(spec.get("flexible")) or spec.get("thousands_separator") == "auto":
s2 = _normalize_num_token(s)
if not s2 or s2 in ("-", "", ""):
raise ValueError("leer")
# EU-Dezimal (z. B. Apple DE «37,26» für HRV) — nicht alle Ziffern konkatenieren (würde 3726 → CHECK).
if "," in s2 or "." in s2:
try:
fv = _parse_float_auto(s2)
return int(round(fv))
except (ValueError, InvalidOperation):
pass
neg = s2.startswith("-")
body = s2[1:] if neg else s2
digits = re.sub(r"\D", "", body)
@ -425,6 +434,79 @@ def _vitals_baseline_alias_db_field(csv_col: str) -> str | None:
return None
def _blood_pressure_alias_db_field(csv_col: str) -> str | None:
    """
    Map a blood-pressure CSV header to a DB field name by alias.

    Covers the differing column names of Omron (narrow) and Apple Health
    (wide) exports; a combined measurement timestamp often appears as
    "Start" or "Datum/Uhrzeit" and is mapped to ``start_time``.

    Args:
        csv_col: Header text as it appears in the CSV.

    Returns:
        The target DB field, or ``None`` when no alias matches.
    """
    header = str(csv_col)
    norm = normalize_header_for_signature(header)
    lowered = header.lower()

    # Exact matches on the normalised header, checked in this order.
    exact_aliases = (
        (
            ("datum_uhrzeit", "datetime", "date_time", "messzeitpunkt", "start", "beginn"),
            "start_time",
        ),
        (("datum", "date", "messdatum"), "measured_date"),
        (("zeit", "time", "uhrzeit"), "measured_time"),
    )
    for names, target in exact_aliases:
        if norm in names:
            return target

    mentions_bp = "blutdruck" in norm
    if (
        "systolisch" in norm
        or (mentions_bp and "systol" in lowered)
        or norm.startswith("systolic")
    ):
        return "systolic"
    if (
        "diastolisch" in norm
        or (mentions_bp and "diastol" in lowered)
        or norm.startswith("diastolic")
    ):
        return "diastolic"
    # The prefix "puls" also covers headers starting with "pulse".
    if norm.startswith("puls") or "puls_" in norm:
        return "pulse"
    return None
def _activity_alias_db_field(csv_col: str) -> str | None:
    """
    Map an activity/workout CSV header to a DB field name by alias.

    Intended to recognise workout type, duration, distance and related
    columns in both the narrow Apple workout export and the wide export
    with many columns.

    Args:
        csv_col: Header text as it appears in the CSV.

    Returns:
        The target DB field, or ``None`` when no alias matches.
    """
    header = str(csv_col)
    norm = normalize_header_for_signature(header)
    lowered = header.lower()

    if (
        norm in ("trainingsart", "workout_type", "activity_type", "workouttype")
        or ("trainings" in norm and "art" in norm)
        or ("workout" in lowered and "type" in lowered)
    ):
        return "activity_type"

    # Exact matches on the normalised header, checked in this order.
    exact_aliases = (
        (("datum_uhrzeit", "start", "beginn", "startzeit", "von"), "start_time"),
        (("ende", "end", "endzeit", "bis"), "end_time"),
        (("date", "datum"), "date"),
    )
    for names, target in exact_aliases:
        if norm in names:
            return target

    if "dauer" in norm or norm == "duration" or norm.startswith("duration_"):
        return "duration_min"
    # Distance only counts when the header also names kilometres.
    if ("strecke" in norm or "distance" in lowered) and "km" in lowered:
        return "distance_km"
    if "aktive_energie" in norm or "active_energy" in norm:
        return "kcal_active"
    if "ruheenergie" in norm or "resting_energy" in norm:
        return "kcal_resting"

    hr_german = "herzfrequenz" in norm
    hr_english = "heart_rate" in norm
    # The maximum is tested before the average, as a header can mention both.
    if (hr_german or hr_english) and ("max" in lowered or "max" in norm):
        return "hr_max"

    mentions_avg = "avg" in lowered or "average" in lowered
    if (
        "durchschnittliche_herzfrequenz" in norm
        or "heart_rate_average" in norm
        or (hr_german and ("durchschn" in norm or mentions_avg))
        or (hr_english and mentions_avg)
    ):
        return "hr_avg"
    return None
def _effective_conversion_spec(
db_field: str,
spec: Mapping[str, Any] | None,
module: str | None,
) -> Mapping[str, Any] | None:
if spec is not None:
return spec
if module == "blood_pressure" and db_field == "start_time":
return {"type": "datetime", "format": "yyyy-mm-dd HH:MM:SS", "flexible": True}
return None
def build_row_after_mapping(
csv_row: Mapping[str, str],
field_mappings: Mapping[str, str],
@ -434,6 +516,9 @@ def build_row_after_mapping(
"""
Wendet Zuordnung csv_spalte db_feld und Typkonvertierung an.
Unzugeordnete oder werden übersprungen.
Die Reihenfolge der Spalten in der CSV spielt keine Rolle (Dict-Zugriff nach Name).
Falls mehrere Spalten auf dasselbe db_field abbilden, gewinnt die zuletzt verarbeitete
(iterreihenfolge = Kopfzeilen-Reihenfolge in der Datei) in der Praxis selten.
"""
out: dict[str, Any] = {}
tc = type_conversions or {}
@ -441,9 +526,16 @@ def build_row_after_mapping(
db_field = _lookup_db_field(str(csv_col), field_mappings)
if not db_field and module == "vitals_baseline":
db_field = _vitals_baseline_alias_db_field(csv_col)
elif not db_field and module == "blood_pressure":
db_field = _blood_pressure_alias_db_field(csv_col)
elif not db_field and module == "activity":
db_field = _activity_alias_db_field(csv_col)
if not db_field:
continue
spec = tc.get(db_field)
raw_spec = tc.get(db_field) if isinstance(tc, dict) else None
if not isinstance(raw_spec, dict):
raw_spec = None
spec = _effective_conversion_spec(db_field, raw_spec, module)
try:
out[db_field] = convert_value(
raw, db_field, spec if isinstance(spec, dict) else None, module=module
@ -460,11 +552,13 @@ def diagnose_row_mapping(
module: str | None = None,
*,
mapped_typed: Mapping[str, Any] | None = None,
max_columns: int = 96,
max_columns: int = 512,
) -> dict[str, Any]:
"""
Nur für Diagnose-Endpunkt: Quelle (Vorlage vs. Vital-Alias), Konvertierung pro Spalte,
Nur für Diagnose-Endpunkt: Quelle (Vorlage vs. Alias), Konvertierung pro Spalte,
Ergebnis wie build_row_after_mapping (json-freundliche Vorschau).
max_columns begrenzt nur die Länge der Liste per_column in der Antwort der echte
Import verarbeitet alle Spalten (siehe iter_csv_dict_rows / build_row_after_mapping).
"""
tc = type_conversions or {}
per_column: list[dict[str, Any]] = []
@ -478,9 +572,16 @@ def diagnose_row_mapping(
via_a = None
if not via_t and module == "vitals_baseline":
via_a = _vitals_baseline_alias_db_field(sc)
elif not via_t and module == "blood_pressure":
via_a = _blood_pressure_alias_db_field(sc)
elif not via_t and module == "activity":
via_a = _activity_alias_db_field(sc)
target = via_t or via_a
src = "template" if via_t else ("alias" if via_a else "none")
spec = tc.get(target) if target else None
raw_spec = tc.get(target) if isinstance(tc, dict) and target else None
if not isinstance(raw_spec, dict):
raw_spec = None
spec = _effective_conversion_spec(target, raw_spec, module) if target else None
conv_err: str | None = None
conv_preview: Any = None
if target:

View File

@ -15,7 +15,12 @@ from auth import require_auth, check_feature_access, increment_feature_usage
from feature_logger import log_feature_usage
from db import get_db, get_cursor, r2d
from routers.profiles import get_pid
from csv_parser.executor import diagnose_vitals_row, run_universal_csv_import
from csv_parser.executor import (
diagnose_activity_row,
diagnose_blood_pressure_row,
diagnose_vitals_row,
run_universal_csv_import,
)
from csv_parser.core import (
decode_raw_bytes,
column_signature,
@ -392,6 +397,10 @@ async def csv_import_diagnose(
}
if exec_module == "vitals_baseline":
entry["vitals"] = diagnose_vitals_row(typed)
elif exec_module == "blood_pressure":
entry["blood_pressure"] = diagnose_blood_pressure_row(typed)
elif exec_module == "activity":
entry["activity"] = diagnose_activity_row(typed)
rows_out.append(entry)
return {

View File

@ -7,6 +7,7 @@ pytest-Lauf mitlaufen.
from __future__ import annotations
import datetime as dt
import uuid
import pytest
@ -317,6 +318,49 @@ def test_run_universal_import_weight_two_rows_same_day_last_value():
assert params[3] == 83.5
def test_activity_alias_maps_german_workout_wide_columns():
    """German workout headers are resolved by alias when the template does not match them."""
    from csv_parser.type_converter import build_row_after_mapping

    # The template only knows "Workout Type", which is not a column of the row,
    # so every column here has to be resolved without a template hit.
    field_mappings = {"Workout Type": "activity_type"}
    type_conversions = {
        "activity_type": {"type": "string"},
        "start_time": {"type": "datetime", "format": "yyyy-mm-dd HH:MM:SS", "flexible": True},
        "duration_min": {
            "type": "duration",
            "format": "HH:MM:SS",
            "target_unit": "minutes",
            "flexible": True,
        },
    }
    csv_row = {
        "Datum/Uhrzeit": "2026-04-03 08:00:00",
        "Trainingsart": "Laufen",
        "Dauer": "0:45:00",
    }

    result = build_row_after_mapping(
        csv_row, field_mappings, type_conversions, module="activity"
    )

    assert str(result.get("activity_type")) == "Laufen"
    assert isinstance(result.get("start_time"), dt.datetime)
    assert result.get("duration_min") == 45.0
def test_blood_pressure_alias_combined_datetime_column():
    """A combined "Datum/Uhrzeit" column ends up as a datetime in ``start_time``."""
    from csv_parser.type_converter import build_row_after_mapping

    # No spec for start_time is passed in; the conversion still has to yield a datetime.
    field_mappings = {"Date": "measured_date"}
    type_conversions = {
        "systolic": {"type": "int", "flexible": True},
        "diastolic": {"type": "int", "flexible": True},
    }
    csv_row = {
        "Datum/Uhrzeit": "2026-04-03 10:30:00",
        "Systolisch (mmHg)": "120",
        "Diastolisch (mmHg)": "80",
    }

    result = build_row_after_mapping(
        csv_row, field_mappings, type_conversions, module="blood_pressure"
    )

    assert isinstance(result.get("start_time"), dt.datetime)
    assert int(result.get("systolic")) == 120
def test_diagnose_vitals_row_and_mapping_smoke():
fm = {
"Datum/Uhrzeit": "date",

View File

@ -210,6 +210,29 @@ def test_int_flexible_thousands():
assert convert_value("1.234", "n", {"type": "int", "flexible": True}) == 1234
def test_build_row_after_mapping_column_order_independent():
    """The same cells in a different column order produce the same mapped row."""
    mappings = {"Spalte B": "resting_hr", "Spalte A": "date"}
    conversions = {
        "date": {"type": "date", "format": "yyyy-mm-dd", "flexible": True},
        "resting_hr": {"type": "int", "flexible": True},
    }
    a_first = {"Spalte A": "2026-01-15", "Spalte B": "58"}
    b_first = {"Spalte B": "58", "Spalte A": "2026-01-15"}

    from_a_first = build_row_after_mapping(
        a_first, mappings, conversions, module="vitals_baseline"
    )
    from_b_first = build_row_after_mapping(
        b_first, mappings, conversions, module="vitals_baseline"
    )

    assert from_a_first == from_b_first
    assert from_a_first["resting_hr"] == 58
def test_int_flexible_german_decimal_rounds():
    """Apple (DE) exports HRV/SpO2 as "37,26" / "95,22": round, do not concatenate digits into 3726."""
    flexible_int = {"type": "int", "flexible": True}
    cases = (
        ("37,26", "hrv", 37),
        ("95,22", "spo2", 95),
    )
    for raw, field, expected in cases:
        assert convert_value(raw, field, flexible_int) == expected
def test_datetime_flexible():
spec = {"type": "datetime", "format": "yyyy-mm-dd HH:MM:SS", "flexible": True}
dtv = convert_value("15.01.2024 14:30:00", "t", spec)

View File

@ -556,9 +556,10 @@ export default function UniversalCsvImportPage() {
</summary>
<p style={{ fontSize: 13, color: 'var(--text3)', marginTop: 8, lineHeight: 1.5 }}>
Vorlage #{diagnoseResult.mapping_id} · {diagnoseResult.mapping_name} · Modul{' '}
{MODULE_LABEL[diagnoseResult.module] || diagnoseResult.module}. Bei Vitalwerten: pro Zeile{' '}
<code>vitals.would_pass_prefilter</code> und{' '}
<code>prefilter_fail_reason</code> prüfen (z.B. <code>datum_fehlt</code>).
{MODULE_LABEL[diagnoseResult.module] || diagnoseResult.module}. Hinweise: Vitalwerte{' '}
<code>vitals.*</code>, Blutdruck <code>blood_pressure.*</code>, Workouts{' '}
<code>activity.*</code> (z.B. <code>would_pass_row_gate</code> /{' '}
<code>prefilter_fail_reason</code>).
</p>
<pre
style={{