feat(csv-import): Enhance CSV import processing and validation
All checks were successful
Deploy Development / deploy (push) Successful in 56s
Build Test / pytest-backend (push) Successful in 4s
Build Test / lint-backend (push) Successful in 1s
Build Test / build-frontend (push) Successful in 16s

- Updated the CSV import logic to support new row processing specifications for weight and vitals baseline, allowing for better data aggregation and validation.
- Implemented handling for multiple rows on the same day: rows are grouped by date and aggregated into a single entry, using the mean for vitals values and the last value in the file for weight.
- Enhanced test coverage for the new import functionalities, ensuring correct behavior during data processing and validation.
- Extended the module registry with an `import_row_processing_default` entry for weight and vitals baseline, so each module defines its default grouping and aggregation when handling CSV data.
This commit is contained in:
Lars 2026-04-10 15:09:34 +02:00
parent c0fcdea1fe
commit e35d167055
5 changed files with 165 additions and 9 deletions

View File

@ -34,7 +34,7 @@ jobs:
docker compose -f "$COMPOSE_FILE" exec -T backend sh -lc "
pip install -r /app/requirements-dev.txt &&
cd /app &&
python -m pytest tests -m 'not slow' -q --tb=short
python -m pytest tests -m 'not slow' -ra -vv --tb=short
"
lint-backend:

View File

@ -130,6 +130,7 @@ def run_universal_csv_import(
bool(has_header),
fm,
tc,
mapping,
error_details,
affected_ids,
)
@ -169,6 +170,7 @@ def run_universal_csv_import(
bool(has_header),
fm,
tc,
mapping,
error_details,
affected_ids,
)
@ -296,19 +298,18 @@ def _import_weight(
has_header: bool,
fm: dict,
tc: dict | None,
mapping: dict[str, Any],
error_details: list,
affected_ids: dict,
) -> dict[str, int]:
spec = resolve_import_row_processing("weight", mapping)
mapped_rows: list[dict[str, Any]] = []
rows_total = 0
inserted = 0
updated = 0
new_entries = 0
for csv_row in iter_csv_dict_rows(text, delim, has_header=has_header):
rows_total += 1
mapped = build_row_after_mapping(csv_row, fm, tc, module="weight")
d = coerce_date(mapped.get("date"))
w = mapped.get("weight")
note = mapped.get("note")
if d is None:
error_details.append({"row": rows_total, "error": "Datum fehlt"})
continue
@ -316,10 +317,37 @@ def _import_weight(
error_details.append({"row": rows_total, "error": "Gewicht fehlt"})
continue
try:
w = float(w)
float(w)
except (TypeError, ValueError):
error_details.append({"row": rows_total, "error": "Gewicht ungültig"})
continue
mapped["date"] = d
mapped_rows.append(mapped)
if spec:
try:
validate_import_row_processing("weight", spec, fm)
except ValueError as e:
raise ValueError(str(e)) from e
merged_rows = aggregate_mapped_rows(mapped_rows, spec)
else:
merged_rows = list(mapped_rows)
inserted = 0
updated = 0
new_entries = 0
for merged in merged_rows:
d = coerce_date(merged.get("date"))
w = merged.get("weight")
note = merged.get("note")
if d is None:
continue
if w is None:
continue
try:
w = float(w)
except (TypeError, ValueError):
continue
iso = d.isoformat()
cur.execute(
"SELECT id FROM weight_log WHERE profile_id=%s AND date=%s",
@ -496,13 +524,14 @@ def _import_vitals_baseline(
has_header: bool,
fm: dict,
tc: dict | None,
mapping: dict[str, Any],
error_details: list,
affected_ids: dict,
) -> dict[str, int]:
spec = resolve_import_row_processing("vitals_baseline", mapping)
mapped_rows: list[dict[str, Any]] = []
rows_total = 0
inserted = 0
updated = 0
skipped = 0
skipped_prefilter = 0
for csv_row in iter_csv_dict_rows(text, delim, has_header=has_header):
rows_total += 1
mapped = build_row_after_mapping(csv_row, fm, tc, module="vitals_baseline")
@ -515,6 +544,33 @@ def _import_vitals_baseline(
vo2 = _v_safe_float(mapped.get("vo2_max"))
spo2 = _v_safe_int(mapped.get("spo2"))
resp = _v_safe_float(mapped.get("respiratory_rate"))
if not any(x is not None for x in (rhr, hrv, vo2, spo2, resp)):
skipped_prefilter += 1
continue
mapped["date"] = d
mapped_rows.append(mapped)
if spec:
try:
validate_import_row_processing("vitals_baseline", spec, fm)
except ValueError as e:
raise ValueError(str(e)) from e
merged_rows = aggregate_mapped_rows(mapped_rows, spec)
else:
merged_rows = list(mapped_rows)
inserted = 0
updated = 0
skipped = skipped_prefilter
for merged in merged_rows:
d = coerce_date(merged.get("date"))
if d is None:
continue
rhr = _v_safe_int(merged.get("resting_hr"))
hrv = _v_safe_int(merged.get("hrv"))
vo2 = _v_safe_float(merged.get("vo2_max"))
spo2 = _v_safe_int(merged.get("spo2"))
resp = _v_safe_float(merged.get("respiratory_rate"))
if not any(x is not None for x in (rhr, hrv, vo2, spo2, resp)):
skipped += 1
continue

View File

@ -69,6 +69,16 @@ MODULE_DEFINITIONS: Dict[str, Dict[str, Any]] = {
},
"duplicate_key": ["profile_id", "date"],
"duplicate_strategy": "update",
"import_row_processing_default": {
"group_by": ["date"],
"aggregates": {
"resting_hr": "mean",
"hrv": "mean",
"vo2_max": "mean",
"spo2": "mean",
"respiratory_rate": "mean",
},
},
},
"blood_pressure": {
"table": "blood_pressure_log",
@ -92,6 +102,14 @@ MODULE_DEFINITIONS: Dict[str, Dict[str, Any]] = {
},
"duplicate_key": ["profile_id", "date"],
"duplicate_strategy": "update",
# Mehrere CSV-Zeilen pro Tag → ein Eintrag (letzte Zeile im Export zählt)
"import_row_processing_default": {
"group_by": ["date"],
"aggregates": {
"weight": "last",
"note": "last",
},
},
},
}

View File

@ -130,6 +130,45 @@ def test_run_universal_import_vitals_baseline_upsert_insert_path():
assert any("INSERT INTO vitals_baseline" in q[0] for q in cur.executes)
def test_run_universal_import_vitals_baseline_two_rows_same_day_averages():
    """Two vitals rows sharing a calendar day yield one upsert with averaged values."""
    header = "Start,Resting Heart Rate,Heart Rate Variability,VO2 Max\n"
    morning = "2024-01-15 07:00:00,50,40,42.0\n"
    evening = "2024-01-15 18:00:00,60,50,43.0\n"
    csv_text = header + morning + evening

    # CSV column -> target field of the vitals_baseline module.
    field_mappings = {
        "Start": "date",
        "Resting Heart Rate": "resting_hr",
        "Heart Rate Variability": "hrv",
        "VO2 Max": "vo2_max",
    }
    # Both timestamps are reduced to the same date via "date_only".
    type_conversions = {
        "date": {
            "type": "datetime",
            "format": "yyyy-mm-dd HH:MM:SS",
            "extract": "date_only",
            "flexible": True,
        },
        "resting_hr": {"type": "int", "flexible": True},
        "hrv": {"type": "int", "flexible": True},
        "vo2_max": {"type": "float", "decimal_separator": ".", "flexible": True},
    }
    import_mapping = {
        "delimiter": ",",
        "has_header": True,
        "field_mappings": field_mappings,
        "type_conversions": type_conversions,
    }

    cursor = _SeqCursor([{"inserted": True, "id": "bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb"}])
    result = run_universal_csv_import(
        cursor, PID, "vitals_baseline", csv_text, "v2.csv", import_mapping
    )

    # Two CSV rows were read, but only one aggregated row was written.
    assert result["rows_total"] == 2
    assert result["rows_imported"] == 1

    upsert_calls = list(
        filter(lambda call: "INSERT INTO vitals_baseline" in call[0], cursor.executes)
    )
    assert len(upsert_calls) == 1

    bound_params = upsert_calls[0][1]
    # Positions 2..4 hold the means of (50, 60), (40, 50) and (42.0, 43.0).
    for position, expected in ((2, 55), (3, 45), (4, 42.5)):
        assert bound_params[position] == expected
def test_run_universal_import_activity_garmin_time_plus_date_columns(monkeypatch):
"""Datum in eigener Spalte, Uhrzeit wie bei Garmin nur als Uhrzeit."""
monkeypatch.setattr(
@ -215,3 +254,25 @@ def test_run_universal_import_nutrition_two_rows_same_day_aggregates_to_one_row(
assert params[4] == 15.0
assert params[5] == 30.0
assert params[6] == 45.0
def test_run_universal_import_weight_two_rows_same_day_last_value():
    """Multiple weight rows per day -> default: the last value in the file wins."""
    csv_text = (
        "Date,Weight\n"
        "2024-01-15,85.0\n"
        "2024-01-15,83.5\n"
    )
    type_conversions = {
        "date": {"type": "date", "format": "yyyy-mm-dd", "flexible": True},
        "weight": {"type": "float", "decimal_separator": ".", "flexible": True},
    }
    import_mapping = {
        "delimiter": ",",
        "has_header": True,
        "field_mappings": {"Date": "date", "Weight": "weight"},
        "type_conversions": type_conversions,
    }

    cursor = _SeqCursor([None])
    result = run_universal_csv_import(
        cursor, PID, "weight", csv_text, "w.csv", import_mapping
    )

    # Both rows are counted as read, yet only a single entry is imported.
    assert result["rows_total"] == 2
    assert result["rows_imported"] == 1

    insert_calls = list(
        filter(lambda call: "INSERT INTO weight_log" in call[0], cursor.executes)
    )
    assert len(insert_calls) == 1

    bound_params = insert_calls[0][1]
    # The second (last) row's weight must be the one that gets inserted.
    assert bound_params[3] == 83.5

View File

@ -65,3 +65,24 @@ def test_resolve_none_uses_nutrition_default():
spec = resolve_import_row_processing("nutrition", {})
assert spec is not None
assert "date" in (spec.get("group_by") or [])
def test_resolve_weight_default_uses_last():
    """With an empty mapping, the resolved weight spec aggregates weight and note via "last"."""
    resolved = resolve_import_row_processing("weight", {})
    assert resolved is not None
    for field_name in ("weight", "note"):
        assert resolved.get("aggregates", {}).get(field_name) == "last"
def test_validate_weight_import_row_processing_ok():
    """A date-grouped weight spec with a "mean" aggregate passes validation without raising."""
    row_processing = {"group_by": ["date"], "aggregates": {"weight": "mean"}}
    field_mappings = {"D": "date", "W": "weight"}
    # No assertion needed: the test succeeds as long as no exception is raised.
    validate_import_row_processing("weight", row_processing, field_mappings)
def test_resolve_vitals_baseline_default_uses_mean():
    """With an empty mapping, the resolved vitals_baseline spec averages resting_hr."""
    resolved = resolve_import_row_processing("vitals_baseline", {})
    assert resolved is not None
    aggregates = resolved.get("aggregates", {})
    assert aggregates.get("resting_hr") == "mean"