feat(csv-import): Enhance CSV import processing and validation
- Updated the CSV import logic to support new row-processing specifications for weight and vitals baseline, allowing for better data aggregation and validation.
- Implemented handling for multiple rows on the same day, aggregating them into one value per day (for example, averages for vitals and the last value for weight).
- Enhanced test coverage for the new import functionality, ensuring correct behavior during data processing and validation.
- Refactored the module registry to include default import row-processing options for more flexible handling of CSV data.
This commit is contained in:
parent
c0fcdea1fe
commit
e35d167055
|
|
@ -34,7 +34,7 @@ jobs:
|
|||
docker compose -f "$COMPOSE_FILE" exec -T backend sh -lc "
|
||||
pip install -r /app/requirements-dev.txt &&
|
||||
cd /app &&
|
||||
python -m pytest tests -m 'not slow' -q --tb=short
|
||||
python -m pytest tests -m 'not slow' -ra -vv --tb=short
|
||||
"
|
||||
|
||||
lint-backend:
|
||||
|
|
|
|||
|
|
@ -130,6 +130,7 @@ def run_universal_csv_import(
|
|||
bool(has_header),
|
||||
fm,
|
||||
tc,
|
||||
mapping,
|
||||
error_details,
|
||||
affected_ids,
|
||||
)
|
||||
|
|
@ -169,6 +170,7 @@ def run_universal_csv_import(
|
|||
bool(has_header),
|
||||
fm,
|
||||
tc,
|
||||
mapping,
|
||||
error_details,
|
||||
affected_ids,
|
||||
)
|
||||
|
|
@ -296,19 +298,18 @@ def _import_weight(
|
|||
has_header: bool,
|
||||
fm: dict,
|
||||
tc: dict | None,
|
||||
mapping: dict[str, Any],
|
||||
error_details: list,
|
||||
affected_ids: dict,
|
||||
) -> dict[str, int]:
|
||||
spec = resolve_import_row_processing("weight", mapping)
|
||||
mapped_rows: list[dict[str, Any]] = []
|
||||
rows_total = 0
|
||||
inserted = 0
|
||||
updated = 0
|
||||
new_entries = 0
|
||||
for csv_row in iter_csv_dict_rows(text, delim, has_header=has_header):
|
||||
rows_total += 1
|
||||
mapped = build_row_after_mapping(csv_row, fm, tc, module="weight")
|
||||
d = coerce_date(mapped.get("date"))
|
||||
w = mapped.get("weight")
|
||||
note = mapped.get("note")
|
||||
if d is None:
|
||||
error_details.append({"row": rows_total, "error": "Datum fehlt"})
|
||||
continue
|
||||
|
|
@ -316,10 +317,37 @@ def _import_weight(
|
|||
error_details.append({"row": rows_total, "error": "Gewicht fehlt"})
|
||||
continue
|
||||
try:
|
||||
w = float(w)
|
||||
float(w)
|
||||
except (TypeError, ValueError):
|
||||
error_details.append({"row": rows_total, "error": "Gewicht ungültig"})
|
||||
continue
|
||||
mapped["date"] = d
|
||||
mapped_rows.append(mapped)
|
||||
|
||||
if spec:
|
||||
try:
|
||||
validate_import_row_processing("weight", spec, fm)
|
||||
except ValueError as e:
|
||||
raise ValueError(str(e)) from e
|
||||
merged_rows = aggregate_mapped_rows(mapped_rows, spec)
|
||||
else:
|
||||
merged_rows = list(mapped_rows)
|
||||
|
||||
inserted = 0
|
||||
updated = 0
|
||||
new_entries = 0
|
||||
for merged in merged_rows:
|
||||
d = coerce_date(merged.get("date"))
|
||||
w = merged.get("weight")
|
||||
note = merged.get("note")
|
||||
if d is None:
|
||||
continue
|
||||
if w is None:
|
||||
continue
|
||||
try:
|
||||
w = float(w)
|
||||
except (TypeError, ValueError):
|
||||
continue
|
||||
iso = d.isoformat()
|
||||
cur.execute(
|
||||
"SELECT id FROM weight_log WHERE profile_id=%s AND date=%s",
|
||||
|
|
@ -496,13 +524,14 @@ def _import_vitals_baseline(
|
|||
has_header: bool,
|
||||
fm: dict,
|
||||
tc: dict | None,
|
||||
mapping: dict[str, Any],
|
||||
error_details: list,
|
||||
affected_ids: dict,
|
||||
) -> dict[str, int]:
|
||||
spec = resolve_import_row_processing("vitals_baseline", mapping)
|
||||
mapped_rows: list[dict[str, Any]] = []
|
||||
rows_total = 0
|
||||
inserted = 0
|
||||
updated = 0
|
||||
skipped = 0
|
||||
skipped_prefilter = 0
|
||||
for csv_row in iter_csv_dict_rows(text, delim, has_header=has_header):
|
||||
rows_total += 1
|
||||
mapped = build_row_after_mapping(csv_row, fm, tc, module="vitals_baseline")
|
||||
|
|
@ -515,6 +544,33 @@ def _import_vitals_baseline(
|
|||
vo2 = _v_safe_float(mapped.get("vo2_max"))
|
||||
spo2 = _v_safe_int(mapped.get("spo2"))
|
||||
resp = _v_safe_float(mapped.get("respiratory_rate"))
|
||||
if not any(x is not None for x in (rhr, hrv, vo2, spo2, resp)):
|
||||
skipped_prefilter += 1
|
||||
continue
|
||||
mapped["date"] = d
|
||||
mapped_rows.append(mapped)
|
||||
|
||||
if spec:
|
||||
try:
|
||||
validate_import_row_processing("vitals_baseline", spec, fm)
|
||||
except ValueError as e:
|
||||
raise ValueError(str(e)) from e
|
||||
merged_rows = aggregate_mapped_rows(mapped_rows, spec)
|
||||
else:
|
||||
merged_rows = list(mapped_rows)
|
||||
|
||||
inserted = 0
|
||||
updated = 0
|
||||
skipped = skipped_prefilter
|
||||
for merged in merged_rows:
|
||||
d = coerce_date(merged.get("date"))
|
||||
if d is None:
|
||||
continue
|
||||
rhr = _v_safe_int(merged.get("resting_hr"))
|
||||
hrv = _v_safe_int(merged.get("hrv"))
|
||||
vo2 = _v_safe_float(merged.get("vo2_max"))
|
||||
spo2 = _v_safe_int(merged.get("spo2"))
|
||||
resp = _v_safe_float(merged.get("respiratory_rate"))
|
||||
if not any(x is not None for x in (rhr, hrv, vo2, spo2, resp)):
|
||||
skipped += 1
|
||||
continue
|
||||
|
|
|
|||
|
|
@ -69,6 +69,16 @@ MODULE_DEFINITIONS: Dict[str, Dict[str, Any]] = {
|
|||
},
|
||||
"duplicate_key": ["profile_id", "date"],
|
||||
"duplicate_strategy": "update",
|
||||
"import_row_processing_default": {
|
||||
"group_by": ["date"],
|
||||
"aggregates": {
|
||||
"resting_hr": "mean",
|
||||
"hrv": "mean",
|
||||
"vo2_max": "mean",
|
||||
"spo2": "mean",
|
||||
"respiratory_rate": "mean",
|
||||
},
|
||||
},
|
||||
},
|
||||
"blood_pressure": {
|
||||
"table": "blood_pressure_log",
|
||||
|
|
@ -92,6 +102,14 @@ MODULE_DEFINITIONS: Dict[str, Dict[str, Any]] = {
|
|||
},
|
||||
"duplicate_key": ["profile_id", "date"],
|
||||
"duplicate_strategy": "update",
|
||||
# Mehrere CSV-Zeilen pro Tag → ein Eintrag (letzte Zeile im Export zählt)
|
||||
"import_row_processing_default": {
|
||||
"group_by": ["date"],
|
||||
"aggregates": {
|
||||
"weight": "last",
|
||||
"note": "last",
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -130,6 +130,45 @@ def test_run_universal_import_vitals_baseline_upsert_insert_path():
|
|||
assert any("INSERT INTO vitals_baseline" in q[0] for q in cur.executes)
|
||||
|
||||
|
||||
def test_run_universal_import_vitals_baseline_two_rows_same_day_averages():
    """Two vitals rows sharing a calendar day collapse into one averaged upsert.

    The CSV carries a morning and an evening reading for 2024-01-15. The
    import must report two rows read but one row imported, issue exactly one
    ``INSERT INTO vitals_baseline`` statement, and pass the per-day averages
    of the two readings as statement parameters.
    """
    csv_text = "".join(
        (
            "Start,Resting Heart Rate,Heart Rate Variability,VO2 Max\n",
            "2024-01-15 07:00:00,50,40,42.0\n",
            "2024-01-15 18:00:00,60,50,43.0\n",
        )
    )
    column_to_field = {
        "Start": "date",
        "Resting Heart Rate": "resting_hr",
        "Heart Rate Variability": "hrv",
        "VO2 Max": "vo2_max",
    }
    conversions = {
        # Timestamps are reduced to their date part, so both rows share a day.
        "date": {
            "type": "datetime",
            "format": "yyyy-mm-dd HH:MM:SS",
            "extract": "date_only",
            "flexible": True,
        },
        "resting_hr": {"type": "int", "flexible": True},
        "hrv": {"type": "int", "flexible": True},
        "vo2_max": {"type": "float", "decimal_separator": ".", "flexible": True},
    }
    import_mapping = {
        "delimiter": ",",
        "has_header": True,
        "field_mappings": column_to_field,
        "type_conversions": conversions,
    }
    cursor = _SeqCursor([{"inserted": True, "id": "bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb"}])

    result = run_universal_csv_import(
        cursor, PID, "vitals_baseline", csv_text, "v2.csv", import_mapping
    )

    assert result["rows_total"] == 2
    assert result["rows_imported"] == 1

    upsert_calls = [
        call for call in cursor.executes if "INSERT INTO vitals_baseline" in call[0]
    ]
    assert len(upsert_calls) == 1

    # NOTE(review): positions 2, 3 and 4 presumably hold resting_hr, hrv and
    # vo2_max in the upsert parameter tuple; confirm against the SQL statement.
    upsert_params = upsert_calls[0][1]
    assert upsert_params[2] == 55
    assert upsert_params[3] == 45
    assert upsert_params[4] == 42.5
|
||||
|
||||
|
||||
def test_run_universal_import_activity_garmin_time_plus_date_columns(monkeypatch):
|
||||
"""Datum in eigener Spalte, Uhrzeit wie bei Garmin nur als Uhrzeit."""
|
||||
monkeypatch.setattr(
|
||||
|
|
@ -215,3 +254,25 @@ def test_run_universal_import_nutrition_two_rows_same_day_aggregates_to_one_row(
|
|||
assert params[4] == 15.0
|
||||
assert params[5] == 30.0
|
||||
assert params[6] == 45.0
|
||||
|
||||
|
||||
def test_run_universal_import_weight_two_rows_same_day_last_value():
    """Several weight rows on one day: by default the last value in the file wins."""
    csv_text = "Date,Weight\n2024-01-15,85.0\n2024-01-15,83.5\n"
    conversions = {
        "date": {"type": "date", "format": "yyyy-mm-dd", "flexible": True},
        "weight": {"type": "float", "decimal_separator": ".", "flexible": True},
    }
    import_mapping = {
        "delimiter": ",",
        "has_header": True,
        "field_mappings": {"Date": "date", "Weight": "weight"},
        "type_conversions": conversions,
    }
    cursor = _SeqCursor([None])

    result = run_universal_csv_import(
        cursor, PID, "weight", csv_text, "w.csv", import_mapping
    )

    assert result["rows_total"] == 2
    assert result["rows_imported"] == 1

    insert_calls = [
        call for call in cursor.executes if "INSERT INTO weight_log" in call[0]
    ]
    assert len(insert_calls) == 1

    # 83.5 is the second (last) row's weight; 85.0 from the first row is dropped.
    insert_params = insert_calls[0][1]
    assert insert_params[3] == 83.5
|
||||
|
|
|
|||
|
|
@ -65,3 +65,24 @@ def test_resolve_none_uses_nutrition_default():
|
|||
spec = resolve_import_row_processing("nutrition", {})
|
||||
assert spec is not None
|
||||
assert "date" in (spec.get("group_by") or [])
|
||||
|
||||
|
||||
def test_resolve_weight_default_uses_last():
    """With an empty mapping, the resolved weight spec aggregates both fields with "last"."""
    resolved = resolve_import_row_processing("weight", {})
    assert resolved is not None
    for field_name in ("weight", "note"):
        assert resolved.get("aggregates", {}).get(field_name) == "last"
|
||||
|
||||
|
||||
def test_validate_weight_import_row_processing_ok():
    """A date-grouped spec with a "mean" weight aggregate validates without raising."""
    row_processing = {"group_by": ["date"], "aggregates": {"weight": "mean"}}
    column_to_field = {"D": "date", "W": "weight"}
    # No assertion needed: the test passes as long as validation does not raise.
    validate_import_row_processing("weight", row_processing, column_to_field)
|
||||
|
||||
|
||||
def test_resolve_vitals_baseline_default_uses_mean():
    """With an empty mapping, the resolved vitals_baseline spec averages resting_hr."""
    resolved = resolve_import_row_processing("vitals_baseline", {})
    assert resolved is not None
    resting_hr_aggregate = resolved.get("aggregates", {}).get("resting_hr")
    assert resting_hr_aggregate == "mean"
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user