- Updated the aggregate_mapped_rows function to support multiple row policies, allowing for flexible handling of duplicate keys during CSV imports. - Introduced deduplication of identical rows before aggregation, improving data integrity. - Enhanced validation for multi_row_policy and dedupe_identical_rows in import_row_processing specifications. - Updated the AdminCsvTemplateEditorPage to include options for multi-row policies and deduplication settings, improving user experience in template management. - Added comprehensive tests to validate new aggregation behaviors and ensure correct error handling for multiple rows.
156 lines
4.5 KiB
Python
156 lines
4.5 KiB
Python
"""Tests für CSV-Zeilenaggregation (import_row_processing)."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import datetime as dt
|
|
|
|
import pytest
|
|
|
|
from csv_parser.import_row_processing import (
|
|
aggregate_mapped_rows,
|
|
resolve_import_row_processing,
|
|
validate_import_row_processing,
|
|
)
|
|
|
|
|
|
def test_validate_rejects_unknown_aggregate():
    """Validation must fail for an aggregate operation it does not know.

    The spec assigns the operation ``"bogus"`` to ``kcal``; the test expects
    a ``ValueError`` whose message matches ``"ungültige Operation"``.
    """
    spec = {"group_by": ["date"], "aggregates": {"kcal": "bogus"}}
    csv_to_field = {"Kal": "date", "E": "kcal"}

    with pytest.raises(ValueError, match="ungültige Operation"):
        validate_import_row_processing("nutrition", spec, csv_to_field)
|
|
|
|
|
|
def test_validate_group_by_must_be_mapped():
    """Validation must fail when a ``group_by`` field has no mapped column.

    The spec groups by ``date`` but the mapping only provides ``kcal``; the
    test expects a ``ValueError`` matching ``"keiner CSV-Spalte zugeordnet"``.
    """
    spec = {"group_by": ["date"], "aggregates": {"kcal": "sum"}}
    # "date" is deliberately left unmapped.
    csv_to_field = {"Kal": "kcal"}

    with pytest.raises(ValueError, match="keiner CSV-Spalte zugeordnet"):
        validate_import_row_processing("nutrition", spec, csv_to_field)
|
|
|
|
|
|
def test_aggregate_mapped_rows_sums_same_group():
    """Two rows with the same date are merged into one row of summed values."""
    day = dt.date(2024, 1, 15)
    spec = {
        "group_by": ["date"],
        "aggregates": {"kcal": "sum", "protein_g": "sum"},
    }
    rows = [
        {"date": day, "kcal": 500.0, "protein_g": 20},
        {"date": day, "kcal": 300.0, "protein_g": 15},
    ]

    merged, errors = aggregate_mapped_rows(rows, spec)

    assert errors == []
    assert len(merged) == 1
    combined = merged[0]
    assert combined["kcal"] == 800.0
    assert combined["protein_g"] == 35
|
|
|
|
|
|
def test_aggregate_mapped_rows_reject_second_group():
    """With ``multi_row_policy="reject"`` a duplicated key yields an error.

    The test expects no output rows and exactly one error entry carrying the
    code ``"mehrere_zeilen_pro_schluessel"`` and a ``rows_in_group`` of 2.
    """
    day = dt.date(2024, 1, 15)
    spec = {
        "group_by": ["date"],
        "aggregates": {"kcal": "sum"},
        "multi_row_policy": "reject",
    }
    rows = [
        {"date": day, "kcal": 100.0},
        {"date": day, "kcal": 200.0},
    ]

    merged, errors = aggregate_mapped_rows(rows, spec)

    assert merged == []
    assert len(errors) == 1
    failure = errors[0]
    assert failure.get("error") == "mehrere_zeilen_pro_schluessel"
    assert failure.get("rows_in_group") == 2
|
|
|
|
|
|
def test_aggregate_mapped_rows_first_row_no_merge():
    """With ``multi_row_policy="first_row"`` the first row's value is kept.

    Although ``kcal`` is configured as ``"sum"``, the test expects the single
    output row to carry 100.0 (the first row), not a merged total.
    """
    day = dt.date(2024, 1, 15)
    spec = {
        "group_by": ["date"],
        "aggregates": {"kcal": "sum"},
        "multi_row_policy": "first_row",
    }
    rows = [
        {"date": day, "kcal": 100.0},
        {"date": day, "kcal": 999.0},
    ]

    merged, errors = aggregate_mapped_rows(rows, spec)

    assert errors == []
    assert len(merged) == 1
    kept = merged[0]
    assert kept["kcal"] == 100.0
|
|
|
|
|
|
def test_dedupe_identical_rows_before_group():
    """Identical rows are collapsed before summing when dedupe is enabled.

    Three equal rows of 50.0 kcal are passed in with
    ``dedupe_identical_rows=True``; the test expects a single output row with
    50.0 rather than 150.0.
    """
    day = dt.date(2024, 1, 15)
    spec = {
        "group_by": ["date"],
        "aggregates": {"kcal": "sum"},
        "dedupe_identical_rows": True,
    }
    # Three distinct dict objects with equal content, as separate CSV rows.
    rows = [{"date": day, "kcal": 50.0} for _ in range(3)]

    merged, errors = aggregate_mapped_rows(rows, spec)

    assert errors == []
    assert len(merged) == 1
    survivor = merged[0]
    assert survivor["kcal"] == 50.0
|
|
|
|
|
|
def test_validate_multi_row_policy():
    """Validation must fail for an unsupported ``multi_row_policy`` value.

    The policy ``"nope"`` is supplied; the test expects a ``ValueError`` whose
    message matches ``"multi_row_policy"``.
    """
    spec = {
        "group_by": ["date"],
        "aggregates": {"kcal": "sum"},
        "multi_row_policy": "nope",
    }
    csv_to_field = {"D": "date", "K": "kcal"}

    with pytest.raises(ValueError, match="multi_row_policy"):
        validate_import_row_processing("nutrition", spec, csv_to_field)
|
|
|
|
|
|
def test_resolve_explicit_overrides_default():
    """An explicit ``import_row_processing`` entry is what gets resolved.

    The config sets ``kcal`` to ``"mean"``; the test expects the resolved
    spec to report that same operation.
    """
    explicit = {"group_by": ["date"], "aggregates": {"kcal": "mean"}}
    config = {"import_row_processing": explicit}

    resolved = resolve_import_row_processing("nutrition", config)

    assert resolved is not None
    assert resolved["aggregates"]["kcal"] == "mean"
|
|
|
|
|
|
def test_resolve_empty_dict_falls_back_to_module_default():
    """An empty ``import_row_processing`` dict still resolves to a spec.

    The test expects grouping by ``date`` and ``kcal`` aggregated with
    ``"sum"`` for the ``nutrition`` module.
    """
    config: dict = {"import_row_processing": {}}

    resolved = resolve_import_row_processing("nutrition", config)

    assert resolved is not None
    assert resolved["group_by"] == ["date"]
    assert resolved["aggregates"]["kcal"] == "sum"
|
|
|
|
|
|
def test_resolve_none_uses_nutrition_default():
    """Without any configured spec, ``nutrition`` still groups by ``date``."""
    resolved = resolve_import_row_processing("nutrition", {})

    assert resolved is not None
    # A missing or falsy "group_by" is treated as an empty list here.
    group_fields = resolved.get("group_by") or []
    assert "date" in group_fields
|
|
|
|
|
|
def test_resolve_weight_default_uses_last():
    """Without configuration, ``weight`` aggregates its fields with ``"last"``.

    Both the ``weight`` and ``note`` aggregates are expected to be ``"last"``.
    """
    resolved = resolve_import_row_processing("weight", {})

    assert resolved is not None
    aggregates = resolved.get("aggregates", {})
    assert aggregates.get("weight") == "last"
    assert aggregates.get("note") == "last"
|
|
|
|
|
|
def test_validate_weight_import_row_processing_ok():
    """A well-formed ``weight`` spec passes validation.

    The test makes no assertion on a return value; it succeeds as long as the
    call raises nothing.
    """
    spec = {"group_by": ["date"], "aggregates": {"weight": "mean"}}
    csv_to_field = {"D": "date", "W": "weight"}

    validate_import_row_processing("weight", spec, csv_to_field)
|
|
|
|
|
|
def test_resolve_vitals_baseline_default_uses_mean():
    """Without configuration, ``vitals_baseline`` averages ``resting_hr``."""
    resolved = resolve_import_row_processing("vitals_baseline", {})

    assert resolved is not None
    aggregates = resolved.get("aggregates", {})
    assert aggregates.get("resting_hr") == "mean"
|