From a9bd3faabbcb4b207cdea61ae95a05bb0fccddb5 Mon Sep 17 00:00:00 2001 From: Lars Date: Fri, 10 Apr 2026 16:52:11 +0200 Subject: [PATCH] =?UTF-8?q?Bug=20Fix=20f=C3=BCr=20type=5Fconverter.py=20un?= =?UTF-8?q?d=20executor.py?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/csv_parser/executor.py | 3 + backend/csv_parser/type_converter.py | 59 ++++++++++++++++--- .../043_csv_parser_seed_templates.sql | 8 +-- ...0_csv_activity_apple_datetime_flexible.sql | 26 ++++++++ backend/tests/test_csv_parser_core.py | 27 +++++++++ 5 files changed, 112 insertions(+), 11 deletions(-) create mode 100644 backend/migrations/050_csv_activity_apple_datetime_flexible.sql diff --git a/backend/csv_parser/executor.py b/backend/csv_parser/executor.py index 540c0ba..be3d64d 100644 --- a/backend/csv_parser/executor.py +++ b/backend/csv_parser/executor.py @@ -822,6 +822,9 @@ def _import_activity( start_key = start_raw.strftime("%Y-%m-%d %H:%M:%S") if date_d is None: date_d = start_raw.date() + elif isinstance(start_raw, dt.date): + date_d = start_raw + start_key = f"{start_raw.isoformat()} 00:00:00" elif isinstance(start_raw, dt.time): if date_d is None: error_details.append( diff --git a/backend/csv_parser/type_converter.py b/backend/csv_parser/type_converter.py index 4d02d91..90af16d 100644 --- a/backend/csv_parser/type_converter.py +++ b/backend/csv_parser/type_converter.py @@ -168,6 +168,12 @@ def _collect_strptime_date_formats(spec: Mapping[str, Any], *, for_datetime: boo if p2 not in seen: seen.add(p2) out.append(p2) + elif for_datetime and p.endswith(":%S"): + # z. B. Apple Health „2026-04-09 16:48“ ohne Sekunden + p_short = p[:-3] + if p_short not in seen: + seen.add(p_short) + out.append(p_short) primary = spec.get("format") if primary: @@ -204,18 +210,43 @@ def _try_strptime_trim_time(s: str, patterns: Sequence[str]) -> dt.datetime | No return _try_strptime(s, patterns) +def _normalize_locale_date_months(s: str) -> str: + """ + Omron Connect / Berichte: «10 Apr. 2026», «31 März 2026» — ohne DE→EN scheitert dateutil. + """ + if not s: + return s + out = s + for pat, rep in ( + (r"März", "March"), + (r"Maerz", "March"), + (r"Januar", "January"), + (r"Februar", "February"), + (r"Oktober", "October"), + (r"Dezember", "December"), + (r"Juni", "June"), + (r"Juli", "July"), + (r"\bMai\b", "May"), + ): + out = re.sub(pat, rep, out, flags=re.IGNORECASE) + return out + + def _dateutil_parse(s: str, spec: Mapping[str, Any]) -> dt.datetime | None: + s_trim = s.strip() dayfirst_opt = spec.get("dayfirst") + # ISO YYYY-MM-DD: dayfirst=True vertauscht Monat/Tag (09.04. → 04.09.) + iso_ymd_prefix = bool(re.match(r"^\d{4}-\d{2}-\d{2}(\D|$)", s_trim)) tries: list[bool | None] if dayfirst_opt is True: tries = [True] elif dayfirst_opt is False: tries = [False] else: - tries = [True, False] + tries = [False, True] if iso_ymd_prefix else [True, False] for df in tries: try: - return dateutil_parser.parse(s, dayfirst=df) + return dateutil_parser.parse(s_trim, dayfirst=df) except (ValueError, TypeError, OverflowError): continue return None @@ -223,12 +254,18 @@ def _dateutil_parse(s: str, spec: Mapping[str, Any]) -> dt.datetime | None: def _parse_date_typed(s: str, spec: Mapping[str, Any]) -> dt.date | dt.datetime: extract = spec.get("extract", "date_only") + s0 = _normalize_locale_date_months(s.strip()) patterns = _collect_strptime_date_formats(spec, for_datetime=False) - part = _try_strptime_trim_time(s, patterns) if patterns else None + part = _try_strptime_trim_time(s0, patterns) if patterns else None if part is None: - part = _try_strptime(s, _collect_strptime_date_formats(spec, for_datetime=True)) + part = _try_strptime(s0, _collect_strptime_date_formats(spec, for_datetime=True)) if part is None and (bool(spec.get("flexible")) or spec.get("formats")): - part = _dateutil_parse(s, spec) + part = _dateutil_parse(s0, spec) + if part is None: + merged: dict[str, Any] = {**dict(spec), "flexible": True} + if "dayfirst" not in merged: + merged["dayfirst"] = True + part = _dateutil_parse(s0, merged) if part is None: fmt_key = str(spec.get("format", "")) raise ValueError(f"Datum nicht parsbar: {fmt_key} / {s!r}") @@ -238,10 +275,18 @@ def _parse_date_typed(s: str, spec: Mapping[str, Any]) -> dt.date | dt.datetime: def _parse_datetime_typed(s: str, spec: Mapping[str, Any]) -> dt.datetime: + s0 = _normalize_locale_date_months(s.strip()) patterns = _collect_strptime_date_formats(spec, for_datetime=True) - part = _try_strptime(s, patterns) + part = _try_strptime(s0, patterns) if part is None and (bool(spec.get("flexible")) or spec.get("formats")): - du = _dateutil_parse(s, spec) + du = _dateutil_parse(s0, spec) + if du: + part = du + if part is None: + merged: dict[str, Any] = {**dict(spec), "flexible": True} + if "dayfirst" not in merged: + merged["dayfirst"] = True + du = _dateutil_parse(s0, merged) if du: part = du if part is None: diff --git a/backend/migrations/043_csv_parser_seed_templates.sql b/backend/migrations/043_csv_parser_seed_templates.sql index 74855dd..ba843ba 100644 --- a/backend/migrations/043_csv_parser_seed_templates.sql +++ b/backend/migrations/043_csv_parser_seed_templates.sql @@ -124,8 +124,8 @@ SELECT "Heart Rate Average (bpm)": "hr_avg" }'::JSONB, '{ - "start_time": {"type": "datetime", "format": "yyyy-mm-dd HH:MM:SS", "extract": "date_and_time"}, - "end_time": {"type": "datetime", "format": "yyyy-mm-dd HH:MM:SS"}, + "start_time": {"type": "datetime", "format": "yyyy-mm-dd HH:MM:SS", "extract": "date_and_time", "flexible": true}, + "end_time": {"type": "datetime", "format": "yyyy-mm-dd HH:MM:SS", "flexible": true}, "duration_min": {"type": "duration", "format": "HH:MM:SS", "target_unit": "minutes"}, "distance_km": {"type": "float", "decimal_separator": "."}, "kcal_active": {"type": "float", "decimal_separator": "."}, @@ -157,8 +157,8 @@ SELECT "Durchschnittliche Herzfrequenz (bpm)": "hr_avg" }'::JSONB, '{ - "start_time": {"type": "datetime", "format": "yyyy-mm-dd HH:MM:SS", "extract": "date_and_time"}, - "end_time": {"type": "datetime", "format": "yyyy-mm-dd HH:MM:SS"}, + "start_time": {"type": "datetime", "format": "yyyy-mm-dd HH:MM:SS", "extract": "date_and_time", "flexible": true}, + "end_time": {"type": "datetime", "format": "yyyy-mm-dd HH:MM:SS", "flexible": true}, "duration_min": {"type": "duration", "format": "HH:MM:SS", "target_unit": "minutes"}, "distance_km": {"type": "float", "decimal_separator": ","}, "kcal_active": {"type": "float", "decimal_separator": ","}, diff --git a/backend/migrations/050_csv_activity_apple_datetime_flexible.sql b/backend/migrations/050_csv_activity_apple_datetime_flexible.sql new file mode 100644 index 0000000..618e51a --- /dev/null +++ b/backend/migrations/050_csv_activity_apple_datetime_flexible.sql @@ -0,0 +1,26 @@ +-- Apple Health Workout-CSV: Zeit oft ohne Sekunden (HH:MM); dateutil dayfirst bricht ISO YYYY-MM-DD. +-- type_converter: zusätzliche Patterns + ISO-reihenfolge in _dateutil_parse. +-- Bestehende System-Vorlagen: flexible für Start/End (idempotent). + +UPDATE csv_field_mappings +SET type_conversions = jsonb_set( + jsonb_set( + COALESCE(type_conversions, '{}'::jsonb), + '{start_time}', + COALESCE(type_conversions->'start_time', '{}'::jsonb) || '{"flexible": true}'::jsonb, + true + ), + '{end_time}', + COALESCE(type_conversions->'end_time', '{}'::jsonb) || '{"flexible": true}'::jsonb, + true +) +WHERE is_system = true + AND profile_id IS NULL + AND module = 'activity' + AND mapping_name IN ( + 'Apple Health Workout Export (English)', + 'Apple Health Workout Export (Deutsch)' + ) + AND type_conversions IS NOT NULL + AND type_conversions ? 'start_time' + AND type_conversions ? 'end_time'; diff --git a/backend/tests/test_csv_parser_core.py b/backend/tests/test_csv_parser_core.py index 54efa13..93eb18a 100644 --- a/backend/tests/test_csv_parser_core.py +++ b/backend/tests/test_csv_parser_core.py @@ -226,6 +226,14 @@ def test_build_row_after_mapping_column_order_independent(): assert r1["resting_hr"] == 58 +def test_omron_report_date_formats_without_flexible_flag(): + """Omron «Bericht»-Export: engl. Month abbrev + deutscher Monatsname; Vorlage oft nur dd.mm.yyyy.""" + spec = {"type": "date", "format": "dd.mm.yyyy"} + assert convert_value("10 Apr. 2026", "measured_date", spec).isoformat() == "2026-04-10" + assert convert_value("31 März 2026", "measured_date", spec).isoformat() == "2026-03-31" + assert convert_value("11 März 2026", "measured_date", spec).isoformat() == "2026-03-11" + + def test_int_flexible_german_decimal_rounds(): """Apple-DE: HRV/SpO2 als «37,26» / «95,22» — nicht 3726 aus Ziffern konkatenieren.""" spec = {"type": "int", "flexible": True} @@ -239,6 +247,25 @@ def test_datetime_flexible(): assert dtv.month == 1 and dtv.day == 15 and dtv.hour == 14 +def test_apple_workout_datetime_without_seconds_iso_not_swapped(): + """Apple Export: 2026-04-09 16:48 — ohne :SS; kein dayfirst-Fehlparser (09↔04).""" + spec = { + "type": "datetime", + "format": "yyyy-mm-dd HH:MM:SS", + "extract": "date_and_time", + } + dtv = convert_value("2026-04-09 16:48", "start_time", spec, module="activity") + assert dtv.year == 2026 and dtv.month == 4 and dtv.day == 9 + assert dtv.hour == 16 and dtv.minute == 48 + + +def test_iso_yyyy_mm_dd_dateutil_fallback_not_dayfirst_swapped(): + """Nur dateutil: ISO-Datum darf mit Default-dayfirst nicht vertauscht werden.""" + spec = {"type": "date", "format": "dd.mm.yyyy", "flexible": True} + d = convert_value("2026-04-09", "d", spec) + assert d.month == 4 and d.day == 9 + + def test_source_unit_choices_include_custom_at_end(): opts = source_unit_choices_for_field("nutrition", "protein_g") assert opts[-1]["id"] == "custom"