Universal CSV Importer #70

Merged
Lars merged 54 commits from develop into main 2026-04-11 07:06:47 +02:00
5 changed files with 112 additions and 11 deletions
Showing only changes of commit a9bd3faabb - Show all commits

View File

@ -822,6 +822,9 @@ def _import_activity(
start_key = start_raw.strftime("%Y-%m-%d %H:%M:%S")
if date_d is None:
date_d = start_raw.date()
elif isinstance(start_raw, dt.date):
date_d = start_raw
start_key = f"{start_raw.isoformat()} 00:00:00"
elif isinstance(start_raw, dt.time):
if date_d is None:
error_details.append(

View File

@ -168,6 +168,12 @@ def _collect_strptime_date_formats(spec: Mapping[str, Any], *, for_datetime: boo
if p2 not in seen:
seen.add(p2)
out.append(p2)
elif for_datetime and p.endswith(":%S"):
# z. B. Apple Health „2026-04-09 16:48“ ohne Sekunden
p_short = p[:-3]
if p_short not in seen:
seen.add(p_short)
out.append(p_short)
primary = spec.get("format")
if primary:
@ -204,18 +210,43 @@ def _try_strptime_trim_time(s: str, patterns: Sequence[str]) -> dt.datetime | No
return _try_strptime(s, patterns)
def _normalize_locale_date_months(s: str) -> str:
"""
Omron Connect / Berichte: «10 Apr. 2026», «31 März 2026» ohne DEEN scheitert dateutil.
"""
if not s:
return s
out = s
for pat, rep in (
(r"März", "March"),
(r"Maerz", "March"),
(r"Januar", "January"),
(r"Februar", "February"),
(r"Oktober", "October"),
(r"Dezember", "December"),
(r"Juni", "June"),
(r"Juli", "July"),
(r"\bMai\b", "May"),
):
out = re.sub(pat, rep, out, flags=re.IGNORECASE)
return out
def _dateutil_parse(s: str, spec: Mapping[str, Any]) -> dt.datetime | None:
    """Parse *s* with dateutil, trying one or both ``dayfirst`` settings.

    Args:
        s: Raw date/datetime string; surrounding whitespace is stripped
            before parsing.
        spec: Conversion spec. ``spec["dayfirst"]`` set to exactly ``True``
            or ``False`` forces that single setting; any other value (or a
            missing key) tries both orders.

    Returns:
        The first successfully parsed ``datetime``, or ``None`` when every
        attempt raises ``ValueError``, ``TypeError`` or ``OverflowError``.
    """
    s_trim = s.strip()
    dayfirst_opt = spec.get("dayfirst")
    # ISO YYYY-MM-DD: dayfirst=True swaps month and day (09.04. -> 04.09.),
    # so for such input the non-dayfirst interpretation is tried first.
    iso_ymd_prefix = bool(re.match(r"^\d{4}-\d{2}-\d{2}(\D|$)", s_trim))
    tries: list[bool | None]
    if dayfirst_opt is True:
        tries = [True]
    elif dayfirst_opt is False:
        tries = [False]
    else:
        tries = [False, True] if iso_ymd_prefix else [True, False]
    for df in tries:
        try:
            # Parse the trimmed string, i.e. the same text the ISO check saw.
            return dateutil_parser.parse(s_trim, dayfirst=df)
        except (ValueError, TypeError, OverflowError):
            continue
    return None
@ -223,12 +254,18 @@ def _dateutil_parse(s: str, spec: Mapping[str, Any]) -> dt.datetime | None:
def _parse_date_typed(s: str, spec: Mapping[str, Any]) -> dt.date | dt.datetime:
extract = spec.get("extract", "date_only")
s0 = _normalize_locale_date_months(s.strip())
patterns = _collect_strptime_date_formats(spec, for_datetime=False)
part = _try_strptime_trim_time(s, patterns) if patterns else None
part = _try_strptime_trim_time(s0, patterns) if patterns else None
if part is None:
part = _try_strptime(s, _collect_strptime_date_formats(spec, for_datetime=True))
part = _try_strptime(s0, _collect_strptime_date_formats(spec, for_datetime=True))
if part is None and (bool(spec.get("flexible")) or spec.get("formats")):
part = _dateutil_parse(s, spec)
part = _dateutil_parse(s0, spec)
if part is None:
merged: dict[str, Any] = {**dict(spec), "flexible": True}
if "dayfirst" not in merged:
merged["dayfirst"] = True
part = _dateutil_parse(s0, merged)
if part is None:
fmt_key = str(spec.get("format", ""))
raise ValueError(f"Datum nicht parsbar: {fmt_key} / {s!r}")
@ -238,10 +275,18 @@ def _parse_date_typed(s: str, spec: Mapping[str, Any]) -> dt.date | dt.datetime:
def _parse_datetime_typed(s: str, spec: Mapping[str, Any]) -> dt.datetime:
s0 = _normalize_locale_date_months(s.strip())
patterns = _collect_strptime_date_formats(spec, for_datetime=True)
part = _try_strptime(s, patterns)
part = _try_strptime(s0, patterns)
if part is None and (bool(spec.get("flexible")) or spec.get("formats")):
du = _dateutil_parse(s, spec)
du = _dateutil_parse(s0, spec)
if du:
part = du
if part is None:
merged: dict[str, Any] = {**dict(spec), "flexible": True}
if "dayfirst" not in merged:
merged["dayfirst"] = True
du = _dateutil_parse(s0, merged)
if du:
part = du
if part is None:

View File

@ -124,8 +124,8 @@ SELECT
"Heart Rate Average (bpm)": "hr_avg"
}'::JSONB,
'{
"start_time": {"type": "datetime", "format": "yyyy-mm-dd HH:MM:SS", "extract": "date_and_time"},
"end_time": {"type": "datetime", "format": "yyyy-mm-dd HH:MM:SS"},
"start_time": {"type": "datetime", "format": "yyyy-mm-dd HH:MM:SS", "extract": "date_and_time", "flexible": true},
"end_time": {"type": "datetime", "format": "yyyy-mm-dd HH:MM:SS", "flexible": true},
"duration_min": {"type": "duration", "format": "HH:MM:SS", "target_unit": "minutes"},
"distance_km": {"type": "float", "decimal_separator": "."},
"kcal_active": {"type": "float", "decimal_separator": "."},
@ -157,8 +157,8 @@ SELECT
"Durchschnittliche Herzfrequenz (bpm)": "hr_avg"
}'::JSONB,
'{
"start_time": {"type": "datetime", "format": "yyyy-mm-dd HH:MM:SS", "extract": "date_and_time"},
"end_time": {"type": "datetime", "format": "yyyy-mm-dd HH:MM:SS"},
"start_time": {"type": "datetime", "format": "yyyy-mm-dd HH:MM:SS", "extract": "date_and_time", "flexible": true},
"end_time": {"type": "datetime", "format": "yyyy-mm-dd HH:MM:SS", "flexible": true},
"duration_min": {"type": "duration", "format": "HH:MM:SS", "target_unit": "minutes"},
"distance_km": {"type": "float", "decimal_separator": ","},
"kcal_active": {"type": "float", "decimal_separator": ","},

View File

@ -0,0 +1,26 @@
-- Apple Health workout CSV: time often comes without seconds (HH:MM); dateutil dayfirst breaks ISO YYYY-MM-DD.
-- type_converter: additional patterns + ISO ordering in _dateutil_parse.
-- Existing system templates: set flexible for start/end (idempotent).
-- The statement merges {"flexible": true} into the existing start_time and
-- end_time entries of type_conversions via jsonb ||, leaving their other keys as they are.
UPDATE csv_field_mappings
SET type_conversions = jsonb_set(
jsonb_set(
COALESCE(type_conversions, '{}'::jsonb),
'{start_time}',
COALESCE(type_conversions->'start_time', '{}'::jsonb) || '{"flexible": true}'::jsonb,
true
),
'{end_time}',
COALESCE(type_conversions->'end_time', '{}'::jsonb) || '{"flexible": true}'::jsonb,
true
)
-- Only the two global Apple Health activity system templates that already
-- define both start_time and end_time conversions are touched.
WHERE is_system = true
AND profile_id IS NULL
AND module = 'activity'
AND mapping_name IN (
'Apple Health Workout Export (English)',
'Apple Health Workout Export (Deutsch)'
)
AND type_conversions IS NOT NULL
AND type_conversions ? 'start_time'
AND type_conversions ? 'end_time';

View File

@ -226,6 +226,14 @@ def test_build_row_after_mapping_column_order_independent():
assert r1["resting_hr"] == 58
def test_omron_report_date_formats_without_flexible_flag():
    """Omron report export: English month abbreviation and German month name.

    The template often only declares dd.mm.yyyy, so these inputs must convert
    without the spec carrying a "flexible" flag.
    """
    spec = {"type": "date", "format": "dd.mm.yyyy"}
    cases = (
        ("10 Apr. 2026", "2026-04-10"),
        ("31 März 2026", "2026-03-31"),
        ("11 März 2026", "2026-03-11"),
    )
    for raw, expected_iso in cases:
        converted = convert_value(raw, "measured_date", spec)
        assert converted.isoformat() == expected_iso
def test_int_flexible_german_decimal_rounds():
"""Apple-DE: HRV/SpO2 als «37,26» / «95,22» — nicht 3726 aus Ziffern konkatenieren."""
spec = {"type": "int", "flexible": True}
@ -239,6 +247,25 @@ def test_datetime_flexible():
assert dtv.month == 1 and dtv.day == 15 and dtv.hour == 14
def test_apple_workout_datetime_without_seconds_iso_not_swapped():
    """Apple export value 2026-04-09 16:48 has no :SS part.

    It must still convert, and day and month must not be swapped (09 <-> 04)
    by a dayfirst interpretation.
    """
    spec = dict(
        type="datetime",
        format="yyyy-mm-dd HH:MM:SS",
        extract="date_and_time",
    )
    parsed = convert_value("2026-04-09 16:48", "start_time", spec, module="activity")
    assert (parsed.year, parsed.month, parsed.day) == (2026, 4, 9)
    assert (parsed.hour, parsed.minute) == (16, 48)
def test_iso_yyyy_mm_dd_dateutil_fallback_not_dayfirst_swapped():
    """dateutil-only path: an ISO date must not get day and month swapped
    when no explicit dayfirst is configured."""
    spec = {"type": "date", "format": "dd.mm.yyyy", "flexible": True}
    parsed = convert_value("2026-04-09", "d", spec)
    assert (parsed.month, parsed.day) == (4, 9)
def test_source_unit_choices_include_custom_at_end():
opts = source_unit_choices_for_field("nutrition", "protein_g")
assert opts[-1]["id"] == "custom"