- Added support for new CSV import modules: sleep and vitals_baseline, expanding the import capabilities. - Implemented backend logic for handling CSV imports related to sleep and vitals baseline, including error handling and data processing. - Updated frontend components to include new modules in the CSV import interface, improving user experience. - Introduced unit tests for the new import functionalities to ensure reliability and correctness. - Enhanced existing CSV analysis features to accommodate the new modules, ensuring consistent behavior across the application.
211 lines
8.0 KiB
Python
211 lines
8.0 KiB
Python
"""
|
|
Heuristic suggestions for CSV field_mappings / type_conversions (admin editor, issue #21).
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
from copy import deepcopy
|
|
from typing import Any, Mapping
|
|
|
|
from csv_parser.core import normalize_header_for_signature
|
|
from csv_parser.module_registry import get_module_definition
|
|
|
|
# Alias table: module -> DB field -> normalized header fragments.
# _alias_suggest proposes a DB field for a header when the normalized header
# contains one of that field's fragments (substring test) or equals the
# field name itself.
_MODULE_HEADER_ALIASES: dict[str, dict[str, frozenset[str]]] = {
    "nutrition": {
        "date": frozenset(
            {"datum", "date", "tag", "day", "zeit", "timestamp", "uhrzeit", "monat", "jahr"}
        ),
        "kcal": frozenset(
            {"kcal", "kalorie", "calorie", "energie", "energy", "kj", "joule"}
        ),
        "protein_g": frozenset({"protein", "eiwei", "eiweiss"}),
        "fat_g": frozenset({"fett", "fat", "lipid"}),
        "carbs_g": frozenset(
            {"kh", "carb", "kohlenhydr", "carbs", "sugar", "zucker"}
        ),
    },
    "weight": {
        "date": frozenset({"datum", "date", "tag", "day", "zeit"}),
        "weight": frozenset({"gewicht", "weight", "masse", "kg", "kilo"}),
        "note": frozenset({"notiz", "note", "comment", "kommentar"}),
    },
    "blood_pressure": {
        "measured_date": frozenset({"datum", "date", "tag", "day", "messdatum"}),
        "measured_time": frozenset({"zeit", "time", "uhr", "uhrzeit"}),
        "systolic": frozenset({"systol", "sys", "sbp", "oberdruck"}),
        "diastolic": frozenset({"diastol", "dia", "dbp", "unterdruck"}),
        "pulse": frozenset({"puls", "pulse", "hr", "herz", "bpm"}),
    },
    "activity": {
        "date": frozenset({"datum", "date", "tag", "day"}),
        "start_time": frozenset({"start", "beginn", "von"}),
        "end_time": frozenset({"end", "ende", "bis", "stop"}),
        "activity_type": frozenset(
            {"workout", "training", "typ", "type", "art", "aktiv"}
        ),
        "duration_min": frozenset({"dauer", "duration", "min"}),
        "distance_km": frozenset({"strecke", "distance", "km", "distanz"}),
        "kcal_active": frozenset({"kcal", "kalorie", "energie", "active"}),
        "kcal_resting": frozenset({"ruhe", "resting"}),
        "hr_avg": frozenset(
            {"puls", "heart", "hr", "bpm", "herzfrequenz", "durchschn"}
        ),
        "hr_max": frozenset({"max", "peak"}),
    },
    "vitals_baseline": {
        "date": frozenset({"datum", "date", "tag", "start", "zeit"}),
        "resting_hr": frozenset({"ruhepuls", "resting", "rhr"}),
        "hrv": frozenset({"hrv", "variabilit", "vfc"}),
        "vo2_max": frozenset({"vo2"}),
        "spo2": frozenset({"sauerstoff", "spo2", "oxygen"}),
        "respiratory_rate": frozenset({"atem", "respiratory"}),
    },
}
|
|
|
|
# Default type_conversions: module -> DB field -> conversion spec.
# build_type_conversions_for_mapping copies an entry from here for every
# mapped target field that the seed template does not already cover.
_DEFAULT_TYPE_CONVERSIONS: dict[str, dict[str, dict[str, Any]]] = {
    "nutrition": {
        "date": {
            "type": "date",
            "format": "dd.mm.yyyy HH:MM",
            "extract": "date_only",
            "flexible": True,
        },
        "kcal": {"type": "float", "decimal_separator": "auto", "flexible": True},
        "protein_g": {"type": "float", "decimal_separator": "auto", "flexible": True},
        "fat_g": {"type": "float", "decimal_separator": "auto", "flexible": True},
        "carbs_g": {"type": "float", "decimal_separator": "auto", "flexible": True},
    },
    "weight": {
        "date": {"type": "date", "format": "dd.mm.yyyy", "flexible": True},
        "weight": {"type": "float", "decimal_separator": "auto", "flexible": True},
        "note": {"type": "string"},
    },
    "blood_pressure": {
        "measured_date": {"type": "date", "format": "dd.mm.yyyy", "flexible": True},
        "measured_time": {"type": "time", "format": "HH:MM", "flexible": True},
        "systolic": {"type": "int", "flexible": True},
        "diastolic": {"type": "int", "flexible": True},
        "pulse": {"type": "int", "flexible": True},
    },
    "activity": {
        "date": {"type": "date", "format": "yyyy-mm-dd", "flexible": True},
        "start_time": {
            "type": "datetime",
            "format": "yyyy-mm-dd HH:MM:SS",
            "flexible": True,
        },
        "end_time": {
            "type": "datetime",
            "format": "yyyy-mm-dd HH:MM:SS",
            "flexible": True,
        },
        "activity_type": {"type": "string"},
        "duration_min": {
            "type": "duration",
            "format": "HH:MM:SS",
            "target_unit": "minutes",
            "flexible": True,
        },
        "distance_km": {"type": "float", "decimal_separator": "auto", "flexible": True},
        "kcal_active": {"type": "float", "decimal_separator": "auto", "flexible": True},
        "kcal_resting": {"type": "float", "decimal_separator": "auto", "flexible": True},
        "hr_avg": {"type": "int", "flexible": True},
        "hr_max": {"type": "int", "flexible": True},
    },
    "vitals_baseline": {
        "date": {
            "type": "datetime",
            "format": "yyyy-mm-dd HH:MM:SS",
            "extract": "date_only",
            "flexible": True,
        },
        "resting_hr": {"type": "int", "flexible": True},
        "hrv": {"type": "int", "flexible": True},
        "vo2_max": {"type": "float", "decimal_separator": "auto", "flexible": True},
        "spo2": {"type": "int", "flexible": True},
        "respiratory_rate": {
            "type": "float",
            "decimal_separator": "auto",
            "flexible": True,
        },
    },
}
|
|
|
|
|
|
def _norm_key(header: str) -> str:
    """Return *header* normalized via ``normalize_header_for_signature``.

    Single entry point for header normalization inside this module, so seed
    keys and CSV headers are always compared in the same normalized form.
    """
    normalized = normalize_header_for_signature(header)
    return normalized
|
|
|
|
|
|
def _match_seed_to_db_field(header: str, seed_fm: Mapping[str, str]) -> str | None:
    """Look up the target DB field a seed mapping assigns to *header*.

    Lookup order (first usable hit wins):
      1. the raw header as seed key,
      2. the normalized header as seed key,
      3. any seed key whose normalized form equals the normalized header.

    Seed values that are empty, ``"-"`` or ``"_skip"`` never count as a hit.

    Returns:
        The target field name, or ``None`` when the seed has no usable entry.
    """
    ignored_targets = ("-", "_skip")

    def _usable(target: Any) -> bool:
        return bool(target) and target not in ignored_targets

    # 1) Raw header used verbatim as seed key.
    direct = seed_fm.get(header)
    if _usable(direct):
        return direct

    # 2) Normalized header used as seed key.
    wanted = _norm_key(header)
    by_norm = seed_fm.get(wanted)
    if _usable(by_norm):
        return by_norm

    # 3) Compare against every seed key after normalizing the key as well.
    return next(
        (
            target
            for seed_key, target in seed_fm.items()
            if _usable(target) and _norm_key(str(seed_key)) == wanted
        ),
        None,
    )
|
|
|
|
|
|
def _alias_suggest(norm: str, module: str, used: set[str]) -> str | None:
    """Suggest a not-yet-assigned DB field of *module* for a normalized header.

    Matching runs in two passes over the module's fields (in definition
    order), skipping every field already contained in *used*:

      1. Exact pass: the lowercased header equals the field name, with or
         without underscores.
      2. Alias pass: the lowercased header contains one of the field's alias
         fragments from ``_MODULE_HEADER_ALIASES`` (fragments shorter than
         two characters are ignored).

    The exact pass runs over all fields first so that a header literally
    named after a field (e.g. ``hr_max``) is not captured by an earlier
    field whose alias fragment happens to be a substring of it (``hr``).

    Args:
        norm: Header text, already normalized by the caller.
        module: Module key used for the module definition and alias table.
        used: DB fields that are already assigned; not modified here.

    Returns:
        The suggested DB field, or ``None`` if the module is unknown or no
        free field matches.
    """
    mod = get_module_definition(module)
    if not mod:
        return None

    aliases = _MODULE_HEADER_ALIASES.get(module, {})
    # Loop-invariant forms of the header, computed once.
    nlow = norm.lower()
    compact = nlow.replace("_", "")
    candidates = [name for name in mod["fields"] if name not in used]

    # Pass 1: exact field-name match takes precedence over any alias hit.
    for db_field in candidates:
        if nlow == db_field or compact == db_field.replace("_", ""):
            return db_field

    # Pass 2: substring match against the (lowercase) alias fragments.
    for db_field in candidates:
        tokens = aliases.get(db_field, frozenset())
        if any(len(tok) >= 2 and tok in nlow for tok in tokens):
            return db_field

    return None
|
|
|
|
|
|
def suggest_field_mappings(
    headers: list[str],
    module: str,
    seed_fm: Mapping[str, str] | None = None,
) -> dict[str, str]:
    """Map every CSV column (raw header as key) to a DB field or ``"-"``.

    A matching seed template is consulted first; columns it leaves
    unassigned are then run through the alias heuristic. Each DB field is
    handed out at most once. For the ``"sleep"`` module and for modules
    without a definition, every column maps to ``"-"``.

    Args:
        headers: Raw CSV header names.
        module: Module key the mapping is built for.
        seed_fm: Optional seed ``field_mappings`` (header -> DB field).

    Returns:
        Dict with one entry per distinct header.
    """
    mapping: dict[str, str] = dict.fromkeys(headers, "-")

    # No suggestions for "sleep" or for modules without a definition.
    if module == "sleep" or not get_module_definition(module):
        return mapping

    taken: set[str] = set()

    def _claim(column: str, field: str) -> None:
        mapping[column] = field
        taken.add(field)

    # Phase 1: assignments taken from the seed template.
    if seed_fm:
        for column in headers:
            field = _match_seed_to_db_field(column, seed_fm)
            if field and field not in taken:
                _claim(column, field)

    # Phase 2: alias heuristic for everything still unassigned.
    for column in headers:
        if mapping[column] == "-":
            field = _alias_suggest(_norm_key(column), module, taken)
            if field:
                _claim(column, field)

    return mapping
|
|
|
|
|
|
def build_type_conversions_for_mapping(
    module: str,
    field_mappings: Mapping[str, str],
    seed_tc: Mapping[str, Any] | None = None,
) -> dict[str, Any]:
    """Build ``type_conversions`` for the assigned target fields only.

    Seed entries take precedence over the module defaults. Mapping values
    that are empty, ``"-"`` or ``"_skip"`` are not treated as targets. The
    ``"sleep"`` module always yields an empty dict.

    Args:
        module: Module key used to select the default conversions.
        field_mappings: Header -> DB field mapping.
        seed_tc: Optional seed conversions (DB field -> spec dict); non-dict
            values are ignored.

    Returns:
        DB field -> deep-copied conversion spec.
    """
    if module == "sleep":
        return {}

    module_defaults = _DEFAULT_TYPE_CONVERSIONS.get(module, {})
    ignored_targets = ("-", "_skip")
    assigned = {
        target
        for target in field_mappings.values()
        if target and target not in ignored_targets
    }

    # Seed specs first, so they win over the defaults below.
    conversions: dict[str, Any] = {
        field: deepcopy(spec)
        for field, spec in (seed_tc or {}).items()
        if field in assigned and isinstance(spec, dict)
    }

    # Fill the remaining targets from the module defaults.
    for field in assigned:
        if field not in conversions and field in module_defaults:
            conversions[field] = deepcopy(module_defaults[field])

    return conversions
|