# --- backend/csv_parser/executor.py ---


def diagnose_vitals_row(mapped_typed: dict[str, Any]) -> dict[str, Any]:
    """Explain how one mapped, type-converted row fares against the vitals pre-filter.

    Pure inspection: nothing is written and no database is touched.

    Args:
        mapped_typed: Row keyed by DB field name, after type conversion.

    Returns:
        A dict holding the coerced date as ISO string (or None), the ``repr`` and
        type name of the raw ``date`` value, the five baseline metrics after safe
        numeric coercion, a ``would_pass_prefilter`` flag and a
        ``prefilter_fail_reason`` (``"datum_fehlt"``, ``"keine_baseline_metrik"``
        or None).
    """
    raw_date = mapped_typed.get("date")
    parsed_date = coerce_date(raw_date)
    metrics = {
        "resting_hr": _v_safe_int(mapped_typed.get("resting_hr")),
        "hrv": _v_safe_int(mapped_typed.get("hrv")),
        "vo2_max": _v_safe_float(mapped_typed.get("vo2_max")),
        "spo2": _v_safe_int(mapped_typed.get("spo2")),
        "respiratory_rate": _v_safe_float(mapped_typed.get("respiratory_rate")),
    }
    metric_present = not all(value is None for value in metrics.values())

    # A missing date takes precedence over a missing metric when naming the reason.
    if parsed_date is None:
        fail_reason = "datum_fehlt"
    elif not metric_present:
        fail_reason = "keine_baseline_metrik"
    else:
        fail_reason = None

    return {
        "date_coerced_iso": parsed_date.isoformat() if parsed_date else None,
        "date_after_convert_repr": repr(raw_date),
        "date_after_convert_type": type(raw_date).__name__,
        "metrics": metrics,
        "would_pass_prefilter": parsed_date is not None and metric_present,
        "prefilter_fail_reason": fail_reason,
    }


# --- backend/csv_parser/type_converter.py ---


def _vitals_baseline_alias_db_field(csv_col: str) -> str | None:
    """Map a vitals CSV header to its DB field via built-in aliases.

    Apple Health ships a wide German export ("Vitalwerte.csv") next to the narrow
    English layout (Start / Resting Heart Rate ...). When a template written for
    one layout is applied to the other, no template column matches and every row
    ends up without a date. Matching is done on the header normalised by
    ``normalize_header_for_signature``.

    ``build_row_after_mapping`` and ``diagnose_row_mapping`` consult this only for
    ``module == "vitals_baseline"`` and only when the template lookup found nothing.

    Returns:
        The DB field name, or None when no alias rule matches.
    """
    header = normalize_header_for_signature(str(csv_col))

    # Ordered rules: the first predicate that matches decides the field.
    alias_rules = (
        ("date", lambda h: h in ("datum_uhrzeit", "start", "date_time", "datetime")),
        ("resting_hr", lambda h: "ruhepuls" in h or h.startswith("resting_heart_rate")),
        ("hrv", lambda h: "herzfrequenzvariabilit" in h or "heart_rate_variability" in h),
        ("vo2_max", lambda h: "vo2" in h and "max" in h),
        ("spo2", lambda h: "blutsauerstoff" in h or "oxygen_saturation" in h),
        ("respiratory_rate", lambda h: "atemfrequenz" in h or "respiratory_rate" in h),
    )
    for db_field, matches in alias_rules:
        if matches(header):
            return db_field
    return None


def diagnose_row_mapping(
    csv_row: Mapping[str, str],
    field_mappings: Mapping[str, str],
    type_conversions: Mapping[str, Any] | None,
    module: str | None = None,
    *,
    mapped_typed: Mapping[str, Any] | None = None,
    max_columns: int = 96,
) -> dict[str, Any]:
    """Describe, column by column, how a CSV row is mapped and converted.

    Intended for the diagnose endpoint only. For each column it reports where the
    target field came from (template, vitals alias, or none) and the conversion
    result or error, followed by a JSON-friendly preview of the mapped row.

    Args:
        csv_row: Raw row keyed by CSV header.
        field_mappings: Template mapping from CSV header to DB field.
        type_conversions: Per-field conversion specs; None is treated as empty.
        module: Target module; ``"vitals_baseline"`` enables the alias fallback.
        mapped_typed: Already mapped row to preview. When None, the row is built
            with ``build_row_after_mapping``.
        max_columns: Upper bound on the number of columns described.

    Returns:
        Dict with ``per_column``, ``columns_truncated``, ``template_mapped_keys``
        (at most 40), ``template_mapped_keys_truncated`` and ``mapped``.
    """

    def preview(value: Any) -> Any:
        # Dates/datetimes become ISO strings; everything else passes through.
        return value.isoformat() if hasattr(value, "isoformat") else value

    conversions = type_conversions or {}
    column_reports: list[dict[str, Any]] = []

    for position, (csv_col, raw) in enumerate(csv_row.items()):
        if position >= max_columns:
            break
        header = str(csv_col)

        template_field = _lookup_db_field(header, field_mappings)
        alias_field = None
        if module == "vitals_baseline" and not template_field:
            alias_field = _vitals_baseline_alias_db_field(header)

        db_field = template_field or alias_field
        if template_field:
            origin = "template"
        elif alias_field:
            origin = "alias"
        else:
            origin = "none"

        spec = conversions.get(db_field) if db_field else None
        error_text: str | None = None
        converted: Any = None
        if db_field:
            try:
                converted = preview(
                    convert_value(
                        (raw or "").strip(),
                        db_field,
                        spec if isinstance(spec, dict) else None,
                        module=module,
                    )
                )
            except Exception as exc:
                # Diagnostics must report the failure instead of aborting the row.
                error_text = str(exc)

        column_reports.append(
            {
                "csv_column": header,
                "raw_preview": ((raw or "")[:120]),
                "db_field": db_field,
                "source": origin,
                "convert_error": error_text,
                "converted_preview": converted,
            }
        )

    if mapped_typed is None:
        mapped_source = build_row_after_mapping(
            csv_row, field_mappings, type_conversions, module=module
        )
    else:
        mapped_source = mapped_typed
    mapped_preview: dict[str, Any] = {
        field: preview(value) for field, value in mapped_source.items()
    }

    # Template columns that are actually mapped (skip markers excluded).
    template_keys = [
        str(column)
        for column, field in field_mappings.items()
        if field not in (None, "-", "_skip")
    ]

    return {
        "per_column": column_reports,
        "columns_truncated": len(csv_row) > max_columns,
        "template_mapped_keys": template_keys[:40],
        "template_mapped_keys_truncated": len(template_keys) > 40,
        "mapped": mapped_preview,
    }
@router.post("/import-diagnose")
async def csv_import_diagnose(
    file: UploadFile = File(...),
    mapping_id: int = Form(...),
    module: Optional[str] = Form(default=None),
    x_profile_id: Optional[str] = Header(default=None),
    session: dict = Depends(require_auth),
):
    """Dry-run the first rows of an upload through a stored template.

    Nothing is imported and no quota is consumed. The file is decoded, the chosen
    template is loaded, and the first rows are run through mapping and type
    conversion (plus the vitals pre-filter check for ``vitals_baseline``). Meant
    for debugging results such as "Datum fehlt".

    Raises:
        HTTPException: 413 when the upload exceeds the configured size limit,
            400 for an empty file or a template whose module is not supported.
    """
    max_rows = 5  # only a handful of rows is needed to spot a mapping problem

    pid = get_pid(x_profile_id)
    raw = await file.read()
    limits = _load_import_limits()
    max_bytes = limits.get("max_file_bytes", 52_428_800)
    if len(raw) > max_bytes:
        raise HTTPException(413, f"Datei zu groß (max. {max_bytes} Bytes)")
    text = decode_raw_bytes(raw)
    if not text.strip():
        raise HTTPException(400, "Leere Datei")

    # The connection is only needed to load the template row.
    with get_db() as conn:
        cur = get_cursor(conn)
        m = _fetch_mapping_row(cur, mapping_id, pid, module)

    if not get_module_definition(m["module"]):
        raise HTTPException(400, f"Modul nicht unterstützt: {m['module']}")

    # Anything that is not a dict cannot be iterated as a mapping further down;
    # treat it like an empty template, the same way type_conversions is handled.
    fm = m.get("field_mappings")
    if not isinstance(fm, dict):
        fm = {}
    tc = m.get("type_conversions")
    if not isinstance(tc, dict):
        tc = {}
    delim = str(m.get("delimiter") or ",")
    exec_module = str(m["module"])

    # Parse with the template's own header flag so that the reported value and
    # the value actually used cannot disagree. A missing/NULL flag means "has
    # a header".
    # NOTE(review): assumes the real import honours the template's has_header in
    # the same way — confirm against run_universal_csv_import.
    stored_has_header = m.get("has_header")
    has_header = True if stored_has_header is None else bool(stored_has_header)

    rows_out: list[dict[str, Any]] = []
    for i, row in enumerate(iter_csv_dict_rows(text, delim, has_header=has_header)):
        if i >= max_rows:
            break
        typed = build_row_after_mapping(row, fm, tc, module=exec_module)
        dm = diagnose_row_mapping(row, fm, tc, module=exec_module, mapped_typed=typed)
        entry: dict[str, Any] = {
            "row_index": i + 1,
            **dm,
        }
        if exec_module == "vitals_baseline":
            entry["vitals"] = diagnose_vitals_row(typed)
        rows_out.append(entry)

    return {
        "mapping_id": mapping_id,
        "mapping_name": m.get("mapping_name"),
        "module": exec_module,
        "delimiter_used": delim,
        "has_header": has_header,
        "rows_diagnosed": len(rows_out),
        "rows": rows_out,
    }
def test_run_universal_import_wide_german_vitals_with_english_template_slots():
    """A wide German Apple export imports via aliases although the template is English-only."""
    csv_text = (
        "Datum/Uhrzeit,Aktive Energie (kJ),Ruhepuls (count/min),Atemfrequenz (count/min)"
        ",Blutsauerstoffsättigung (%),Herzfrequenzvariabilität (ms),VO2 max (ml/(kg·min))\n"
        "2026-04-03 00:00:00,,53,15.61,95.22,37.26,\n"
    )
    english_columns = {
        "Start": "date",
        "Resting Heart Rate": "resting_hr",
        "Heart Rate Variability": "hrv",
        "VO2 Max": "vo2_max",
        "Oxygen Saturation": "spo2",
        "Respiratory Rate": "respiratory_rate",
    }
    flexible_int = {"type": "int", "flexible": True}
    flexible_float = {"type": "float", "decimal_separator": "auto", "flexible": True}
    conversions = {
        "date": {
            "type": "datetime",
            "format": "yyyy-mm-dd HH:MM:SS",
            "extract": "date_only",
            "flexible": True,
        },
        "resting_hr": dict(flexible_int),
        "hrv": dict(flexible_int),
        "vo2_max": dict(flexible_float),
        "spo2": dict(flexible_int),
        "respiratory_rate": dict(flexible_float),
    }
    template = {
        "delimiter": ",",
        "has_header": True,
        "field_mappings": english_columns,
        "type_conversions": conversions,
    }
    cursor = _SeqCursor([{"inserted": True, "id": "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee"}])

    result = run_universal_csv_import(
        cursor, PID, "vitals_baseline", csv_text, "wide.csv", template
    )

    assert result["rows_errors"] == 0
    assert result["rows_imported"] == 1


def test_diagnose_vitals_row_and_mapping_smoke():
    """Smoke test: both diagnose helpers agree on a template-mapped German row."""
    field_map = {
        "Datum/Uhrzeit": "date",
        "Ruhepuls (count/min)": "resting_hr",
    }
    conversions = {
        "date": {
            "type": "datetime",
            "format": "yyyy-mm-dd HH:MM:SS",
            "extract": "date_only",
            "flexible": True,
        },
        "resting_hr": {"type": "int", "flexible": True},
    }
    csv_row = {"Datum/Uhrzeit": "2026-04-03 00:00:00", "Ruhepuls (count/min)": "53"}

    typed_row = build_row_after_mapping(csv_row, field_map, conversions, module="vitals_baseline")

    vitals = diagnose_vitals_row(typed_row)
    assert vitals["date_coerced_iso"] == "2026-04-03"
    assert vitals["would_pass_prefilter"] is True

    report = diagnose_row_mapping(
        csv_row, field_map, conversions, module="vitals_baseline", mapped_typed=typed_row
    )
    assert str(report["mapped"]["date"]).startswith("2026-04-03")
    # The date column must be resolved by the template itself, not by an alias.
    date_sources = [
        col["source"] for col in report["per_column"] if col["csv_column"] == "Datum/Uhrzeit"
    ]
    assert "template" in date_sources
// Dry-run the selected file against the selected template and keep the
// server's diagnosis for display. Requires both a file and a template.
const runDiagnose = async () => {
  const ready = Boolean(file) && Boolean(mappingId)
  if (!ready) {
    setError('Bitte Datei und Vorlage wählen')
    return
  }

  // Reset the previous outcome before starting a new request.
  setLoadingDiagnose(true)
  setError(null)
  setDiagnoseResult(null)

  try {
    const diagnosis = await api.diagnoseUniversalCsv(file, Number(mappingId))
    setDiagnoseResult(diagnosis)
  } catch (err) {
    setError(err.message || 'Diagnose fehlgeschlagen')
  } finally {
    setLoadingDiagnose(false)
  }
}
+
+ + +
+ + {diagnoseResult && ( +
+ + Diagnose-Ergebnis ({diagnoseResult.rows_diagnosed ?? 0} Zeilen) + +

+ Vorlage #{diagnoseResult.mapping_id} · {diagnoseResult.mapping_name} · Modul{' '} + {MODULE_LABEL[diagnoseResult.module] || diagnoseResult.module}. Bei Vitalwerten: pro Zeile{' '} + vitals.would_pass_prefilter und{' '} + prefilter_fail_reason prüfen (z. B. datum_fehlt). +

+
+                {JSON.stringify(diagnoseResult, null, 2)}
+              
+
+ )}
)} diff --git a/frontend/src/utils/api.js b/frontend/src/utils/api.js index f34b93b..73a0472 100644 --- a/frontend/src/utils/api.js +++ b/frontend/src/utils/api.js @@ -490,6 +490,23 @@ export const api = { copyCsvMapping: (mappingId, body = null) => req(`/csv/mappings/${mappingId}/copy`, body ? json(body) : { method: 'POST' }), /** Universal-CSV (Issue #21): Zielmodul steckt in der Vorlage; nur file + mapping_id */ + /** Import-Diagnose: keine Datenbank-Schreibung, erste Zeilen + Mapping-Auflösung */ + diagnoseUniversalCsv: async (file, mappingId, module = null) => { + const fd = new FormData() + fd.append('file', file) + fd.append('mapping_id', String(mappingId)) + if (module) fd.append('module', module) + const res = await fetch(BASE + '/csv/import-diagnose', { method: 'POST', headers: hdrs(), body: fd }) + if (!res.ok) { + const errText = await res.text() + let parsed = null + try { + parsed = JSON.parse(errText) + } catch { /* ignore */ } + throw new Error(formatFastApiDetail(parsed?.detail, errText.trim() || `HTTP ${res.status}`)) + } + return res.json() + }, importUniversalCsv: async (file, mappingId) => { const fd = new FormData() fd.append('file', file)