feat(csv-import): Add CSV import diagnosis endpoint and related functionality
All checks were successful
Deploy Development / deploy (push) Successful in 50s
Build Test / pytest-backend (push) Successful in 4s
Build Test / lint-backend (push) Successful in 0s
Build Test / build-frontend (push) Successful in 17s

- Implemented a new endpoint for diagnosing CSV imports without writing to the database, allowing users to validate mappings and type conversions.
- Introduced the `diagnose_vitals_row` function to analyze vital metrics and provide detailed feedback on data validity.
- Enhanced the CSV import logic to include alias handling for vital fields, improving compatibility with different CSV formats.
- Updated the frontend to support the new diagnosis feature, including UI elements for displaying diagnosis results and error details.
- Added tests to ensure the correctness of the new diagnosis functionality and its integration with existing import processes.
This commit is contained in:
Lars 2026-04-10 16:35:31 +02:00
parent 1855f6e57a
commit c5b0540b11
6 changed files with 394 additions and 18 deletions

View File

@ -524,6 +524,36 @@ def _v_safe_float(value: Any) -> float | None:
return None
def diagnose_vitals_row(mapped_typed: dict[str, Any]) -> dict[str, Any]:
    """Explain how a vitals-baseline row looks after type conversion (no DB access).

    Args:
        mapped_typed: Row keyed by DB field name, as produced by the
            mapping/type-conversion step.

    Returns:
        A dict with:
        - ``date_coerced_iso``: ISO string of ``coerce_date(row["date"])`` or None.
        - ``date_after_convert_repr`` / ``date_after_convert_type``: repr and
          type name of the raw ``date`` value, for debugging.
        - ``metrics``: the five baseline metrics after ``_v_safe_int`` /
          ``_v_safe_float``.
        - ``would_pass_prefilter``: True when a date was obtained and at least
          one metric is not None.
        - ``prefilter_fail_reason``: ``"datum_fehlt"``, ``"keine_baseline_metrik"``
          or None.
    """
    raw_date = mapped_typed.get("date")
    parsed_date = coerce_date(raw_date)

    metrics = {
        "resting_hr": _v_safe_int(mapped_typed.get("resting_hr")),
        "hrv": _v_safe_int(mapped_typed.get("hrv")),
        "vo2_max": _v_safe_float(mapped_typed.get("vo2_max")),
        "spo2": _v_safe_int(mapped_typed.get("spo2")),
        "respiratory_rate": _v_safe_float(mapped_typed.get("respiratory_rate")),
    }
    metric_present = not all(value is None for value in metrics.values())

    # A missing date takes precedence over missing metrics.
    if parsed_date is None:
        fail_reason = "datum_fehlt"
    elif not metric_present:
        fail_reason = "keine_baseline_metrik"
    else:
        fail_reason = None

    return {
        "date_coerced_iso": parsed_date.isoformat() if parsed_date else None,
        "date_after_convert_repr": repr(raw_date),
        "date_after_convert_type": type(raw_date).__name__,
        "metrics": metrics,
        "would_pass_prefilter": fail_reason is None,
        "prefilter_fail_reason": fail_reason,
    }
def _import_vitals_baseline(
cur,
profile_id: str,

View File

@ -402,6 +402,29 @@ def _lookup_db_field(csv_col: str, field_mappings: Mapping[str, str]) -> str | N
return None
def _vitals_baseline_alias_db_field(csv_col: str) -> str | None:
    """Map a vitals CSV header to a DB field via built-in aliases.

    Background: Apple Health exports exist as a wide German file
    (``Vitalwerte.csv``) and as a narrow English template
    (``Start`` / ``Resting Heart Rate`` ...). Without these aliases the
    analysis often selects the English template, and every row then fails
    with "Datum fehlt" (date missing).

    Matching is done on the header as normalized by
    ``normalize_header_for_signature``.

    Returns:
        The DB field name (``date``, ``resting_hr``, ``hrv``, ``vo2_max``,
        ``spo2``, ``respiratory_rate``) or None when no alias applies.
    """
    header = normalize_header_for_signature(str(csv_col))

    def mentions(*needles: str) -> bool:
        # True when any of the fragments occurs anywhere in the header.
        return any(needle in header for needle in needles)

    # Rules are checked in order; the first match wins.
    if header in ("datum_uhrzeit", "start", "date_time", "datetime"):
        field = "date"
    elif mentions("ruhepuls") or header.startswith("resting_heart_rate"):
        field = "resting_hr"
    elif mentions("herzfrequenzvariabilit", "heart_rate_variability"):
        field = "hrv"
    elif "vo2" in header and "max" in header:
        field = "vo2_max"
    elif mentions("blutsauerstoff", "oxygen_saturation"):
        field = "spo2"
    elif mentions("atemfrequenz", "respiratory_rate"):
        field = "respiratory_rate"
    else:
        field = None
    return field
def build_row_after_mapping(
csv_row: Mapping[str, str],
field_mappings: Mapping[str, str],
@ -416,6 +439,8 @@ def build_row_after_mapping(
tc = type_conversions or {}
for csv_col, raw in csv_row.items():
db_field = _lookup_db_field(str(csv_col), field_mappings)
if not db_field and module == "vitals_baseline":
db_field = _vitals_baseline_alias_db_field(csv_col)
if not db_field:
continue
spec = tc.get(db_field)
@ -426,3 +451,79 @@ def build_row_after_mapping(
except Exception:
out[db_field] = None
return out
def diagnose_row_mapping(
    csv_row: Mapping[str, str],
    field_mappings: Mapping[str, str],
    type_conversions: Mapping[str, Any] | None,
    module: str | None = None,
    *,
    mapped_typed: Mapping[str, Any] | None = None,
    max_columns: int = 96,
) -> dict[str, Any]:
    """Describe, column by column, how one CSV row is mapped and converted.

    Intended for the diagnosis endpoint only. For each CSV column it reports
    where the target DB field came from (template mapping vs. vitals alias),
    the converted value or the conversion error, and finally a JSON-friendly
    preview of the fully mapped row.

    Args:
        csv_row: Raw CSV row keyed by header.
        field_mappings: Template mapping from CSV header to DB field.
        type_conversions: Per-field conversion specs; None is treated as empty.
        module: Target module; aliases are only tried for ``"vitals_baseline"``.
        mapped_typed: Already mapped row. When None it is computed with
            ``build_row_after_mapping``.
        max_columns: Upper bound on the number of columns reported.

    Returns:
        Dict with ``per_column``, ``columns_truncated``,
        ``template_mapped_keys`` (at most 40), ``template_mapped_keys_truncated``
        and ``mapped``.
    """
    conversions = type_conversions or {}
    alias_enabled = module == "vitals_baseline"
    column_reports: list[dict[str, Any]] = []

    for position, (csv_col, raw) in enumerate(csv_row.items()):
        if position >= max_columns:
            break

        column_name = str(csv_col)
        template_field = _lookup_db_field(column_name, field_mappings)
        alias_field = None
        if alias_enabled and not template_field:
            alias_field = _vitals_baseline_alias_db_field(column_name)
        target = template_field or alias_field

        if template_field:
            origin = "template"
        elif alias_field:
            origin = "alias"
        else:
            origin = "none"

        spec = conversions.get(target) if target else None
        error_text: str | None = None
        preview: Any = None
        if target:
            try:
                converted = convert_value(
                    (raw or "").strip(),
                    target,
                    spec if isinstance(spec, dict) else None,
                    module=module,
                )
                # Dates/datetimes are rendered as ISO strings for JSON output.
                preview = converted.isoformat() if hasattr(converted, "isoformat") else converted
            except Exception as exc:
                error_text = str(exc)

        column_reports.append(
            {
                "csv_column": column_name,
                "raw_preview": ((raw or "")[:120]),
                "db_field": target,
                "source": origin,
                "convert_error": error_text,
                "converted_preview": preview,
            }
        )

    if mapped_typed is None:
        resolved_row = build_row_after_mapping(
            csv_row, field_mappings, type_conversions, module=module
        )
    else:
        resolved_row = mapped_typed
    mapped_preview: dict[str, Any] = {
        key: (value.isoformat() if hasattr(value, "isoformat") else value)
        for key, value in resolved_row.items()
    }

    # Template headers that actually point at a DB field (skip markers excluded).
    template_keys = [
        str(header)
        for header, field in field_mappings.items()
        if field not in (None, "-", "_skip")
    ]

    return {
        "per_column": column_reports,
        "columns_truncated": len(csv_row) > max_columns,
        "template_mapped_keys": template_keys[:40],
        "template_mapped_keys_truncated": len(template_keys) > 40,
        "mapped": mapped_preview,
    }

View File

@ -15,15 +15,17 @@ from auth import require_auth, check_feature_access, increment_feature_usage
from feature_logger import log_feature_usage
from db import get_db, get_cursor, r2d
from routers.profiles import get_pid
from csv_parser.executor import run_universal_csv_import
from csv_parser.executor import diagnose_vitals_row, run_universal_csv_import
from csv_parser.core import (
decode_raw_bytes,
column_signature,
get_csv_import_limits,
headers_signature_rank_metrics,
iter_csv_dict_rows,
normalize_header_for_signature,
parse_csv_sample,
)
from csv_parser.type_converter import build_row_after_mapping, diagnose_row_mapping
from csv_parser.field_units import source_unit_choices_for_field
from csv_parser.module_registry import get_module_definition, list_modules, validate_field_mappings
from csv_parser.sleep_apple_import import detect_apple_sleep_csv_format
@ -339,6 +341,70 @@ async def analyze_csv(
}
@router.post("/import-diagnose")
async def csv_import_diagnose(
    file: UploadFile = File(...),
    mapping_id: int = Form(...),
    module: Optional[str] = Form(default=None),
    x_profile_id: Optional[str] = Header(default=None),
    session: dict = Depends(require_auth),
):
    """Dry-run the first rows of an uploaded CSV through a stored mapping.

    No import and no quota accounting: the same file and template as for a
    real import are used, but only the first rows are passed through
    mapping and type conversion (plus the vitals checks for
    ``vitals_baseline``). Meant for debugging errors such as "Datum fehlt".
    The only database interaction visible here is loading the mapping row.

    Args:
        file: Uploaded CSV file.
        mapping_id: ID of the stored mapping/template.
        module: Optional module, forwarded to ``_fetch_mapping_row``.
        x_profile_id: Profile header, resolved via ``get_pid``.
        session: Auth session (dependency only; not read here).

    Returns:
        Dict with mapping metadata and up to five diagnosed rows.

    Raises:
        HTTPException: 413 if the upload exceeds ``max_file_bytes``; 400 if
            the decoded file is empty or the mapping's module is unsupported.
    """
    max_rows = 5

    pid = get_pid(x_profile_id)
    limits = _load_import_limits()
    max_bytes = limits.get("max_file_bytes", 52_428_800)
    # Read at most one byte more than allowed: enough to detect an oversized
    # upload without pulling the whole file into memory first.
    raw = await file.read(int(max_bytes) + 1)
    if len(raw) > max_bytes:
        raise HTTPException(413, f"Datei zu groß (max. {max_bytes} Bytes)")
    text = decode_raw_bytes(raw)
    if not text.strip():
        raise HTTPException(400, "Leere Datei")

    # The connection is only needed to load the mapping row.
    with get_db() as conn:
        cur = get_cursor(conn)
        m = _fetch_mapping_row(cur, mapping_id, pid, module)

    if not get_module_definition(m["module"]):
        raise HTTPException(400, f"Modul nicht unterstützt: {m['module']}")

    fm = m.get("field_mappings") or {}
    if isinstance(fm, str):
        # An undecoded string cannot be used as a mapping; treat it as empty.
        fm = {}
    tc = m.get("type_conversions")
    if not isinstance(tc, dict):
        tc = {}
    delim = str(m.get("delimiter") or ",")
    exec_module = str(m["module"])

    rows_out: list[dict[str, Any]] = []
    # NOTE(review): rows are always parsed with has_header=True, while the
    # response below echoes the template's own has_header flag - confirm
    # that these are meant to agree.
    for i, row in enumerate(iter_csv_dict_rows(text, delim, has_header=True)):
        if i >= max_rows:
            break
        typed = build_row_after_mapping(row, fm, tc, module=exec_module)
        dm = diagnose_row_mapping(row, fm, tc, module=exec_module, mapped_typed=typed)
        entry: dict[str, Any] = {
            "row_index": i + 1,
            **dm,
        }
        if exec_module == "vitals_baseline":
            entry["vitals"] = diagnose_vitals_row(typed)
        rows_out.append(entry)

    return {
        "mapping_id": mapping_id,
        "mapping_name": m.get("mapping_name"),
        "module": exec_module,
        "delimiter_used": delim,
        "has_header": bool(m.get("has_header", True)),
        "rows_diagnosed": len(rows_out),
        "rows": rows_out,
    }
def _fetch_mapping_row(
cur,
mapping_id: int,

View File

@ -11,8 +11,9 @@ import uuid
import pytest
from csv_parser.executor import run_universal_csv_import
from csv_parser.executor import diagnose_vitals_row, run_universal_csv_import
from csv_parser.sleep_apple_import import detect_apple_sleep_csv_format
from csv_parser.type_converter import build_row_after_mapping, diagnose_row_mapping
class _SeqCursor:
@ -130,6 +131,44 @@ def test_run_universal_import_vitals_baseline_upsert_insert_path():
assert any("INSERT INTO vitals_baseline" in q[0] for q in cur.executes)
def test_run_universal_import_wide_german_vitals_with_english_template_slots():
    """Wide German Apple export imported with an English-only template.

    The template only knows the English column names, so the German headers
    must be resolved through the vitals aliases for the row to import.
    """
    header_line = (
        "Datum/Uhrzeit,Aktive Energie (kJ),Ruhepuls (count/min),Atemfrequenz (count/min)"
        ",Blutsauerstoffsättigung (%),Herzfrequenzvariabilität (ms),VO2 max (ml/(kg·min))\n"
    )
    data_line = "2026-04-03 00:00:00,,53,15.61,95.22,37.26,\n"
    csv_text = header_line + data_line

    english_template = {
        "Start": "date",
        "Resting Heart Rate": "resting_hr",
        "Heart Rate Variability": "hrv",
        "VO2 Max": "vo2_max",
        "Oxygen Saturation": "spo2",
        "Respiratory Rate": "respiratory_rate",
    }
    conversions = {
        "date": {
            "type": "datetime",
            "format": "yyyy-mm-dd HH:MM:SS",
            "extract": "date_only",
            "flexible": True,
        },
        "resting_hr": {"type": "int", "flexible": True},
        "hrv": {"type": "int", "flexible": True},
        "vo2_max": {"type": "float", "decimal_separator": "auto", "flexible": True},
        "spo2": {"type": "int", "flexible": True},
        "respiratory_rate": {"type": "float", "decimal_separator": "auto", "flexible": True},
    }
    mapping = {
        "delimiter": ",",
        "has_header": True,
        "field_mappings": english_template,
        "type_conversions": conversions,
    }

    cursor = _SeqCursor([{"inserted": True, "id": "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee"}])
    result = run_universal_csv_import(cursor, PID, "vitals_baseline", csv_text, "wide.csv", mapping)

    assert result["rows_errors"] == 0
    assert result["rows_imported"] == 1
def test_run_universal_import_vitals_baseline_two_rows_same_day_averages():
text = (
"Start,Resting Heart Rate,Heart Rate Variability,VO2 Max\n"
@ -276,3 +315,27 @@ def test_run_universal_import_weight_two_rows_same_day_last_value():
assert len(insert_sqls) == 1
params = insert_sqls[0][1]
assert params[3] == 83.5
def test_diagnose_vitals_row_and_mapping_smoke():
    """Smoke test: German headers mapped directly by the template are diagnosed."""
    date_header = "Datum/Uhrzeit"
    rhr_header = "Ruhepuls (count/min)"

    field_mappings = {
        date_header: "date",
        rhr_header: "resting_hr",
    }
    conversions = {
        "date": {
            "type": "datetime",
            "format": "yyyy-mm-dd HH:MM:SS",
            "extract": "date_only",
            "flexible": True,
        },
        "resting_hr": {"type": "int", "flexible": True},
    }
    csv_row = {date_header: "2026-04-03 00:00:00", rhr_header: "53"}

    typed = build_row_after_mapping(csv_row, field_mappings, conversions, module="vitals_baseline")

    vitals = diagnose_vitals_row(typed)
    assert vitals["date_coerced_iso"] == "2026-04-03"
    assert vitals["would_pass_prefilter"] is True

    report = diagnose_row_mapping(
        csv_row,
        field_mappings,
        conversions,
        module="vitals_baseline",
        mapped_typed=typed,
    )
    assert str(report["mapped"]["date"]).startswith("2026-04-03")
    # The date column must be reported as resolved through the template.
    date_sources = [
        col["source"] for col in report["per_column"] if col["csv_column"] == "Datum/Uhrzeit"
    ]
    assert "template" in date_sources

View File

@ -116,8 +116,11 @@ export default function UniversalCsvImportPage() {
const [mappingId, setMappingId] = useState('')
const [loadingAnalyze, setLoadingAnalyze] = useState(false)
const [loadingImport, setLoadingImport] = useState(false)
const [loadingDiagnose, setLoadingDiagnose] = useState(false)
const [error, setError] = useState(null)
const [success, setSuccess] = useState(null)
const [lastImport, setLastImport] = useState(null)
const [diagnoseResult, setDiagnoseResult] = useState(null)
const selectedChoice = useMemo(
() => mappingChoices.find((c) => String(c.id) === String(mappingId)),
@ -175,6 +178,8 @@ export default function UniversalCsvImportPage() {
setMappingId('')
setSuccess(null)
setError(null)
setLastImport(null)
setDiagnoseResult(null)
void runAnalyze(f)
}
@ -192,6 +197,7 @@ export default function UniversalCsvImportPage() {
setSuccess(null)
try {
const res = await api.importUniversalCsv(file, Number(mappingId))
setLastImport(res)
const st = res.stats || {}
const modLabel = MODULE_LABEL[res.module] || res.module || ''
setSuccess(
@ -206,6 +212,24 @@ export default function UniversalCsvImportPage() {
}
}
// Runs the dry-run diagnosis for the selected file and template and stores
// the response for display; nothing is imported.
const runDiagnose = async () => {
  const ready = Boolean(file && mappingId)
  if (!ready) {
    setError('Bitte Datei und Vorlage wählen')
    return
  }

  // Reset previous output before starting a new request.
  setLoadingDiagnose(true)
  setError(null)
  setDiagnoseResult(null)

  try {
    const diagnosis = await api.diagnoseUniversalCsv(file, Number(mappingId))
    setDiagnoseResult(diagnosis)
  } catch (err) {
    setError(err.message || 'Diagnose fehlgeschlagen')
  } finally {
    setLoadingDiagnose(false)
  }
}
return (
<div className="capture-page" style={{ paddingBottom: 88 }}>
<button
@ -263,6 +287,32 @@ export default function UniversalCsvImportPage() {
</div>
)}
{lastImport?.error_details?.length > 0 && (
<details
className="card"
style={{ marginBottom: 16, padding: 16, cursor: 'pointer' }}
open
>
<summary style={{ fontWeight: 600, color: 'var(--text1)' }}>
Zeilenfehler vom letzten Import ({lastImport.error_details.length}) zum Kopieren aufklappen
</summary>
<pre
style={{
marginTop: 12,
fontSize: 12,
overflow: 'auto',
maxHeight: 320,
background: 'var(--surface2)',
padding: 12,
borderRadius: 8,
color: 'var(--text1)',
}}
>
{JSON.stringify(lastImport.error_details, null, 2)}
</pre>
</details>
)}
<div className="card" style={{ marginBottom: 16, padding: 16 }}>
<div className="form-label">1. CSV-Datei</div>
<input
@ -461,22 +511,71 @@ export default function UniversalCsvImportPage() {
</p>
)}
<button
type="button"
className="btn btn-primary"
style={{ marginTop: 16, width: '100%' }}
disabled={!file || !mappingId || !importAllowed || loadingImport}
onClick={handleImport}
>
{loadingImport ? (
<>
<Loader2 size={18} style={{ marginRight: 8, animation: 'spin 0.7s linear infinite' }} /> Import
läuft
</>
) : (
'Import starten'
)}
</button>
<div style={{ display: 'flex', flexDirection: 'column', gap: 12, marginTop: 16 }}>
<button
type="button"
className="btn btn-secondary"
style={{ width: '100%' }}
disabled={!file || !mappingId || !importAllowed || loadingDiagnose}
onClick={() => void runDiagnose()}
>
{loadingDiagnose ? (
<>
<Loader2
size={18}
style={{ marginRight: 8, animation: 'spin 0.7s linear infinite' }}
/>{' '}
Diagnose
</>
) : (
'Mapping prüfen (ohne Import)'
)}
</button>
<button
type="button"
className="btn btn-primary"
style={{ width: '100%' }}
disabled={!file || !mappingId || !importAllowed || loadingImport}
onClick={handleImport}
>
{loadingImport ? (
<>
<Loader2 size={18} style={{ marginRight: 8, animation: 'spin 0.7s linear infinite' }} /> Import
läuft
</>
) : (
'Import starten'
)}
</button>
</div>
{diagnoseResult && (
<details style={{ marginTop: 20 }} open>
<summary style={{ cursor: 'pointer', fontWeight: 600, color: 'var(--text2)' }}>
Diagnose-Ergebnis ({diagnoseResult.rows_diagnosed ?? 0} Zeilen)
</summary>
<p style={{ fontSize: 13, color: 'var(--text3)', marginTop: 8, lineHeight: 1.5 }}>
Vorlage #{diagnoseResult.mapping_id} · {diagnoseResult.mapping_name} · Modul{' '}
{MODULE_LABEL[diagnoseResult.module] || diagnoseResult.module}. Bei Vitalwerten: pro Zeile{' '}
<code>vitals.would_pass_prefilter</code> und{' '}
<code>prefilter_fail_reason</code> prüfen (z.B. <code>datum_fehlt</code>).
</p>
<pre
style={{
marginTop: 8,
fontSize: 11,
overflow: 'auto',
maxHeight: 480,
background: 'var(--surface2)',
padding: 12,
borderRadius: 8,
color: 'var(--text1)',
}}
>
{JSON.stringify(diagnoseResult, null, 2)}
</pre>
</details>
)}
</div>
)}
</div>

View File

@ -490,6 +490,23 @@ export const api = {
copyCsvMapping: (mappingId, body = null) =>
req(`/csv/mappings/${mappingId}/copy`, body ? json(body) : { method: 'POST' }),
/** Universal-CSV (Issue #21): Zielmodul steckt in der Vorlage; nur file + mapping_id */
/** Import-Diagnose: keine Datenbank-Schreibung, erste Zeilen + Mapping-Auflösung */
diagnoseUniversalCsv: async (file, mappingId, module = null) => {
const fd = new FormData()
fd.append('file', file)
fd.append('mapping_id', String(mappingId))
if (module) fd.append('module', module)
const res = await fetch(BASE + '/csv/import-diagnose', { method: 'POST', headers: hdrs(), body: fd })
if (!res.ok) {
const errText = await res.text()
let parsed = null
try {
parsed = JSON.parse(errText)
} catch { /* ignore */ }
throw new Error(formatFastApiDetail(parsed?.detail, errText.trim() || `HTTP ${res.status}`))
}
return res.json()
},
importUniversalCsv: async (file, mappingId) => {
const fd = new FormData()
fd.append('file', file)