diff --git a/backend/csv_parser/executor.py b/backend/csv_parser/executor.py index 5a7d630..60c5a80 100644 --- a/backend/csv_parser/executor.py +++ b/backend/csv_parser/executor.py @@ -18,6 +18,7 @@ from csv_parser.import_row_processing import ( validate_import_row_processing, ) from csv_parser.module_registry import get_module_definition +from csv_parser.import_errors import enrich_row_error from csv_parser.type_converter import build_row_after_mapping logger = logging.getLogger(__name__) @@ -756,8 +757,9 @@ def _import_vitals_baseline( cur.execute("ROLLBACK TO SAVEPOINT vitals_csv_row") except Exception: pass + err = enrich_row_error(str(e), module="vitals_baseline") error_details.append( - {"row": rows_total, "error": str(e), "context": "vitals_baseline upsert"}, + {"row": rows_total, "context": "vitals_baseline upsert", **err}, ) return { @@ -1003,7 +1005,8 @@ def _import_activity( cur.execute("ROLLBACK TO SAVEPOINT csv_activity_row") except Exception: pass - error_details.append({"row": rows_total, "error": str(e)}) + err = enrich_row_error(str(e), module="activity") + error_details.append({"row": rows_total, **err}) return { "rows_total": rows_total, diff --git a/backend/csv_parser/import_errors.py b/backend/csv_parser/import_errors.py new file mode 100644 index 0000000..62ebc3e --- /dev/null +++ b/backend/csv_parser/import_errors.py @@ -0,0 +1,53 @@ +""" +Menschenlesbare Hinweise zu typischen Import-/DB-Fehlern (Universal-CSV). +""" + +from __future__ import annotations + + +def enrich_row_error(message: str, module: str | None = None) -> dict[str, str | None]: + """ + Ergänzt eine Rohexception-Zeichenkette um ``code`` und ``hint`` für die Fehlerliste im Import. + """ + low = (message or "").lower() + out: dict[str, str | None] = {"error": message, "code": None, "hint": None} + + if "numeric field overflow" in low or "numeric value out of range" in low: + out["code"] = "db_numeric_overflow" + out["hint"] = ( + "Wert passt nicht in die Datenbank-Spalte (z. B. NUMERIC mit begrenzter Größe). " + "Häufig: Kilojoule aus dem Export landen im Kalorien-Feld – in der Vorlage für kcal_active/kcal_resting " + '"source_unit": "kj" setzen. Oder eine falsche CSV-Spalte ist einem kleinen Zielfeld zugeordnet ' + "(z. B. große Zahl in einem HF-Feld)." + ) + return out + + if "violates check constraint" in low and "source" in low: + out["code"] = "db_check_constraint_source" + out["hint"] = ( + "Die Tabelle erlaubt den gesetzten «source»-Wert nicht. " + "System-Vorlage / Migration zur erlaubten Quelle prüfen (z. B. csv für Universal-Import)." + ) + return out + + if "current transaction is aborted" in low: + out["code"] = "transaction_aborted" + out["hint"] = ( + "Eine frühere Zeile hat einen Datenbankfehler ausgelöst. " + "Zuerst die niedrigste Zeilennummer in error_details beheben (Vorlage/Daten prüfen)." + ) + return out + + if "invalid input syntax" in low and "time" in low: + out["code"] = "db_time_cast" + out["hint"] = ( + "start_time/end_time passen nicht zum erwarteten Zeitformat in der Datenbank. " + "Vorlage: Datums- und Zeitanteil konsistent (oft nur Uhrzeit, wenn date separat)." + ) + return out + + if module == "activity" and "foreign key" in low: + out["code"] = "db_foreign_key" + out["hint"] = "Verknüpfung zur Datenbank verletzt (z. B. training_type). Support kontaktieren." + + return out diff --git a/backend/csv_parser/template_validator.py b/backend/csv_parser/template_validator.py new file mode 100644 index 0000000..63edee4 --- /dev/null +++ b/backend/csv_parser/template_validator.py @@ -0,0 +1,223 @@ +""" +Formatprüfung für CSV-Import-Vorlagen (field_mappings, type_conversions). + +Liefert strukturierte Fehler/Warnungen für Admin-UI und Speicher-Guards. +""" + +from __future__ import annotations + +from typing import Any, Mapping + +from csv_parser.import_row_processing import validate_import_row_processing as validate_import_row_processing_spec +from csv_parser.module_registry import ( + get_module_definition, + validate_field_mappings, + validate_required_field_targets, +) + +ALLOWED_SPEC_TYPES = frozenset( + {"string", "float", "number", "int", "date", "time", "datetime", "duration"} +) + + +def _issue( + severity: str, + code: str, + message: str, + *, + hint: str | None = None, + field: str | None = None, + csv_columns: list[str] | None = None, +) -> dict[str, Any]: + out: dict[str, Any] = { + "severity": severity, + "code": code, + "message": message, + } + if hint: + out["hint"] = hint + if field: + out["field"] = field + if csv_columns: + out["csv_columns"] = csv_columns + return out + + +def validate_csv_template( + module: str, + field_mappings: Mapping[str, Any] | None, + type_conversions: Mapping[str, Any] | None = None, + import_row_processing: Mapping[str, Any] | None = None, + column_signature: list[str] | None = None, +) -> dict[str, Any]: + """ + Prüft eine Vorlage ohne Datei-Upload. + + Returns: + ``{"valid": bool, "errors": [...], "warnings": [...]}`` + """ + errors: list[dict[str, Any]] = [] + warnings: list[dict[str, Any]] = [] + + fm = dict(field_mappings or {}) + tc: dict[str, Any] = dict(type_conversions or {}) if type_conversions else {} + mod = get_module_definition(module) + if not mod: + errors.append( + _issue( + "error", + "unknown_module", + f"Unbekanntes Modul «{module}».", + hint="Nur registrierte Module in module_registry sind erlaubt.", + ) + ) + return {"valid": False, "errors": errors, "warnings": warnings} + + try: + validate_field_mappings(module, fm) + except ValueError as e: + errors.append( + _issue( + "error", + "invalid_field_mapping", + str(e), + hint="Jede Zuordnung muss auf ein bekanntes Zielfeld des Moduls zeigen (oder „–“ / ignorieren).", + ) + ) + + try: + validate_required_field_targets(module, fm) + except ValueError as e: + errors.append( + _issue( + "error", + "missing_required_target", + str(e), + hint="Pflichtfelder des Moduls müssen mindestens einer CSV-Spalte zugeordnet sein.", + ) + ) + + if import_row_processing: + try: + validate_import_row_processing_spec(module, import_row_processing, fm) + except ValueError as e: + errors.append( + _issue( + "error", + "invalid_import_row_processing", + str(e), + hint="import_row_processing: group_by und aggregates prüfen (siehe Doku Issue #21).", + ) + ) + + field_defs = mod.get("fields") or {} + for db_field, spec in tc.items(): + if db_field not in field_defs: + errors.append( + _issue( + "error", + "unknown_type_conversion_field", + f"type_conversions enthält unbekanntes Zielfeld «{db_field}».", + hint="Nur Felder aus der Moduldefinition sind erlaubt.", + field=db_field, + ) + ) + continue + if not isinstance(spec, Mapping): + errors.append( + _issue( + "error", + "type_conversion_not_object", + f"type_conversions[\"{db_field}\"] muss ein JSON-Objekt sein.", + field=db_field, + ) + ) + continue + stype = spec.get("type", "string") + if stype not in ALLOWED_SPEC_TYPES: + warnings.append( + _issue( + "warning", + "unusual_conversion_type", + f"Ungewöhnlicher Typ «{stype}» für «{db_field}» (erwartet u. a. string, float, date, datetime).", + field=db_field, + ) + ) + + finfo = field_defs.get(db_field) or {} + expected = finfo.get("type") + if expected == "date" and stype not in ("date", "datetime"): + warnings.append( + _issue( + "warning", + "date_field_conversion", + f"Zielfeld «{db_field}» ist ein Datum; der Konvertierungstyp ist «{stype}».", + hint="Meist «date» oder «datetime» mit passendem format.", + field=db_field, + ) + ) + if expected == "float" and stype == "int" and db_field in ("hr_avg", "hr_max"): + warnings.append( + _issue( + "warning", + "hr_as_int", + "Herzfrequenz als «int» konvertiert; Nachkommastellen aus Apple-Export gehen verloren.", + hint="Optional «float» mit flexible: true verwenden.", + field=db_field, + ) + ) + + # Mehrere CSV-Spalten → dasselbe Zielfeld + by_target: dict[str, list[str]] = {} + for csv_col, dbf in fm.items(): + if dbf in (None, "", "-", "_skip"): + continue + by_target.setdefault(str(dbf), []).append(str(csv_col)) + for dbf, cols in by_target.items(): + if len(cols) > 1: + warnings.append( + _issue( + "warning", + "duplicate_target_columns", + f"Mehrere Spalten mappen auf «{dbf}»: {', '.join(cols)}.", + hint="Beim Import gewinnt die letzte Spalte in der CSV-Kopfzeilen-Reihenfolge.", + field=dbf, + csv_columns=cols, + ) + ) + + # Kilojoule in kcal-Feldern (häufiger Apple-DE-Fehler) + for csv_col, dbf in fm.items(): + if dbf not in ("kcal_active", "kcal_resting"): + continue + col_l = str(csv_col).lower() + if "kj" in col_l or "kilojoule" in col_l: + sub = tc.get(dbf) + su = (sub or {}).get("source_unit") if isinstance(sub, Mapping) else None + if str(su or "").strip().lower() != "kj": + warnings.append( + _issue( + "warning", + "energy_kj_without_source_unit", + f"Spalte «{csv_col}» deutet auf Kilojoule, Zielfeld «{dbf}» speichert kcal.", + hint='In type_conversions für dieses Feld "source_unit": "kj" setzen (Faktor 1/4.184).', + field=str(dbf), + csv_columns=[str(csv_col)], + ) + ) + + # Signatur vs. gemappte Spalten (nur Hinweis) + if column_signature: + sig_norm = {str(c).strip() for c in column_signature if str(c).strip()} + mapped_cols = {str(k).strip() for k in fm.keys()} + if sig_norm and not sig_norm.intersection(mapped_cols): + warnings.append( + _issue( + "warning", + "signature_vs_mappings_mismatch", + "column_signature und die Schlüssel in field_mappings haben keine gemeinsame Spalte.", + hint="Signatur dient dem Ranking; für den Import müssen die Kopfzeilen der Datei zu den Keys in field_mappings passen (oder Aliase greifen).", + ) + ) + + return {"valid": len(errors) == 0, "errors": errors, "warnings": warnings} diff --git a/backend/routers/admin_csv_templates.py b/backend/routers/admin_csv_templates.py index bafc20a..2b3b7c7 100644 --- a/backend/routers/admin_csv_templates.py +++ b/backend/routers/admin_csv_templates.py @@ -23,11 +23,8 @@ from csv_parser.mapping_suggest import build_type_conversions_for_mapping, sugge from csv_parser.import_row_processing import ( validate_import_row_processing as validate_import_row_processing_spec, ) -from csv_parser.module_registry import ( - get_module_definition, - validate_field_mappings, - validate_required_field_targets, -) +from csv_parser.module_registry import get_module_definition +from csv_parser.template_validator import validate_csv_template router = APIRouter(prefix="/api/admin/csv-templates", tags=["admin", "csv-import"]) @@ -62,6 +59,16 @@ class CsvImportLimitsBody(BaseModel): max_file_bytes: int = Field(..., ge=10_000, le=2_147_483_648) +class CsvTemplateValidateBody(BaseModel): + """Formatprüfung ohne Speichern (field_mappings + type_conversions + optional row_processing).""" + + module: str + field_mappings: dict = Field(default_factory=dict) + type_conversions: Optional[dict] = None + import_row_processing: Optional[dict] = None + column_signature: Optional[List[str]] = None + + def _row_full(m: dict) -> dict: return { "id": m["id"], @@ -255,6 +262,23 @@ def _admin_csv_limits() -> dict[str, int]: return get_csv_import_limits(r2d(row) if row else None) +@router.post("/validate") +def validate_system_template_dry_run(body: CsvTemplateValidateBody, session: dict = Depends(require_admin)): + """ + Validatorlauf für eine Vorlagen-Konfiguration (ohne DB-Schreiben). + Nutzbar aus dem Admin-Editor vor dem Speichern. + """ + if not get_module_definition(body.module): + raise HTTPException(400, f"Unbekanntes Modul: {body.module}") + return validate_csv_template( + body.module, + body.field_mappings, + body.type_conversions, + body.import_row_processing, + body.column_signature, + ) + + @router.get("/{template_id}") def get_system_template(template_id: int, session: dict = Depends(require_admin)): with get_db() as conn: @@ -273,17 +297,15 @@ def get_system_template(template_id: int, session: dict = Depends(require_admin) def create_system_template(body: CsvSystemTemplateCreate, session: dict = Depends(require_admin)): if not get_module_definition(body.module): raise HTTPException(400, f"Unbekanntes Modul: {body.module}") - try: - validate_field_mappings(body.module, body.field_mappings) - validate_required_field_targets(body.module, body.field_mappings) - except ValueError as e: - raise HTTPException(400, str(e)) - - if body.import_row_processing: - try: - validate_import_row_processing_spec(body.module, body.import_row_processing, body.field_mappings) - except ValueError as e: - raise HTTPException(400, str(e)) + report = validate_csv_template( + body.module, + body.field_mappings, + body.type_conversions, + body.import_row_processing, + body.column_signature, + ) + if not report["valid"]: + raise HTTPException(status_code=422, detail=report) with get_db() as conn: cur = get_cursor(conn) @@ -311,7 +333,7 @@ def create_system_template(body: CsvSystemTemplateCreate, session: dict = Depend ), ) new_id = cur.fetchone()["id"] - return {"id": new_id} + return {"id": new_id, "validation": report} @router.put("/{template_id}") @@ -335,12 +357,19 @@ def update_system_template( return _row_full(existing) fm = patch.get("field_mappings", existing["field_mappings"]) - if "field_mappings" in patch: - try: - validate_field_mappings(existing["module"], fm) - validate_required_field_targets(existing["module"], fm) - except ValueError as e: - raise HTTPException(400, str(e)) + tc_eff = patch.get("type_conversions", existing.get("type_conversions")) + irp_eff = patch.get("import_row_processing", existing.get("import_row_processing")) + col_eff = patch.get("column_signature", existing.get("column_signature")) + + report = validate_csv_template( + existing["module"], + fm, + tc_eff, + irp_eff, + col_eff if isinstance(col_eff, list) else None, + ) + if not report["valid"]: + raise HTTPException(status_code=422, detail=report) fields_sql = [] vals: list = [] @@ -371,15 +400,6 @@ def update_system_template( vals.append(Json(tc) if tc is not None else None) if "import_row_processing" in patch: irp = patch["import_row_processing"] - if irp: - try: - validate_import_row_processing_spec( - existing["module"], - irp, - patch.get("field_mappings", existing["field_mappings"]), - ) - except ValueError as e: - raise HTTPException(400, str(e)) fields_sql.append("import_row_processing = %s") vals.append(Json(irp) if irp is not None else None) @@ -393,7 +413,7 @@ def update_system_template( cur.execute("SELECT * FROM csv_field_mappings WHERE id = %s", (template_id,)) m = r2d(cur.fetchone()) - return _row_full(m) + return {**_row_full(m), "validation": report} @router.delete("/{template_id}") diff --git a/backend/routers/csv_import.py b/backend/routers/csv_import.py index 2a4b7d4..5f618da 100644 --- a/backend/routers/csv_import.py +++ b/backend/routers/csv_import.py @@ -32,6 +32,7 @@ from csv_parser.core import ( ) from csv_parser.type_converter import build_row_after_mapping, diagnose_row_mapping from csv_parser.field_units import source_unit_choices_for_field +from csv_parser.import_errors import enrich_row_error from csv_parser.module_registry import get_module_definition, list_modules, validate_field_mappings from csv_parser.sleep_apple_import import detect_apple_sleep_csv_format @@ -554,6 +555,7 @@ async def csv_import_execute( except Exception as exec_err: logger.exception("Universal-CSV-Import fehlgeschlagen: %s", exec_err) cur.execute("ROLLBACK TO SAVEPOINT csv_import_exec") + err_payload = {"error": str(exec_err), **enrich_row_error(str(exec_err), exec_module)} cur.execute( """ UPDATE csv_import_log SET @@ -562,9 +564,13 @@ async def csv_import_execute( error_details = %s WHERE id = %s """, - (Json([{"error": str(exec_err)}]), log_id), + (Json([err_payload]), log_id), ) - err_response = HTTPException(500, f"Import fehlgeschlagen: {exec_err}") + hint = err_payload.get("hint") + msg = f"Import fehlgeschlagen: {exec_err}" + if hint: + msg = f"{msg} ({hint})" + err_response = HTTPException(500, msg) else: cur.execute("RELEASE SAVEPOINT csv_import_exec") cur.execute( diff --git a/backend/tests/test_import_errors.py b/backend/tests/test_import_errors.py new file mode 100644 index 0000000..65b5195 --- /dev/null +++ b/backend/tests/test_import_errors.py @@ -0,0 +1,10 @@ +from csv_parser.import_errors import enrich_row_error + + +def test_enrich_numeric_overflow(): + d = enrich_row_error( + "numeric field overflow\nDETAIL: A field with precision 5, scale 2\n", + module="activity", + ) + assert d["code"] == "db_numeric_overflow" + assert d["hint"] and "kj" in d["hint"].lower() diff --git a/backend/tests/test_template_validator.py b/backend/tests/test_template_validator.py new file mode 100644 index 0000000..88fb25d --- /dev/null +++ b/backend/tests/test_template_validator.py @@ -0,0 +1,45 @@ +"""Formatprüfung CSV-Vorlagen (template_validator).""" + +from __future__ import annotations + +from csv_parser.template_validator import validate_csv_template + + +def test_validate_kj_column_warns_without_source_unit(): + r = validate_csv_template( + "activity", + {"Aktive Energie (kJ)": "kcal_active", "Start": "start_time", "Trainingsart": "activity_type"}, + {"start_time": {"type": "datetime", "format": "yyyy-mm-dd HH:MM:SS", "flexible": True}}, + None, + None, + ) + assert r["valid"] is True + codes = {w["code"] for w in r["warnings"]} + assert "energy_kj_without_source_unit" in codes + + +def test_validate_invalid_target_error(): + r = validate_csv_template( + "activity", + {"X": "not_a_field"}, + {}, + None, + None, + ) + assert r["valid"] is False + assert any(e["code"] == "invalid_field_mapping" for e in r["errors"]) + + +def test_validate_duplicate_target_warning(): + r = validate_csv_template( + "weight", + {"A": "weight", "B": "weight", "Tag": "date"}, + { + "weight": {"type": "float", "decimal_separator": "."}, + "date": {"type": "date", "format": "yyyy-mm-dd", "flexible": True}, + }, + None, + None, + ) + assert r["valid"] is True + assert any(w["code"] == "duplicate_target_columns" for w in r["warnings"]) diff --git a/backend/version.py b/backend/version.py index 9f52bbf..c90e6c0 100644 --- a/backend/version.py +++ b/backend/version.py @@ -31,8 +31,8 @@ MODULE_VERSIONS = { "membership": "2.1.0", "workflow": "0.6.0", # Phase 4: End Node Template Engine "app_dashboard": "1.11.0", # Entitlements: DB-Override widget→features (AND), sonst Katalog - "csv_import": "0.3.1", # GET /csv/modules: import_row_processing_default pro Modul - "admin_csv_templates": "0.2.0", # Admin-Editor: Zeilenaggregation (Schlüssel + gemeinsame Funktion) + "csv_import": "0.3.2", # Import-Fehler: enrich_row_error / freundlichere 500-Hinweise + "admin_csv_templates": "0.3.0", # POST /validate + Speichern nur bei valid (422 + warnings in Response) } CHANGELOG = [ diff --git a/frontend/src/pages/AdminCsvTemplateEditorPage.jsx b/frontend/src/pages/AdminCsvTemplateEditorPage.jsx index 8dfdd17..90e6fc3 100644 --- a/frontend/src/pages/AdminCsvTemplateEditorPage.jsx +++ b/frontend/src/pages/AdminCsvTemplateEditorPage.jsx @@ -278,6 +278,8 @@ export default function AdminCsvTemplateEditorPage() { const [loading, setLoading] = useState(!isNew) const [analyzing, setAnalyzing] = useState(false) const [saving, setSaving] = useState(false) + const [validating, setValidating] = useState(false) + const [validationReport, setValidationReport] = useState(null) const [error, setError] = useState(null) /** Entwurf für „Quelle entspricht Ziel“ (nur source_unit custom); Commit bei Blur/Speichern. */ const [customEquivalenceDraftByField, setCustomEquivalenceDraftByField] = useState({}) @@ -593,6 +595,38 @@ export default function AdminCsvTemplateEditorPage() { setFieldMappings((prev) => ({ ...prev, [col]: dbField || '-' })) } + const handleFormatCheck = async () => { + setError(null) + setValidationReport(null) + let tc + try { + tc = JSON.parse(typeConversionsText || '{}') + if (tc !== null && typeof tc !== 'object') throw new Error() + } catch { + setError('type_conversions: ungültiges JSON.') + return + } + if (!module) { + setError('Modul wählen.') + return + } + setValidating(true) + try { + const r = await api.adminValidateCsvTemplate({ + module, + field_mappings: fieldMappings, + type_conversions: tc, + import_row_processing: null, + column_signature: columnSignature.length ? columnSignature : null, + }) + setValidationReport(r) + } catch (e) { + setError(e.message || 'Formatprüfung fehlgeschlagen') + } finally { + setValidating(false) + } + } + const handleSave = async () => { setError(null) let textForTc = typeConversionsText @@ -1432,7 +1466,49 @@ export default function AdminCsvTemplateEditorPage() { /> + {validationReport ? ( +
+ Ohne Zeilenaggregations-JSON; vollständige Prüfung inkl. Aggregation beim Speichern. Warnungen blockieren nicht. +
+ {validationReport.errors?.length ? ( +