From ebca44829e7ec26d20922c7c0b131afc5ea0f2ec Mon Sep 17 00:00:00 2001 From: Lars Date: Sat, 11 Apr 2026 06:52:17 +0200 Subject: [PATCH] fix(csv_parser): Normalize header comparison in CSV template validation - Updated the `validate_csv_template` function to normalize both the column signature and field mappings for accurate comparison, preventing false warnings about mismatches. - Enhanced warning messages to provide clearer guidance on the relationship between normalized signatures and raw field mappings. - Added a new test to ensure that normalized signatures do not trigger false warnings when compared to raw mappings. --- backend/csv_parser/template_validator.py | 24 +++++++++++++++++------ backend/tests/test_template_validator.py | 25 ++++++++++++++++++++++++ 2 files changed, 43 insertions(+), 6 deletions(-) diff --git a/backend/csv_parser/template_validator.py b/backend/csv_parser/template_validator.py index 63edee4..bbee6c9 100644 --- a/backend/csv_parser/template_validator.py +++ b/backend/csv_parser/template_validator.py @@ -8,6 +8,7 @@ from __future__ import annotations from typing import Any, Mapping +from csv_parser.core import normalize_header_for_signature from csv_parser.import_row_processing import validate_import_row_processing as validate_import_row_processing_spec from csv_parser.module_registry import ( get_module_definition, @@ -206,17 +207,28 @@ def validate_csv_template( ) ) - # Signatur vs. gemappte Spalten (nur Hinweis) + # Signatur vs. gemappte Spalten: beide Seiten wie beim Import normalisieren + # (column_signature kann sortierte Normalform aus Analyse sein, field_mappings rohe Header). if column_signature: - sig_norm = {str(c).strip() for c in column_signature if str(c).strip()} - mapped_cols = {str(k).strip() for k in fm.keys()} - if sig_norm and not sig_norm.intersection(mapped_cols): + sig_forms = { + normalize_header_for_signature(str(c)) + for c in column_signature + if str(c).strip() + } + sig_forms.discard("") + mapped_forms = { + normalize_header_for_signature(str(k)) + for k in fm.keys() + if str(k).strip() + } + mapped_forms.discard("") + if sig_forms and mapped_forms and not sig_forms.intersection(mapped_forms): warnings.append( _issue( "warning", "signature_vs_mappings_mismatch", - "column_signature und die Schlüssel in field_mappings haben keine gemeinsame Spalte.", - hint="Signatur dient dem Ranking; für den Import müssen die Kopfzeilen der Datei zu den Keys in field_mappings passen (oder Aliase greifen).", + "column_signature und field_mappings (Schlüssel) haben nach Normalisierung keine gemeinsame Spalte.", + hint="Prüfen Sie, ob die gespeicherte Signatur zur CSV passt; Zuordnungen nutzen rohe Kopfzeilen, die Signatur oft die gleiche Normalform wie in der Analyse.", ) ) diff --git a/backend/tests/test_template_validator.py b/backend/tests/test_template_validator.py index 88fb25d..b8e4df5 100644 --- a/backend/tests/test_template_validator.py +++ b/backend/tests/test_template_validator.py @@ -30,6 +30,31 @@ def test_validate_invalid_target_error(): assert any(e["code"] == "invalid_field_mapping" for e in r["errors"]) +def test_signature_normalized_vs_raw_mapping_no_false_warning(): + """Analyse liefert normalisierte Signatur; field_mappings nutzt Original-Header wie im Seed.""" + r = validate_csv_template( + "blood_pressure", + { + "Datum": "measured_date", + "Zeit": "measured_time", + "Systolisch (mmHg)": "systolic", + "Diastolisch (mmHg)": "diastolic", + "Puls (bpm)": "pulse", + }, + { + "measured_date": {"type": "date", "format": "dd.mm.yyyy"}, + "measured_time": {"type": "time", "format": "HH:MM"}, + "systolic": {"type": "int"}, + "diastolic": {"type": "int"}, + "pulse": {"type": "int"}, + }, + None, + ["datum", "diastolisch_mmhg", "puls_bpm", "systolisch_mmhg", "zeit"], + ) + assert r["valid"] is True + assert not any(w["code"] == "signature_vs_mappings_mismatch" for w in r["warnings"]) + + def test_validate_duplicate_target_warning(): r = validate_csv_template( "weight",