fix(csv_parser): Normalize header comparison in CSV template validation
All checks were successful
Deploy Development / deploy (push) Successful in 56s
Build Test / pytest-backend (push) Successful in 4s
Build Test / lint-backend (push) Successful in 0s
Build Test / build-frontend (push) Successful in 16s

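- Updated the `validate_csv_template` function to run both the `column_signature` entries and the keys of `field_mappings` through `normalize_header_for_signature` before comparing them, preventing false mismatch warnings when the signature is stored in normalized form and the mappings use raw headers.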
- Enhanced warning messages to provide clearer guidance on the relationship between normalized signatures and raw field mappings.
- Added a new test to ensure that normalized signatures do not trigger false warnings when compared to raw mappings.
This commit is contained in:
Lars 2026-04-11 06:52:17 +02:00
parent 0629f88b37
commit ebca44829e
2 changed files with 43 additions and 6 deletions

View File

@ -8,6 +8,7 @@ from __future__ import annotations
from typing import Any, Mapping
from csv_parser.core import normalize_header_for_signature
from csv_parser.import_row_processing import validate_import_row_processing as validate_import_row_processing_spec
from csv_parser.module_registry import (
get_module_definition,
@ -206,17 +207,28 @@ def validate_csv_template(
)
)
# Signatur vs. gemappte Spalten (nur Hinweis)
# Signatur vs. gemappte Spalten: beide Seiten wie beim Import normalisieren
# (column_signature kann sortierte Normalform aus Analyse sein, field_mappings rohe Header).
if column_signature:
sig_norm = {str(c).strip() for c in column_signature if str(c).strip()}
mapped_cols = {str(k).strip() for k in fm.keys()}
if sig_norm and not sig_norm.intersection(mapped_cols):
sig_forms = {
normalize_header_for_signature(str(c))
for c in column_signature
if str(c).strip()
}
sig_forms.discard("")
mapped_forms = {
normalize_header_for_signature(str(k))
for k in fm.keys()
if str(k).strip()
}
mapped_forms.discard("")
if sig_forms and mapped_forms and not sig_forms.intersection(mapped_forms):
warnings.append(
_issue(
"warning",
"signature_vs_mappings_mismatch",
"column_signature und die Schlüssel in field_mappings haben keine gemeinsame Spalte.",
hint="Signatur dient dem Ranking; für den Import müssen die Kopfzeilen der Datei zu den Keys in field_mappings passen (oder Aliase greifen).",
"column_signature und field_mappings (Schlüssel) haben nach Normalisierung keine gemeinsame Spalte.",
hint="Prüfen Sie, ob die gespeicherte Signatur zur CSV passt; Zuordnungen nutzen rohe Kopfzeilen, die Signatur oft die gleiche Normalform wie in der Analyse.",
)
)

View File

@ -30,6 +30,31 @@ def test_validate_invalid_target_error():
assert any(e["code"] == "invalid_field_mapping" for e in r["errors"])
def test_signature_normalized_vs_raw_mapping_no_false_warning():
    """Normalized signature vs. raw mapping keys must not raise a false warning.

    The analysis step yields a normalized signature, while field_mappings
    uses the original headers as in the seed data.
    """
    # Mapping keyed by the raw CSV headers.
    raw_header_mapping = {
        "Datum": "measured_date",
        "Zeit": "measured_time",
        "Systolisch (mmHg)": "systolic",
        "Diastolisch (mmHg)": "diastolic",
        "Puls (bpm)": "pulse",
    }
    # Per-target type/format specification passed as the third argument.
    target_specs = {
        "measured_date": {"type": "date", "format": "dd.mm.yyyy"},
        "measured_time": {"type": "time", "format": "HH:MM"},
        "systolic": {"type": "int"},
        "diastolic": {"type": "int"},
        "pulse": {"type": "int"},
    }
    # Signature in sorted, normalized form (lowercase, underscores).
    normalized_signature = [
        "datum",
        "diastolisch_mmhg",
        "puls_bpm",
        "systolisch_mmhg",
        "zeit",
    ]

    result = validate_csv_template(
        "blood_pressure",
        raw_header_mapping,
        target_specs,
        None,
        normalized_signature,
    )

    assert result["valid"] is True
    # No warning may carry the mismatch code.
    assert all(
        warning["code"] != "signature_vs_mappings_mismatch"
        for warning in result["warnings"]
    )
def test_validate_duplicate_target_warning():
r = validate_csv_template(
"weight",