fix(csv_parser): Normalize header comparison in CSV template validation
- Updated the `validate_csv_template` function to normalize both the column signature and the field-mapping keys before comparing them, preventing false warnings about mismatches.
- Enhanced the warning message to provide clearer guidance on the relationship between normalized signatures and raw field mappings.
- Added a new test to ensure that a normalized signature does not trigger a false warning when compared to raw mappings.
This commit is contained in:
parent
0629f88b37
commit
ebca44829e
|
|
@ -8,6 +8,7 @@ from __future__ import annotations
|
|||
|
||||
from typing import Any, Mapping
|
||||
|
||||
from csv_parser.core import normalize_header_for_signature
|
||||
from csv_parser.import_row_processing import validate_import_row_processing as validate_import_row_processing_spec
|
||||
from csv_parser.module_registry import (
|
||||
get_module_definition,
|
||||
|
|
@ -206,17 +207,28 @@ def validate_csv_template(
|
|||
)
|
||||
)
|
||||
|
||||
# Signatur vs. gemappte Spalten (nur Hinweis)
|
||||
# Signatur vs. gemappte Spalten: beide Seiten wie beim Import normalisieren
|
||||
# (column_signature kann sortierte Normalform aus Analyse sein, field_mappings rohe Header).
|
||||
if column_signature:
|
||||
sig_norm = {str(c).strip() for c in column_signature if str(c).strip()}
|
||||
mapped_cols = {str(k).strip() for k in fm.keys()}
|
||||
if sig_norm and not sig_norm.intersection(mapped_cols):
|
||||
sig_forms = {
|
||||
normalize_header_for_signature(str(c))
|
||||
for c in column_signature
|
||||
if str(c).strip()
|
||||
}
|
||||
sig_forms.discard("")
|
||||
mapped_forms = {
|
||||
normalize_header_for_signature(str(k))
|
||||
for k in fm.keys()
|
||||
if str(k).strip()
|
||||
}
|
||||
mapped_forms.discard("")
|
||||
if sig_forms and mapped_forms and not sig_forms.intersection(mapped_forms):
|
||||
warnings.append(
|
||||
_issue(
|
||||
"warning",
|
||||
"signature_vs_mappings_mismatch",
|
||||
"column_signature und die Schlüssel in field_mappings haben keine gemeinsame Spalte.",
|
||||
hint="Signatur dient dem Ranking; für den Import müssen die Kopfzeilen der Datei zu den Keys in field_mappings passen (oder Aliase greifen).",
|
||||
"column_signature und field_mappings (Schlüssel) haben nach Normalisierung keine gemeinsame Spalte.",
|
||||
hint="Prüfen Sie, ob die gespeicherte Signatur zur CSV passt; Zuordnungen nutzen rohe Kopfzeilen, die Signatur oft die gleiche Normalform wie in der Analyse.",
|
||||
)
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@ -30,6 +30,31 @@ def test_validate_invalid_target_error():
|
|||
assert any(e["code"] == "invalid_field_mapping" for e in r["errors"])
|
||||
|
||||
|
||||
def test_signature_normalized_vs_raw_mapping_no_false_warning():
    """Analysis yields a normalized signature; field_mappings uses the original headers as in the seed."""
    # Mapping keys are the raw CSV headers (mixed case, units in parentheses).
    raw_header_mappings = {
        "Datum": "measured_date",
        "Zeit": "measured_time",
        "Systolisch (mmHg)": "systolic",
        "Diastolisch (mmHg)": "diastolic",
        "Puls (bpm)": "pulse",
    }
    field_options = {
        "measured_date": {"type": "date", "format": "dd.mm.yyyy"},
        "measured_time": {"type": "time", "format": "HH:MM"},
        "systolic": {"type": "int"},
        "diastolic": {"type": "int"},
        "pulse": {"type": "int"},
    }
    # Signature entries are the sorted, normalized forms of the same headers.
    normalized_signature = [
        "datum",
        "diastolisch_mmhg",
        "puls_bpm",
        "systolisch_mmhg",
        "zeit",
    ]

    result = validate_csv_template(
        "blood_pressure",
        raw_header_mappings,
        field_options,
        None,
        normalized_signature,
    )

    assert result["valid"] is True
    # Both sides describe the same columns, so no mismatch warning may appear.
    assert all(
        issue["code"] != "signature_vs_mappings_mismatch"
        for issue in result["warnings"]
    )
|
||||
|
||||
|
||||
def test_validate_duplicate_target_warning():
|
||||
r = validate_csv_template(
|
||||
"weight",
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user