fix(csv_parser): Normalize header comparison in CSV template validation
- Updated the `validate_csv_template` function to normalize both the column signature and the field-mapping keys before comparing them, preventing false warnings about mismatches.
- Enhanced the warning message and hint to give clearer guidance on the relationship between normalized signatures and raw field-mapping headers.
- Added a new test to ensure that a normalized signature does not trigger a false warning when compared against raw mapping keys.
This commit is contained in:
parent
0629f88b37
commit
ebca44829e
|
|
@@ -8,6 +8,7 @@ from __future__ import annotations
|
||||||
|
|
||||||
from typing import Any, Mapping
|
from typing import Any, Mapping
|
||||||
|
|
||||||
|
from csv_parser.core import normalize_header_for_signature
|
||||||
from csv_parser.import_row_processing import validate_import_row_processing as validate_import_row_processing_spec
|
from csv_parser.import_row_processing import validate_import_row_processing as validate_import_row_processing_spec
|
||||||
from csv_parser.module_registry import (
|
from csv_parser.module_registry import (
|
||||||
get_module_definition,
|
get_module_definition,
|
||||||
|
|
@@ -206,17 +207,28 @@ def validate_csv_template(
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
# Signatur vs. gemappte Spalten (nur Hinweis)
|
# Signatur vs. gemappte Spalten: beide Seiten wie beim Import normalisieren
|
||||||
|
# (column_signature kann sortierte Normalform aus Analyse sein, field_mappings rohe Header).
|
||||||
if column_signature:
|
if column_signature:
|
||||||
sig_norm = {str(c).strip() for c in column_signature if str(c).strip()}
|
sig_forms = {
|
||||||
mapped_cols = {str(k).strip() for k in fm.keys()}
|
normalize_header_for_signature(str(c))
|
||||||
if sig_norm and not sig_norm.intersection(mapped_cols):
|
for c in column_signature
|
||||||
|
if str(c).strip()
|
||||||
|
}
|
||||||
|
sig_forms.discard("")
|
||||||
|
mapped_forms = {
|
||||||
|
normalize_header_for_signature(str(k))
|
||||||
|
for k in fm.keys()
|
||||||
|
if str(k).strip()
|
||||||
|
}
|
||||||
|
mapped_forms.discard("")
|
||||||
|
if sig_forms and mapped_forms and not sig_forms.intersection(mapped_forms):
|
||||||
warnings.append(
|
warnings.append(
|
||||||
_issue(
|
_issue(
|
||||||
"warning",
|
"warning",
|
||||||
"signature_vs_mappings_mismatch",
|
"signature_vs_mappings_mismatch",
|
||||||
"column_signature und die Schlüssel in field_mappings haben keine gemeinsame Spalte.",
|
"column_signature und field_mappings (Schlüssel) haben nach Normalisierung keine gemeinsame Spalte.",
|
||||||
hint="Signatur dient dem Ranking; für den Import müssen die Kopfzeilen der Datei zu den Keys in field_mappings passen (oder Aliase greifen).",
|
hint="Prüfen Sie, ob die gespeicherte Signatur zur CSV passt; Zuordnungen nutzen rohe Kopfzeilen, die Signatur oft die gleiche Normalform wie in der Analyse.",
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@@ -30,6 +30,31 @@ def test_validate_invalid_target_error():
|
||||||
assert any(e["code"] == "invalid_field_mapping" for e in r["errors"])
|
assert any(e["code"] == "invalid_field_mapping" for e in r["errors"])
|
||||||
|
|
||||||
|
|
||||||
|
def test_signature_normalized_vs_raw_mapping_no_false_warning():
|
||||||
|
"""Analyse liefert normalisierte Signatur; field_mappings nutzt Original-Header wie im Seed."""
|
||||||
|
r = validate_csv_template(
|
||||||
|
"blood_pressure",
|
||||||
|
{
|
||||||
|
"Datum": "measured_date",
|
||||||
|
"Zeit": "measured_time",
|
||||||
|
"Systolisch (mmHg)": "systolic",
|
||||||
|
"Diastolisch (mmHg)": "diastolic",
|
||||||
|
"Puls (bpm)": "pulse",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"measured_date": {"type": "date", "format": "dd.mm.yyyy"},
|
||||||
|
"measured_time": {"type": "time", "format": "HH:MM"},
|
||||||
|
"systolic": {"type": "int"},
|
||||||
|
"diastolic": {"type": "int"},
|
||||||
|
"pulse": {"type": "int"},
|
||||||
|
},
|
||||||
|
None,
|
||||||
|
["datum", "diastolisch_mmhg", "puls_bpm", "systolisch_mmhg", "zeit"],
|
||||||
|
)
|
||||||
|
assert r["valid"] is True
|
||||||
|
assert not any(w["code"] == "signature_vs_mappings_mismatch" for w in r["warnings"])
|
||||||
|
|
||||||
|
|
||||||
def test_validate_duplicate_target_warning():
|
def test_validate_duplicate_target_warning():
|
||||||
r = validate_csv_template(
|
r = validate_csv_template(
|
||||||
"weight",
|
"weight",
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user