Universal CSV Importer #70

Merged
Lars merged 54 commits from develop into main 2026-04-11 07:06:47 +02:00
2 changed files with 43 additions and 6 deletions
Showing only changes of commit ebca44829e - Show all commits

View File

@ -8,6 +8,7 @@ from __future__ import annotations
from typing import Any, Mapping
from csv_parser.core import normalize_header_for_signature
from csv_parser.import_row_processing import validate_import_row_processing as validate_import_row_processing_spec
from csv_parser.module_registry import (
get_module_definition,
@ -206,17 +207,28 @@ def validate_csv_template(
)
)
# Signatur vs. gemappte Spalten (nur Hinweis)
# Signatur vs. gemappte Spalten: beide Seiten wie beim Import normalisieren
# (column_signature kann sortierte Normalform aus Analyse sein, field_mappings rohe Header).
if column_signature:
sig_norm = {str(c).strip() for c in column_signature if str(c).strip()}
mapped_cols = {str(k).strip() for k in fm.keys()}
if sig_norm and not sig_norm.intersection(mapped_cols):
sig_forms = {
normalize_header_for_signature(str(c))
for c in column_signature
if str(c).strip()
}
sig_forms.discard("")
mapped_forms = {
normalize_header_for_signature(str(k))
for k in fm.keys()
if str(k).strip()
}
mapped_forms.discard("")
if sig_forms and mapped_forms and not sig_forms.intersection(mapped_forms):
warnings.append(
_issue(
"warning",
"signature_vs_mappings_mismatch",
"column_signature und die Schlüssel in field_mappings haben keine gemeinsame Spalte.",
hint="Signatur dient dem Ranking; für den Import müssen die Kopfzeilen der Datei zu den Keys in field_mappings passen (oder Aliase greifen).",
"column_signature und field_mappings (Schlüssel) haben nach Normalisierung keine gemeinsame Spalte.",
hint="Prüfen Sie, ob die gespeicherte Signatur zur CSV passt; Zuordnungen nutzen rohe Kopfzeilen, die Signatur oft die gleiche Normalform wie in der Analyse.",
)
)

View File

@ -30,6 +30,31 @@ def test_validate_invalid_target_error():
assert any(e["code"] == "invalid_field_mapping" for e in r["errors"])
def test_signature_normalized_vs_raw_mapping_no_false_warning():
    """Normalized signature plus raw-header mappings must not raise a mismatch warning.

    The analysis step delivers a normalized column signature, whereas
    field_mappings uses the original headers as in the seed data.
    """
    # Keys are the raw CSV headers; values are the target field names.
    raw_header_mappings = {
        "Datum": "measured_date",
        "Zeit": "measured_time",
        "Systolisch (mmHg)": "systolic",
        "Diastolisch (mmHg)": "diastolic",
        "Puls (bpm)": "pulse",
    }
    # Per-target type/format description, keyed by the mapping targets above.
    target_field_specs = {
        "measured_date": {"type": "date", "format": "dd.mm.yyyy"},
        "measured_time": {"type": "time", "format": "HH:MM"},
        "systolic": {"type": "int"},
        "diastolic": {"type": "int"},
        "pulse": {"type": "int"},
    }
    # Sorted, normalized form of the same five headers.
    normalized_signature = [
        "datum",
        "diastolisch_mmhg",
        "puls_bpm",
        "systolisch_mmhg",
        "zeit",
    ]

    result = validate_csv_template(
        "blood_pressure",
        raw_header_mappings,
        target_field_specs,
        None,
        normalized_signature,
    )

    assert result["valid"] is True
    assert all(
        warning["code"] != "signature_vs_mappings_mismatch"
        for warning in result["warnings"]
    )
def test_validate_duplicate_target_warning():
r = validate_csv_template(
"weight",