2026-04-15 11:46:31 +02:00
3 changed files with 82 additions and 11 deletions
--- a/backend/csv_parser/core.py
+++ b/backend/csv_parser/core.py
@ -57,6 +57,18 @@ def _split_first_lines(text: str, max_lines: int = 5) -> List[str]:
    return lines


+def canonical_csv_header_label(name: str | None) -> str:
+    """
+    Return the uniform column key shared by analysis (template/dialog), import and signature.
+    Every BOM (U+FEFF) is removed and every NBSP (U+00A0) becomes a regular space before the
+    final strip(), so field_mappings keys match DictReader row keys exactly; None yields "".
+    """
+    if name is None:
+        return ""
+    s = str(name).replace("\ufeff", "").replace("\u00a0", " ").strip()
+    return s
+
+
 def parse_csv_sample(
    text: str,
    delimiter: str | None = None,
@ -85,7 +97,7 @@ def parse_csv_sample(
        return [], [], delim

    if has_header:
-        headers = [h.strip() for h in rows_raw[0]]
+        headers = [canonical_csv_header_label(h) for h in rows_raw[0]]
        data = rows_raw[1 : 1 + max_data_rows]
    else:
        n = len(rows_raw[0])
@ -103,7 +115,7 @@ def parse_csv_sample(


 def normalize_header_for_signature(name: str) -> str:
-    s = name.strip().lower()
+    s = canonical_csv_header_label(name).lower()  # BOM/NBSP-canonical label first, then lowercase
    s = re.sub(r"\s+", "_", s)
    s = re.sub(r"[^a-z0-9_äöüß().%-]+", "_", s)
    return s.strip("_")
@ -111,7 +123,9 @@ def normalize_header_for_signature(name: str) -> str:

 def column_signature(headers: List[str]) -> List[str]:
    """Sortierte normalisierte Spaltennamen für Signatur-Vergleich."""
-    return sorted({normalize_header_for_signature(h) for h in headers if h is not None and str(h).strip()})
+    return sorted(  # set comprehension de-duplicates; None and empty canonical labels are skipped
+        {normalize_header_for_signature(h) for h in headers if h is not None and canonical_csv_header_label(str(h))}
+    )


 def headers_signature_match_score(sig_csv: List[str], sig_template: List[str]) -> float:
@ -178,12 +192,6 @@ def get_csv_import_limits(conn_row: dict | None) -> dict[str, int]:
    return defaults


-def _strip_header_key(k: str | None) -> str:
-    if k is None:
-        return ""
-    return str(k).strip().removeprefix("\ufeff")
-
-
 def iter_csv_dict_rows(
    text: str,
    delimiter: str,
@ -205,4 +213,8 @@ def iter_csv_dict_rows(
            continue
        if not any(v and str(v).strip() for v in row.values()):
            continue
-        yield {_strip_header_key(k): (v or "").strip() for k, v in row.items() if _strip_header_key(k)}
+        yield {
+            canonical_csv_header_label(k): (v or "").strip()
+            for k, v in row.items()
+            if canonical_csv_header_label(k)
+        }
--- a/backend/csv_parser/type_converter.py
+++ b/backend/csv_parser/type_converter.py
@ -14,7 +14,7 @@ from typing import Any, Mapping, Sequence

 from dateutil import parser as dateutil_parser

-from csv_parser.core import normalize_header_for_signature
+from csv_parser.core import canonical_csv_header_label, normalize_header_for_signature
 from csv_parser.field_units import factor_source_to_canonical

 # Alias → strptime (JSON in Kleinbuchstaben)
@ -477,7 +477,12 @@ def _lookup_db_field(csv_col: str, field_mappings: Mapping[str, str]) -> str | N
    CSV-Spaltennamen können Roh-Header sein; Vorlagen-Schlüssel oft normalisiert
    (wie column_signature). Exakter Treffer, dann Schlüssel nach Normalisierung,
    dann Abgleich aller Vorlagen-Keys über deren Normalform.
+
+    Zusätzlich: Präfix-Treffer für lange manuelle Keys (z. B. Apple
+    „Aufgestiegene Höhe (m)“ → ``aufgestiegene_höhe_(m)`` vs. Mapping
+    „aufgestiegene Höhe“ → ``aufgestiegene_höhe``) — gewinnt der längste passende Key.
    """
+    csv_col = canonical_csv_header_label(csv_col)
    v = field_mappings.get(csv_col)
    if v:
        return v if v not in ("-", "_skip") else None
@ -488,6 +493,27 @@ def _lookup_db_field(csv_col: str, field_mappings: Mapping[str, str]) -> str | N
    for k, fv in field_mappings.items():
        if normalize_header_for_signature(str(k)) == norm:
            return fv if fv not in ("-", "_skip") else None
+
+    # Präfix-Match (min. Länge gegen false positives wie „datum“ → „datum_xyz“)
+    best_fv: str | None = None
+    best_nk_len = 0
+    min_prefix = 10
+    for k, fv in field_mappings.items():
+        if not fv or fv in ("-", "_skip"):
+            continue
+        nk = normalize_header_for_signature(str(k))
+        if len(nk) < min_prefix or len(nk) >= len(norm):
+            continue
+        if not norm.startswith(nk):
+            continue
+        boundary = norm[len(nk) : len(nk) + 1]
+        if boundary not in ("", "_", "("):
+            continue
+        if len(nk) > best_nk_len:
+            best_nk_len = len(nk)
+            best_fv = fv
+    if best_fv:
+        return best_fv
    return None


--- a/backend/tests/test_csv_parser_core.py
+++ b/backend/tests/test_csv_parser_core.py
@ -38,6 +38,29 @@ def test_parse_csv_sample_header():
    assert rows[0]["kcal"] == "2000"


+def test_parse_csv_sample_nbsp_in_header_matches_normal_space_key():
+    """Excel/Apple: an NBSP (U+00A0) in the column name yields the same key as a regular space."""
+    text = "Aufgestiegene\u00a0Höhe (m);Wert\n12;3\n"
+    headers, rows, delim = parse_csv_sample(text, delimiter=";", max_data_rows=3)
+    assert headers == ["Aufgestiegene Höhe (m)", "Wert"]
+    assert rows[0]["Aufgestiegene Höhe (m)"] == "12"
+
+
+def test_iter_csv_dict_rows_nbsp_header_canonical():
+    text = "col\u00a0one;b\n1;2\n"  # header cell contains an NBSP between "col" and "one"
+    rows = list(iter_csv_dict_rows(text, ";", has_header=True))
+    assert rows == [{"col one": "1", "b": "2"}]  # yielded key uses a regular space
+
+
+def test_build_row_field_mapping_space_vs_nbsp_in_csv_header():
+    """Template (dialog) key uses a regular space, the CSV header an NBSP; the mapping must still apply."""
+    csv_row = {"Aufgestiegene\u00a0Höhe (m)": "10"}
+    fm = {"Aufgestiegene Höhe (m)": "stola"}
+    tc = {"stola": {"type": "float", "decimal_separator": ".", "flexible": True}}
+    out = build_row_after_mapping(csv_row, fm, tc, module="activity")
+    assert out.get("stola") == 10.0
+
+
 def test_column_signature_sorted_unique():
    sig = column_signature(["B", "a", "a"])
    assert sig == ["a", "b"]
@ -183,6 +206,16 @@ def test_build_row_fddb_raw_header_keys_match_normalized_template():
    assert out["kcal"] is not None and abs(float(out["kcal"]) - (42000 / 4.184)) < 0.1


+def test_build_row_apple_workout_elevation_header_prefix_matches_shorter_mapping_key():
+    """Apple Workouts: „Aufgestiegene Höhe (m)“ normalizes differently from the manual key „aufgestiegene Höhe“."""
+    csv_row = {"Aufgestiegene Höhe (m)": "47.13", "Workout Type": "Outdoor Spaziergang"}
+    fm = {"aufgestiegene Höhe": "stola", "Workout Type": "activity_type"}
+    tc = {"stola": {"type": "string"}}
+    out = build_row_after_mapping(csv_row, fm, tc, module="activity")
+    assert out.get("stola") == "47.13"
+    assert out.get("activity_type") == "Outdoor Spaziergang"
+
+
 def test_convert_date_ddmm_with_seconds():
    d = convert_value(
        "15.01.2024 14:30:00",