Erste Version - Universal CSV Importer für EAV und activity_log #85
|
|
@ -57,6 +57,18 @@ def _split_first_lines(text: str, max_lines: int = 5) -> List[str]:
|
|||
return lines
|
||||
|
||||
|
||||
def canonical_csv_header_label(name: str | None) -> str:
|
||||
"""
|
||||
Einheitlicher Spalten-Key für Analyse (Vorlage/Dialog), Import und Signatur.
|
||||
BOM und NBSP (häufig in Excel/Apple-Exporten) werden vereinheitlicht, damit
|
||||
field_mappings exakt zu DictReader-Zeilen passt.
|
||||
"""
|
||||
if name is None:
|
||||
return ""
|
||||
s = str(name).replace("\ufeff", "").replace("\u00a0", " ").strip()
|
||||
return s
|
||||
|
||||
|
||||
def parse_csv_sample(
|
||||
text: str,
|
||||
delimiter: str | None = None,
|
||||
|
|
@ -85,7 +97,7 @@ def parse_csv_sample(
|
|||
return [], [], delim
|
||||
|
||||
if has_header:
|
||||
headers = [h.strip() for h in rows_raw[0]]
|
||||
headers = [canonical_csv_header_label(h) for h in rows_raw[0]]
|
||||
data = rows_raw[1 : 1 + max_data_rows]
|
||||
else:
|
||||
n = len(rows_raw[0])
|
||||
|
|
@ -103,7 +115,7 @@ def parse_csv_sample(
|
|||
|
||||
|
||||
def normalize_header_for_signature(name: str) -> str:
    """Normalize a column header into the form used for signature comparison.

    Steps: canonicalize the label via ``canonical_csv_header_label``,
    lower-case it, collapse every whitespace run into a single ``_``, replace
    every run of characters outside ``a-z 0-9 _ ä ö ü ß ( ) . % -`` with
    ``_``, and finally trim leading/trailing underscores.

    Args:
        name: Raw or already cleaned header text.

    Returns:
        The normalized header; may be empty if nothing usable remains.
    """
    # Only the canonical-label variant is kept. The superseded
    # ``name.strip().lower()`` assignment was a dead store and raised on None
    # before the None-tolerant helper could run.
    s = canonical_csv_header_label(name).lower()
    s = re.sub(r"\s+", "_", s)
    s = re.sub(r"[^a-z0-9_äöüß().%-]+", "_", s)
    return s.strip("_")
|
||||
|
|
@ -111,7 +123,9 @@ def normalize_header_for_signature(name: str) -> str:
|
|||
|
||||
def column_signature(headers: List[str]) -> List[str]:
    """Return the sorted, de-duplicated normalized column names.

    The result is intended for signature comparison. Headers whose canonical
    label is empty (``None``, blank, BOM/NBSP only) are skipped.

    Args:
        headers: Column names as read from the CSV or a template.

    Returns:
        Sorted list of unique ``normalize_header_for_signature`` results.
    """
    # A single return: the older ``str(h).strip()`` filter used to come first
    # and made the canonical-label filter unreachable.
    # canonical_csv_header_label() handles None and non-str values itself, so
    # no separate ``is not None`` / ``str()`` guard is needed here.
    return sorted(
        {
            normalize_header_for_signature(h)
            for h in headers
            if canonical_csv_header_label(h)
        }
    )
|
||||
|
||||
|
||||
def headers_signature_match_score(sig_csv: List[str], sig_template: List[str]) -> float:
|
||||
|
|
@ -178,12 +192,6 @@ def get_csv_import_limits(conn_row: dict | None) -> dict[str, int]:
|
|||
return defaults
|
||||
|
||||
|
||||
def _strip_header_key(k: str | None) -> str:
    """Return the cleaned dictionary key for a CSV header cell.

    Delegates to ``canonical_csv_header_label`` so row keys agree with the
    keys used for analysis and signatures. The previous implementation
    called ``strip()`` before ``removeprefix("\\ufeff")``, which left
    whitespace following a leading BOM in place, and it did not unify
    no-break spaces.

    Args:
        k: Raw header key; ``None`` is accepted.

    Returns:
        The canonical label, or ``""`` for ``None``.
    """
    return canonical_csv_header_label(k)
|
||||
|
||||
|
||||
def iter_csv_dict_rows(
|
||||
text: str,
|
||||
delimiter: str,
|
||||
|
|
@ -205,4 +213,8 @@ def iter_csv_dict_rows(
|
|||
continue
|
||||
if not any(v and str(v).strip() for v in row.values()):
|
||||
continue
|
||||
yield {_strip_header_key(k): (v or "").strip() for k, v in row.items() if _strip_header_key(k)}
|
||||
yield {
|
||||
canonical_csv_header_label(k): (v or "").strip()
|
||||
for k, v in row.items()
|
||||
if canonical_csv_header_label(k)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -14,7 +14,7 @@ from typing import Any, Mapping, Sequence
|
|||
|
||||
from dateutil import parser as dateutil_parser
|
||||
|
||||
from csv_parser.core import normalize_header_for_signature
|
||||
from csv_parser.core import canonical_csv_header_label, normalize_header_for_signature
|
||||
from csv_parser.field_units import factor_source_to_canonical
|
||||
|
||||
# Alias → strptime (JSON in Kleinbuchstaben)
|
||||
|
|
@ -477,7 +477,12 @@ def _lookup_db_field(csv_col: str, field_mappings: Mapping[str, str]) -> str | N
|
|||
CSV-Spaltennamen können Roh-Header sein; Vorlagen-Schlüssel oft normalisiert
|
||||
(wie column_signature). Exakter Treffer, dann Schlüssel nach Normalisierung,
|
||||
dann Abgleich aller Vorlagen-Keys über deren Normalform.
|
||||
|
||||
Zusätzlich: Präfix-Treffer für lange manuelle Keys (z. B. Apple
|
||||
„Aufgestiegene Höhe (m)“ → ``aufgestiegene_höhe_(m)`` vs. Mapping
|
||||
„aufgestiegene Höhe“ → ``aufgestiegene_höhe``) — gewinnt der längste passende Key.
|
||||
"""
|
||||
csv_col = canonical_csv_header_label(csv_col)
|
||||
v = field_mappings.get(csv_col)
|
||||
if v:
|
||||
return v if v not in ("-", "_skip") else None
|
||||
|
|
@ -488,6 +493,27 @@ def _lookup_db_field(csv_col: str, field_mappings: Mapping[str, str]) -> str | N
|
|||
for k, fv in field_mappings.items():
|
||||
if normalize_header_for_signature(str(k)) == norm:
|
||||
return fv if fv not in ("-", "_skip") else None
|
||||
|
||||
# Präfix-Match (min. Länge gegen false positives wie „datum“ → „datum_xyz“)
|
||||
best_fv: str | None = None
|
||||
best_nk_len = 0
|
||||
min_prefix = 10
|
||||
for k, fv in field_mappings.items():
|
||||
if not fv or fv in ("-", "_skip"):
|
||||
continue
|
||||
nk = normalize_header_for_signature(str(k))
|
||||
if len(nk) < min_prefix or len(nk) >= len(norm):
|
||||
continue
|
||||
if not norm.startswith(nk):
|
||||
continue
|
||||
boundary = norm[len(nk) : len(nk) + 1]
|
||||
if boundary not in ("", "_", "("):
|
||||
continue
|
||||
if len(nk) > best_nk_len:
|
||||
best_nk_len = len(nk)
|
||||
best_fv = fv
|
||||
if best_fv:
|
||||
return best_fv
|
||||
return None
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -38,6 +38,29 @@ def test_parse_csv_sample_header():
|
|||
assert rows[0]["kcal"] == "2000"
|
||||
|
||||
|
||||
def test_parse_csv_sample_nbsp_in_header_matches_normal_space_key():
    """Excel/Apple: a NBSP (U+00A0) in a column name gives the same key as a regular space."""
    text = "Aufgestiegene\u00a0Höhe (m);Wert\n12;3\n"
    # The third return value (delimiter) is not under test here.
    headers, rows, _ = parse_csv_sample(text, delimiter=";", max_data_rows=3)
    assert headers == ["Aufgestiegene Höhe (m)", "Wert"]
    assert rows[0]["Aufgestiegene Höhe (m)"] == "12"
|
||||
|
||||
|
||||
def test_iter_csv_dict_rows_nbsp_header_canonical():
    """Row keys yielded by ``iter_csv_dict_rows`` carry a regular space instead of a NBSP."""
    text = "col\u00a0one;b\n1;2\n"
    rows = list(iter_csv_dict_rows(text, ";", has_header=True))
    assert rows == [{"col one": "1", "b": "2"}]
|
||||
|
||||
|
||||
def test_build_row_field_mapping_space_vs_nbsp_in_csv_header():
    """Template (dialog) key with a regular space, CSV header with NBSP: the mapping must still apply."""
    csv_row = {"Aufgestiegene\u00a0Höhe (m)": "10"}
    fm = {"Aufgestiegene Höhe (m)": "stola"}
    tc = {"stola": {"type": "float", "decimal_separator": ".", "flexible": True}}
    out = build_row_after_mapping(csv_row, fm, tc, module="activity")
    assert out.get("stola") == 10.0
|
||||
|
||||
|
||||
def test_column_signature_sorted_unique():
    """``column_signature`` lower-cases, de-duplicates and sorts the header names."""
    sig = column_signature(["B", "a", "a"])
    assert sig == ["a", "b"]
|
||||
|
|
@ -183,6 +206,16 @@ def test_build_row_fddb_raw_header_keys_match_normalized_template():
|
|||
assert out["kcal"] is not None and abs(float(out["kcal"]) - (42000 / 4.184)) < 0.1
|
||||
|
||||
|
||||
def test_build_row_apple_workout_elevation_header_prefix_matches_shorter_mapping_key():
    """Apple Workouts: "Aufgestiegene Höhe (m)" normalizes differently from the manual key "aufgestiegene Höhe".

    The shorter mapping key is expected to match the CSV header as a prefix.
    """
    csv_row = {"Aufgestiegene Höhe (m)": "47.13", "Workout Type": "Outdoor Spaziergang"}
    fm = {"aufgestiegene Höhe": "stola", "Workout Type": "activity_type"}
    tc = {"stola": {"type": "string"}}
    out = build_row_after_mapping(csv_row, fm, tc, module="activity")
    assert out.get("stola") == "47.13"
    assert out.get("activity_type") == "Outdoor Spaziergang"
|
||||
|
||||
|
||||
def test_convert_date_ddmm_with_seconds():
|
||||
d = convert_value(
|
||||
"15.01.2024 14:30:00",
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user