From 08eae86ddca357689efda5093ced5eaa842ebc26 Mon Sep 17 00:00:00 2001
From: Lars <Lars@stommer.de>
Date: Wed, 15 Apr 2026 10:35:48 +0200
Subject: [PATCH] feat: Refactor activity import logic and enhance CSV handling

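- Removed `resolve_activity_log_column_patch_from_csv` and replaced its use with `activity_csv_registry_updates_from_mapped`, which builds `activity_log` updates from the fields of the activity module registry instead of from training-parameter `source_field` mappings.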
- Updated the `_import_activity` function to utilize the new registry updates, improving data integrity during activity imports.
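- Enhanced the activity module registry by adding German labels (`label_de`) to all fields and registering additional core fields (e.g. `hr_min`, `rpe`, `pace_min_per_km`, `cadence`, `notes`), improving localization support.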
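- Refactored the session metrics handling so that the EAV upsert skips every key that is a field of the activity module registry; those core fields are written to `activity_log` by the executor only, which avoids duplicate EAV rows and makes CSV imports more robust.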
---
 backend/csv_parser/executor.py                | 16 ++--
 backend/csv_parser/module_registry.py         | 47 +++++++---
 .../activity_persistence_orchestrator.py      | 88 ++++++++++++++++++-
 .../data_layer/activity_session_metrics.py    | 58 ++----------
 frontend/src/pages/ActivityPage.jsx           | 26 +++---
 .../src/pages/AdminCsvTemplateEditorPage.jsx  | 39 ++++++--
 6 files changed, 181 insertions(+), 93 deletions(-)

diff --git a/backend/csv_parser/executor.py b/backend/csv_parser/executor.py
index 3e6fa67..67c78c7 100644
--- a/backend/csv_parser/executor.py
+++ b/backend/csv_parser/executor.py
@@ -808,16 +808,14 @@ def _import_activity(
 ) -> dict[str, int]:
     from data_layer.activity_time_normalize import normalize_activity_start
     from data_layer.activity_persistence_orchestrator import (
+        activity_csv_registry_updates_from_mapped,
         find_activity_duplicate_id,
         insert_activity_csv_minimal,
         new_activity_id,
         run_activity_post_write_hooks_import,
         update_activity_columns,
     )
-    from data_layer.activity_session_metrics import (
-        resolve_activity_log_column_patch_from_csv,
-        upsert_session_metrics_from_csv_mapped,
-    )
+    from data_layer.activity_session_metrics import upsert_session_metrics_from_csv_mapped
 
     rows_total = 0
     inserted = 0
@@ -898,9 +896,7 @@ def _import_activity(
             training_type_id, training_category, training_subcategory = _resolve_training_type_for_activity(
                 cur, wtype, profile_id
             )
-            column_patch = resolve_activity_log_column_patch_from_csv(
-                cur, mapped, training_category, training_type_id
-            )
+            registry_updates = activity_csv_registry_updates_from_mapped(mapped)
             existing_id = find_activity_duplicate_id(cur, profile_id, iso, workout_start_t)
 
             if existing_id:
@@ -919,7 +915,7 @@ def _import_activity(
                     "training_subcategory": training_subcategory,
                     "source": "csv",
                 }
-                upd.update(column_patch)
+                upd.update(registry_updates)
                 update_activity_columns(cur, profile_id, existing_id, upd)
                 updated += 1
                 affected_ids["activity_log"].append(str(existing_id))
@@ -949,8 +945,8 @@ def _import_activity(
                 new_entries += 1
                 affected_ids["activity_log"].append(str(eid))
                 aid = eid
-                if column_patch:
-                    update_activity_columns(cur, profile_id, aid, column_patch)
+                if registry_updates:
+                    update_activity_columns(cur, profile_id, aid, registry_updates)
 
             run_activity_post_write_hooks_import(
                 cur,
diff --git a/backend/csv_parser/module_registry.py b/backend/csv_parser/module_registry.py
index d4e5d2d..ab0b0f2 100644
--- a/backend/csv_parser/module_registry.py
+++ b/backend/csv_parser/module_registry.py
@@ -37,16 +37,43 @@ MODULE_DEFINITIONS: Dict[str, Dict[str, Any]] = {
     "activity": {
         "table": "activity_log",
         "fields": {
-            "date": {"type": "date", "required": False},
-            "start_time": {"type": "datetime", "required": False},
-            "end_time": {"type": "datetime", "required": False},
-            "activity_type": {"type": "string", "required": True},
-            "duration_min": {"type": "float", "required": False, "min": 0},
-            "kcal_active": {"type": "float", "required": False, "unit": "kcal"},
-            "kcal_resting": {"type": "float", "required": False, "unit": "kcal"},
-            "distance_km": {"type": "float", "required": False, "unit": "km"},
-            "hr_avg": {"type": "float", "required": False, "min": 30, "max": 220},
-            "hr_max": {"type": "float", "required": False, "min": 30, "max": 220},
+            "date": {"type": "date", "required": False, "label_de": "Datum"},
+            "start_time": {
+                "type": "datetime",
+                "required": False,
+                "label_de": "Start (Datum/Uhrzeit)",
+            },
+            "end_time": {"type": "datetime", "required": False, "label_de": "Ende (Datum/Uhrzeit)"},
+            "activity_type": {"type": "string", "required": True, "label_de": "Trainingsart / Workout-Typ"},
+            "duration_min": {"type": "float", "required": False, "min": 0, "label_de": "Dauer (Minuten)"},
+            "kcal_active": {"type": "float", "required": False, "unit": "kcal", "label_de": "Kalorien aktiv"},
+            "kcal_resting": {"type": "float", "required": False, "unit": "kcal", "label_de": "Kalorien Ruhe"},
+            "distance_km": {"type": "float", "required": False, "unit": "km", "label_de": "Distanz (km)"},
+            "hr_avg": {
+                "type": "float",
+                "required": False,
+                "min": 30,
+                "max": 220,
+                "label_de": "Herzfrequenz Ø (bpm)",
+            },
+            "hr_max": {
+                "type": "float",
+                "required": False,
+                "min": 30,
+                "max": 220,
+                "label_de": "Herzfrequenz max (bpm)",
+            },
+            "hr_min": {"type": "int", "required": False, "label_de": "Herzfrequenz min (bpm)"},
+            "rpe": {"type": "int", "required": False, "label_de": "RPE (1–10)"},
+            "pace_min_per_km": {"type": "float", "required": False, "label_de": "Tempo (min/km)"},
+            "cadence": {"type": "int", "required": False, "label_de": "Kadenz"},
+            "avg_power": {"type": "int", "required": False, "label_de": "Leistung Ø (W)"},
+            "elevation_gain": {"type": "int", "required": False, "label_de": "Höhenmeter / Aufstieg"},
+            "temperature_celsius": {"type": "float", "required": False, "label_de": "Temperatur (°C)"},
+            "humidity_percent": {"type": "int", "required": False, "label_de": "Luftfeuchtigkeit (%)"},
+            "avg_hr_percent": {"type": "float", "required": False, "label_de": "HF Ø (% von max)"},
+            "kcal_per_km": {"type": "float", "required": False, "label_de": "Kalorien pro km"},
+            "notes": {"type": "string", "required": False, "label_de": "Notiz"},
         },
         "derive_date_from_datetime_field": "start_time",
         "duplicate_key": ["profile_id", "date", "start_time"],
diff --git a/backend/data_layer/activity_persistence_orchestrator.py b/backend/data_layer/activity_persistence_orchestrator.py
index 6c4aa82..56d7f04 100644
--- a/backend/data_layer/activity_persistence_orchestrator.py
+++ b/backend/data_layer/activity_persistence_orchestrator.py
@@ -7,9 +7,10 @@ Feld-Katalog für CSV-Mappings: get_mappable_activity_field_catalog()
 """
 from __future__ import annotations
 
+import datetime as dt
 import logging
 import uuid
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List, Mapping, Optional
 
 from models import ActivityEntry
 
@@ -45,6 +46,89 @@ def find_activity_duplicate_id(
     return str(row["id"]) if row else None
 
 
+# The CSV executor sets date/start/end/type explicitly (normalization); do not overwrite them from this patch.
+_ACTIVITY_CSV_REGISTRY_EXCLUDE = frozenset({"date", "start_time", "end_time", "activity_type"})
+
+
+def activity_registry_field_keys() -> frozenset[str]:
+    # Keys of the "activity" module-registry fields; empty when the module has no definition.
+    definition = get_module_definition("activity")
+    field_specs = (definition.get("fields") or {}) if definition else {}
+    return frozenset(field_specs.keys())
+
+
+def activity_csv_registry_updates_from_mapped(mapped: Mapping[str, Any]) -> Dict[str, Any]:
+    """
+    Build activity_log column updates from CSV-mapped values, restricted to activity module-registry fields.
+    Excluded keys, blank values and unparsable or non-finite numbers are skipped; returns {column: value}.
+    """
+    import math  # local import: only needed for the finite-number check below
+
+    mod = get_module_definition("activity")
+    if not mod:
+        return {}
+    fields = mod.get("fields") or {}
+    out: Dict[str, Any] = {}
+
+    def _num(v: Any) -> float | None:
+        # Finite float or None; OverflowError covers ints too large for float().
+        try:
+            x = float(v)
+        except (TypeError, ValueError, OverflowError):
+            return None
+        return x if math.isfinite(x) else None
+
+    def _sf(v: Any) -> float | None:
+        x = _num(v)
+        return None if x is None else round(x, 1)
+
+    def _si(v: Any) -> int | None:
+        x = _num(v)
+        return None if x is None else int(round(x))
+
+    def _hr(v: Any) -> float | None:
+        # Heart-rate values outside the 20..280 window are dropped.
+        x = _sf(v)
+        return x if x is not None and 20 <= x <= 280 else None
+
+    for key, spec in fields.items():
+        if key in _ACTIVITY_CSV_REGISTRY_EXCLUDE or key not in mapped:
+            continue
+        raw = mapped[key]
+        if raw is None or raw == "":
+            continue
+        if isinstance(raw, str) and not raw.strip():
+            continue
+        typ = spec.get("type", "string")
+        if typ == "float":
+            v = _hr(raw) if key in ("hr_avg", "hr_max") else _sf(raw)
+            if v is not None:
+                out[key] = v
+        elif typ == "int":
+            v = _si(raw)
+            if v is not None:
+                out[key] = v
+        elif typ == "datetime":
+            if isinstance(raw, dt.datetime):
+                out[key] = raw.strftime("%Y-%m-%d %H:%M:%S")
+            elif isinstance(raw, dt.date):
+                out[key] = f"{raw.isoformat()} 00:00:00"
+            elif isinstance(raw, str) and raw.strip():
+                out[key] = raw.strip()
+        elif typ == "date":
+            # datetime is a subclass of date, so it must be tested first.
+            if isinstance(raw, dt.datetime):
+                out[key] = raw.date().isoformat()
+            elif isinstance(raw, dt.date):
+                out[key] = raw.isoformat()
+            elif isinstance(raw, str) and raw.strip():
+                out[key] = raw.strip()
+        else:
+            out[key] = str(raw).strip()
+
+    return out
+
+
 def insert_activity_from_entry(cur, profile_id: str, eid: str, e: ActivityEntry) -> None:
     """INSERT activity_log aus ActivityEntry (manueller API-Pfad)."""
     d = e.model_dump()
@@ -296,7 +380,7 @@ def get_mappable_activity_field_catalog(cur, profile_id: str) -> Dict[str, Any]:
                 "data_type": s.get("type", "string"),
                 "required": bool(s.get("required")),
                 "unit": s.get("unit"),
-                "label_de": key,
+                "label_de": s.get("label_de") or key,
             }
         )
     core_fields.sort(key=lambda x: x["key"])
diff --git a/backend/data_layer/activity_session_metrics.py b/backend/data_layer/activity_session_metrics.py
index 69109e7..ab3c812 100644
--- a/backend/data_layer/activity_session_metrics.py
+++ b/backend/data_layer/activity_session_metrics.py
@@ -9,6 +9,8 @@ import logging
 from decimal import Decimal
 from typing import Any, Dict, List, Mapping, Optional, Sequence
 
+from csv_parser.module_registry import get_module_definition
+
 logger = logging.getLogger(__name__)
 
 # activity_log-Spalten, die per training_parameters.source_field aus CSV (Parameter-Key) befüllt werden dürfen.
@@ -265,46 +267,6 @@ def _coerce_raw_value_for_parameter(data_type: str, raw: Any) -> Any:
     raise ValueError(data_type)
 
 
-def resolve_activity_log_column_patch_from_csv(
-    cur,
-    mapped: Mapping[str, Any],
-    training_category: Optional[str],
-    training_type_id: Optional[int],
-) -> Dict[str, Any]:
-    """
-    Zusätzliche activity_log-Updates aus CSV: Parameter mit source_field → Spalte.
-    """
-    schema = resolve_activity_attribute_schema(cur, training_category, training_type_id)
-    patch: Dict[str, Any] = {}
-    for spec in schema:
-        src_col = (spec.get("source_field") or "").strip()
-        if not src_col or src_col in ACTIVITY_LOG_PATCH_FORBIDDEN:
-            continue
-        if src_col not in ACTIVITY_LOG_PATCHABLE_COLUMNS:
-            continue
-        pkey = spec["key"]
-        if pkey not in mapped:
-            continue
-        raw = mapped[pkey]
-        if raw is None or raw == "":
-            continue
-        dt = spec["data_type"]
-        rules = _validation_rules_dict(spec["validation_rules"])
-        try:
-            coerced = _coerce_raw_value_for_parameter(dt, raw)
-            _validate_single_value(dt, coerced, rules)
-        except (ActivitySessionMetricsError, TypeError, ValueError) as ex:
-            logger.warning(
-                "CSV activity_log patch skipped %s → %s: %s",
-                pkey,
-                src_col,
-                ex,
-            )
-            continue
-        patch[src_col] = coerced
-    return patch
-
-
 def upsert_session_metrics_from_csv_mapped(
     cur,
     profile_id: str,
@@ -314,15 +276,10 @@ def upsert_session_metrics_from_csv_mapped(
     training_type_id: Optional[int],
 ) -> None:
     """
-    EAV für Schema-Parameter aus CSV-mapped Werten.
+    EAV für Trainingsparameter aus CSV (nur Keys, die nicht im activity-Modul-Registry liegen).
 
-    Spalten-gestützte Parameter (source_field ∈ ACTIVITY_LOG_PATCHABLE_COLUMNS) werden
-    ausschließlich über resolve_activity_log_column_patch_from_csv → activity_log
-    geschrieben — hier kein EAV.
-
-    Wenn source_field gesetzt ist, aber **kein** patchbarer Spaltenname (z. B. eigener
-    Key „stola“ wie der Parametername), wäre früher weder Spalten-Update noch EAV erfolgt;
-    dann EAV wie bei reinen Metriken.
+    Kernfelder (Datum, Start, Distanz, HF, …) schreibt der Executor nach activity_log;
+    hier keine doppelten EAV-Zeilen für dieselben Registry-Keys.
     """
     cur.execute(
         "SELECT profile_id FROM activity_log WHERE id = %s",
@@ -331,6 +288,8 @@ def upsert_session_metrics_from_csv_mapped(
     row = cur.fetchone()
     if not row or str(row["profile_id"]) != str(profile_id):
         return
+    mod = get_module_definition("activity") or {}
+    activity_registry_keys = frozenset((mod.get("fields") or {}).keys())
     schema = resolve_activity_attribute_schema(cur, training_category, training_type_id)
     for spec in schema:
         pkey = spec["key"]
@@ -339,8 +298,7 @@ def upsert_session_metrics_from_csv_mapped(
         raw = mapped[pkey]
         if raw is None or raw == "":
             continue
-        src_col = (spec.get("source_field") or "").strip()
-        if src_col and src_col in ACTIVITY_LOG_PATCHABLE_COLUMNS:
+        if pkey in activity_registry_keys:
             continue
         tid = spec["training_parameter_id"]
         dt = spec["data_type"]
diff --git a/frontend/src/pages/ActivityPage.jsx b/frontend/src/pages/ActivityPage.jsx
index c38283e..ec68925 100644
--- a/frontend/src/pages/ActivityPage.jsx
+++ b/frontend/src/pages/ActivityPage.jsx
@@ -98,20 +98,21 @@ function buildMetricsPayload(schema, draft) {
       out.push({ parameter_key: s.key, value: !!raw })
       continue
     }
-    if (raw === '' || raw === null || raw === undefined) {
+    const rawStr = raw === null || raw === undefined ? '' : String(raw).trim()
+    if (rawStr === '') {
       if (s.required) throw new Error(`Pflichtfeld: ${s.name_de}`)
       out.push({ parameter_key: s.key, value: null })
       continue
     }
-    let v = raw
+    let v
     if (s.data_type === 'integer') {
-      v = parseInt(String(raw), 10)
+      v = parseInt(rawStr, 10)
       if (Number.isNaN(v)) throw new Error(`Ungültige Zahl: ${s.name_de}`)
     } else if (s.data_type === 'float') {
-      v = parseFloat(String(raw))
+      v = parseFloat(rawStr)
       if (Number.isNaN(v)) throw new Error(`Ungültige Zahl: ${s.name_de}`)
     } else {
-      v = String(raw)
+      v = rawStr
     }
     out.push({ parameter_key: s.key, value: v })
   }
@@ -532,21 +533,22 @@ export default function ActivityPage() {
         if (!col || !ACTIVITY_LOG_PAYLOAD_KEYS.has(col)) continue
         if (!(s.key in metricDraft)) continue
         const raw = metricDraft[s.key]
-        if (raw === '' || raw === null || raw === undefined) {
+        const rawStr = raw === null || raw === undefined ? '' : String(raw).trim()
+        if (rawStr === '') {
           payload[col] = null
           continue
         }
-        let v = raw
+        let v = rawStr
         if (s.data_type === 'integer') {
-          v = parseInt(String(raw), 10)
-          if (Number.isNaN(v)) continue
+          v = parseInt(rawStr, 10)
+          if (Number.isNaN(v)) throw new Error(`Ungültige Zahl: ${s.name_de}`)
         } else if (s.data_type === 'float') {
-          v = parseFloat(String(raw))
-          if (Number.isNaN(v)) continue
+          v = parseFloat(rawStr)
+          if (Number.isNaN(v)) throw new Error(`Ungültige Zahl: ${s.name_de}`)
         } else if (s.data_type === 'boolean') {
           v = !!raw
         } else {
-          v = String(raw)
+          v = rawStr
         }
         payload[col] = v
       }
diff --git a/frontend/src/pages/AdminCsvTemplateEditorPage.jsx b/frontend/src/pages/AdminCsvTemplateEditorPage.jsx
index 90e6fc3..5440e87 100644
--- a/frontend/src/pages/AdminCsvTemplateEditorPage.jsx
+++ b/frontend/src/pages/AdminCsvTemplateEditorPage.jsx
@@ -297,10 +297,19 @@ export default function AdminCsvTemplateEditorPage() {
   const aggregateSleepImport = modMeta?.import_mode === 'apple_sleep_aggregate'
   const targetOptions = useMemo(() => {
     if (!modMeta?.fields || aggregateSleepImport) return []
-    return Object.entries(modMeta.fields).map(([key, meta]) => ({
-      value: key,
-      label: `${key}${meta.required ? ' *' : ''}`,
-    }))
+    const entries = Object.entries(modMeta.fields).map(([key, meta]) => {
+      const title = meta.label_de || meta.name_de || key
+      return {
+        value: key,
+        label: `${title}${meta.required ? ' *' : ''}`,
+        group: meta.from_training_parameter ? 'eav' : 'log',
+      }
+    })
+    entries.sort((a, b) => {
+      if (a.group !== b.group) return a.group === 'log' ? -1 : 1
+      return a.label.localeCompare(b.label, 'de')
+    })
+    return entries
   }, [modMeta, aggregateSleepImport])
 
   const requiredTargets = useMemo(() => {
@@ -1025,11 +1034,23 @@ export default function AdminCsvTemplateEditorPage() {
                   }}
                 >
                   <option value="-">— ignorieren</option>
-                  {targetOptions.map((o) => (
-                    <option key={o.value} value={o.value}>
-                      {o.label}
-                    </option>
-                  ))}
+                  {['log', 'eav'].map((g) => {
+                    const opts = targetOptions.filter((o) => o.group === g)
+                    if (!opts.length) return null
+                    const ogLabel =
+                      g === 'log'
+                        ? 'Activity — Kernfelder (activity_log)'
+                        : 'Trainingsparameter (EAV)'
+                    return (
+                      <optgroup key={g} label={ogLabel}>
+                        {opts.map((o) => (
+                          <option key={o.value} value={o.value}>
+                            {o.label}
+                          </option>
+                        ))}
+                      </optgroup>
+                    )
+                  })}
                 </select>
               </div>
             ))}