feat: Refactor activity import logic and enhance CSV handling

- Replaced the deprecated `resolve_activity_log_column_patch_from_csv` function with `activity_csv_registry_updates_from_mapped` to streamline updates from CSV mappings.
- Updated the `_import_activity` function to utilize the new registry updates, improving data integrity during activity imports.
- Enhanced the activity module registry by adding German labels for various fields, improving localization support.
- Refactored the session metrics handling to ensure only relevant fields are processed, enhancing the overall robustness of CSV imports.
2026-04-15 10:35:48 +02:00 · 2026-04-15 10:35:48 +02:00 · 08eae86ddc
commit 08eae86ddc
parent 9d47c4ef84
6 changed files with 181 additions and 93 deletions
--- a/backend/csv_parser/executor.py
+++ b/backend/csv_parser/executor.py
@ -808,16 +808,14 @@ def _import_activity(
 ) -> dict[str, int]:
    from data_layer.activity_time_normalize import normalize_activity_start
    from data_layer.activity_persistence_orchestrator import (
+        activity_csv_registry_updates_from_mapped,
        find_activity_duplicate_id,
        insert_activity_csv_minimal,
        new_activity_id,
        run_activity_post_write_hooks_import,
        update_activity_columns,
    )
-    from data_layer.activity_session_metrics import (
-        resolve_activity_log_column_patch_from_csv,
-        upsert_session_metrics_from_csv_mapped,
-    )
+    from data_layer.activity_session_metrics import upsert_session_metrics_from_csv_mapped

    rows_total = 0
    inserted = 0
@ -898,9 +896,7 @@ def _import_activity(
            training_type_id, training_category, training_subcategory = _resolve_training_type_for_activity(
                cur, wtype, profile_id
            )
-            column_patch = resolve_activity_log_column_patch_from_csv(
-                cur, mapped, training_category, training_type_id
-            )
+            registry_updates = activity_csv_registry_updates_from_mapped(mapped)
            existing_id = find_activity_duplicate_id(cur, profile_id, iso, workout_start_t)

            if existing_id:
@ -919,7 +915,7 @@ def _import_activity(
                    "training_subcategory": training_subcategory,
                    "source": "csv",
                }
-                upd.update(column_patch)
+                upd.update(registry_updates)
                update_activity_columns(cur, profile_id, existing_id, upd)
                updated += 1
                affected_ids["activity_log"].append(str(existing_id))
@ -949,8 +945,8 @@ def _import_activity(
                new_entries += 1
                affected_ids["activity_log"].append(str(eid))
                aid = eid
-                if column_patch:
-                    update_activity_columns(cur, profile_id, aid, column_patch)
+                if registry_updates:
+                    update_activity_columns(cur, profile_id, aid, registry_updates)

            run_activity_post_write_hooks_import(
                cur,
--- a/backend/csv_parser/module_registry.py
+++ b/backend/csv_parser/module_registry.py
@ -37,16 +37,43 @@ MODULE_DEFINITIONS: Dict[str, Dict[str, Any]] = {
    "activity": {
        "table": "activity_log",
        "fields": {
-            "date": {"type": "date", "required": False},
-            "start_time": {"type": "datetime", "required": False},
-            "end_time": {"type": "datetime", "required": False},
-            "activity_type": {"type": "string", "required": True},
-            "duration_min": {"type": "float", "required": False, "min": 0},
-            "kcal_active": {"type": "float", "required": False, "unit": "kcal"},
-            "kcal_resting": {"type": "float", "required": False, "unit": "kcal"},
-            "distance_km": {"type": "float", "required": False, "unit": "km"},
-            "hr_avg": {"type": "float", "required": False, "min": 30, "max": 220},
-            "hr_max": {"type": "float", "required": False, "min": 30, "max": 220},
+            "date": {"type": "date", "required": False, "label_de": "Datum"},
+            "start_time": {
+                "type": "datetime",
+                "required": False,
+                "label_de": "Start (Datum/Uhrzeit)",
+            },
+            "end_time": {"type": "datetime", "required": False, "label_de": "Ende (Datum/Uhrzeit)"},
+            "activity_type": {"type": "string", "required": True, "label_de": "Trainingsart / Workout-Typ"},
+            "duration_min": {"type": "float", "required": False, "min": 0, "label_de": "Dauer (Minuten)"},
+            "kcal_active": {"type": "float", "required": False, "unit": "kcal", "label_de": "Kalorien aktiv"},
+            "kcal_resting": {"type": "float", "required": False, "unit": "kcal", "label_de": "Kalorien Ruhe"},
+            "distance_km": {"type": "float", "required": False, "unit": "km", "label_de": "Distanz (km)"},
+            "hr_avg": {
+                "type": "float",
+                "required": False,
+                "min": 30,
+                "max": 220,
+                "label_de": "Herzfrequenz Ø (bpm)",
+            },
+            "hr_max": {
+                "type": "float",
+                "required": False,
+                "min": 30,
+                "max": 220,
+                "label_de": "Herzfrequenz max (bpm)",
+            },
+            "hr_min": {"type": "int", "required": False, "label_de": "Herzfrequenz min (bpm)"},
+            "rpe": {"type": "int", "required": False, "label_de": "RPE (1–10)"},
+            "pace_min_per_km": {"type": "float", "required": False, "label_de": "Tempo (min/km)"},
+            "cadence": {"type": "int", "required": False, "label_de": "Kadenz"},
+            "avg_power": {"type": "int", "required": False, "label_de": "Leistung Ø (W)"},
+            "elevation_gain": {"type": "int", "required": False, "label_de": "Höhenmeter / Aufstieg"},
+            "temperature_celsius": {"type": "float", "required": False, "label_de": "Temperatur (°C)"},
+            "humidity_percent": {"type": "int", "required": False, "label_de": "Luftfeuchtigkeit (%)"},
+            "avg_hr_percent": {"type": "float", "required": False, "label_de": "HF Ø (% von max)"},
+            "kcal_per_km": {"type": "float", "required": False, "label_de": "Kalorien pro km"},
+            "notes": {"type": "string", "required": False, "label_de": "Notiz"},
        },
        "derive_date_from_datetime_field": "start_time",
        "duplicate_key": ["profile_id", "date", "start_time"],
--- a/backend/data_layer/activity_persistence_orchestrator.py
+++ b/backend/data_layer/activity_persistence_orchestrator.py
@ -7,9 +7,10 @@ Feld-Katalog für CSV-Mappings: get_mappable_activity_field_catalog()
 """
 from __future__ import annotations

+import datetime as dt
 import logging
 import uuid
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List, Mapping, Optional

 from models import ActivityEntry

@ -45,6 +46,89 @@ def find_activity_duplicate_id(
    return str(row["id"]) if row else None


+# The CSV executor sets date/start/end/type explicitly (after normalization); this patch must not overwrite them.
+_ACTIVITY_CSV_REGISTRY_EXCLUDE = frozenset({"date", "start_time", "end_time", "activity_type"})
+
+
+def activity_registry_field_keys() -> frozenset[str]:
+    """Return the field keys declared for the ``activity`` module in the module registry.
+
+    Yields an empty frozenset when the module definition is missing or has no fields.
+    """
+    definition = get_module_definition("activity")
+    field_specs = (definition.get("fields") or {}) if definition else {}
+    return frozenset(field_specs.keys())
+
+
+def activity_csv_registry_updates_from_mapped(mapped: Mapping[str, Any]) -> Dict[str, Any]:
+    """
+    Build ``activity_log`` column updates from a mapped CSV row.
+
+    Only keys declared as fields of the ``activity`` module registry are
+    considered. Keys listed in ``_ACTIVITY_CSV_REGISTRY_EXCLUDE`` are skipped
+    because the CSV executor sets them explicitly. Keys that exist only as
+    training parameters go through EAV and are not handled here.
+
+    Args:
+        mapped: Mapping of target field key -> raw value from the CSV mapping step.
+
+    Returns:
+        Dict of field key -> coerced value. Missing, blank, unparsable and
+        non-finite values are left out, as are ``hr_avg``/``hr_max`` values
+        outside the 20..280 range.
+    """
+    import math  # local import so the block does not depend on a file-level import
+
+    mod = get_module_definition("activity")
+    if not mod:
+        return {}
+    fields = mod.get("fields") or {}
+    out: Dict[str, Any] = {}
+
+    def _sf(v: Any) -> float | None:
+        # Float rounded to one decimal; None for blank, unparsable or NaN/inf input.
+        try:
+            if v is None or (isinstance(v, str) and not v.strip()):
+                return None
+            x = float(v)
+        except (TypeError, ValueError, OverflowError):
+            # OverflowError: float() of an int too large for a double.
+            return None
+        # NaN/inf must not reach the database; NaN would also slip through
+        # the range comparison in _hr because every comparison with NaN is False.
+        return round(x, 1) if math.isfinite(x) else None
+
+    def _si(v: Any) -> int | None:
+        # Nearest integer; None for blank, unparsable or NaN/inf input.
+        try:
+            if v is None or (isinstance(v, str) and not v.strip()):
+                return None
+            return int(round(float(v)))
+        except (TypeError, ValueError, OverflowError):
+            # round(nan) raises ValueError, round(inf) raises OverflowError.
+            return None
+
+    def _hr(v: Any) -> float | None:
+        # Heart-rate plausibility window: values outside 20..280 are dropped.
+        x = _sf(v)
+        if x is None or x < 20 or x > 280:
+            return None
+        return x
+
+    for key, spec in fields.items():
+        if key in _ACTIVITY_CSV_REGISTRY_EXCLUDE:
+            continue
+        if key not in mapped:
+            continue
+        raw = mapped[key]
+        if raw is None or raw == "":
+            continue
+        if isinstance(raw, str) and not raw.strip():
+            continue
+        typ = spec.get("type", "string")
+        if typ == "float":
+            v = _hr(raw) if key in ("hr_avg", "hr_max") else _sf(raw)
+            if v is not None:
+                out[key] = v
+        elif typ == "int":
+            v = _si(raw)
+            if v is not None:
+                out[key] = v
+        elif typ == "datetime":
+            # datetime is a subclass of date, so it has to be tested first.
+            if isinstance(raw, dt.datetime):
+                out[key] = raw.strftime("%Y-%m-%d %H:%M:%S")
+            elif isinstance(raw, dt.date):
+                out[key] = f"{raw.isoformat()} 00:00:00"
+            elif isinstance(raw, str) and raw.strip():
+                out[key] = raw.strip()
+        elif typ == "date":
+            # Same subclass ordering: a datetime must be reduced to its date part,
+            # otherwise isoformat() would include the time component.
+            if isinstance(raw, dt.datetime):
+                out[key] = raw.date().isoformat()
+            elif isinstance(raw, dt.date):
+                out[key] = raw.isoformat()
+            elif isinstance(raw, str) and raw.strip():
+                out[key] = raw.strip()
+        else:
+            out[key] = str(raw).strip()
+
+    return out
+
+
 def insert_activity_from_entry(cur, profile_id: str, eid: str, e: ActivityEntry) -> None:
    """INSERT activity_log aus ActivityEntry (manueller API-Pfad)."""
    d = e.model_dump()
@ -296,7 +380,7 @@ def get_mappable_activity_field_catalog(cur, profile_id: str) -> Dict[str, Any]:
                "data_type": s.get("type", "string"),
                "required": bool(s.get("required")),
                "unit": s.get("unit"),
-                "label_de": key,
+                "label_de": s.get("label_de") or key,
            }
        )
    core_fields.sort(key=lambda x: x["key"])
--- a/backend/data_layer/activity_session_metrics.py
+++ b/backend/data_layer/activity_session_metrics.py
@ -9,6 +9,8 @@ import logging
 from decimal import Decimal
 from typing import Any, Dict, List, Mapping, Optional, Sequence

+from csv_parser.module_registry import get_module_definition
+
 logger = logging.getLogger(__name__)

 # activity_log-Spalten, die per training_parameters.source_field aus CSV (Parameter-Key) befüllt werden dürfen.
@ -265,46 +267,6 @@ def _coerce_raw_value_for_parameter(data_type: str, raw: Any) -> Any:
    raise ValueError(data_type)


-def resolve_activity_log_column_patch_from_csv(
-    cur,
-    mapped: Mapping[str, Any],
-    training_category: Optional[str],
-    training_type_id: Optional[int],
-) -> Dict[str, Any]:
-    """
-    Zusätzliche activity_log-Updates aus CSV: Parameter mit source_field → Spalte.
-    """
-    schema = resolve_activity_attribute_schema(cur, training_category, training_type_id)
-    patch: Dict[str, Any] = {}
-    for spec in schema:
-        src_col = (spec.get("source_field") or "").strip()
-        if not src_col or src_col in ACTIVITY_LOG_PATCH_FORBIDDEN:
-            continue
-        if src_col not in ACTIVITY_LOG_PATCHABLE_COLUMNS:
-            continue
-        pkey = spec["key"]
-        if pkey not in mapped:
-            continue
-        raw = mapped[pkey]
-        if raw is None or raw == "":
-            continue
-        dt = spec["data_type"]
-        rules = _validation_rules_dict(spec["validation_rules"])
-        try:
-            coerced = _coerce_raw_value_for_parameter(dt, raw)
-            _validate_single_value(dt, coerced, rules)
-        except (ActivitySessionMetricsError, TypeError, ValueError) as ex:
-            logger.warning(
-                "CSV activity_log patch skipped %s → %s: %s",
-                pkey,
-                src_col,
-                ex,
-            )
-            continue
-        patch[src_col] = coerced
-    return patch
-
-
 def upsert_session_metrics_from_csv_mapped(
    cur,
    profile_id: str,
@ -314,15 +276,10 @@ def upsert_session_metrics_from_csv_mapped(
    training_type_id: Optional[int],
 ) -> None:
    """
-    EAV für Schema-Parameter aus CSV-mapped Werten.
+    EAV für Trainingsparameter aus CSV (nur Keys, die nicht im activity-Modul-Registry liegen).

-    Spalten-gestützte Parameter (source_field ∈ ACTIVITY_LOG_PATCHABLE_COLUMNS) werden
-    ausschließlich über resolve_activity_log_column_patch_from_csv → activity_log
-    geschrieben — hier kein EAV.
-
-    Wenn source_field gesetzt ist, aber **kein** patchbarer Spaltenname (z. B. eigener
-    Key „stola“ wie der Parametername), wäre früher weder Spalten-Update noch EAV erfolgt;
-    dann EAV wie bei reinen Metriken.
+    Kernfelder (Datum, Start, Distanz, HF, …) schreibt der Executor nach activity_log;
+    hier keine doppelten EAV-Zeilen für dieselben Registry-Keys.
    """
    cur.execute(
        "SELECT profile_id FROM activity_log WHERE id = %s",
@ -331,6 +288,8 @@ def upsert_session_metrics_from_csv_mapped(
    row = cur.fetchone()
    if not row or str(row["profile_id"]) != str(profile_id):
        return
+    mod = get_module_definition("activity") or {}
+    activity_registry_keys = frozenset((mod.get("fields") or {}).keys())
    schema = resolve_activity_attribute_schema(cur, training_category, training_type_id)
    for spec in schema:
        pkey = spec["key"]
@ -339,8 +298,7 @@ def upsert_session_metrics_from_csv_mapped(
        raw = mapped[pkey]
        if raw is None or raw == "":
            continue
-        src_col = (spec.get("source_field") or "").strip()
-        if src_col and src_col in ACTIVITY_LOG_PATCHABLE_COLUMNS:
+        if pkey in activity_registry_keys:
            continue
        tid = spec["training_parameter_id"]
        dt = spec["data_type"]
--- a/frontend/src/pages/ActivityPage.jsx
+++ b/frontend/src/pages/ActivityPage.jsx
@ -98,20 +98,21 @@ function buildMetricsPayload(schema, draft) {
      out.push({ parameter_key: s.key, value: !!raw })
      continue
    }
-    if (raw === '' || raw === null || raw === undefined) {
+    const rawStr = raw === null || raw === undefined ? '' : String(raw).trim()
+    if (rawStr === '') {
      if (s.required) throw new Error(`Pflichtfeld: ${s.name_de}`)
      out.push({ parameter_key: s.key, value: null })
      continue
    }
-    let v = raw
+    let v
    if (s.data_type === 'integer') {
-      v = parseInt(String(raw), 10)
+      v = parseInt(rawStr, 10)
      if (Number.isNaN(v)) throw new Error(`Ungültige Zahl: ${s.name_de}`)
    } else if (s.data_type === 'float') {
-      v = parseFloat(String(raw))
+      v = parseFloat(rawStr)
      if (Number.isNaN(v)) throw new Error(`Ungültige Zahl: ${s.name_de}`)
    } else {
-      v = String(raw)
+      v = rawStr
    }
    out.push({ parameter_key: s.key, value: v })
  }
@ -532,21 +533,22 @@ export default function ActivityPage() {
        if (!col || !ACTIVITY_LOG_PAYLOAD_KEYS.has(col)) continue
        if (!(s.key in metricDraft)) continue
        const raw = metricDraft[s.key]
-        if (raw === '' || raw === null || raw === undefined) {
+        const rawStr = raw === null || raw === undefined ? '' : String(raw).trim()
+        if (rawStr === '') {
          payload[col] = null
          continue
        }
-        let v = raw
+        let v = rawStr
        if (s.data_type === 'integer') {
-          v = parseInt(String(raw), 10)
-          if (Number.isNaN(v)) continue
+          v = parseInt(rawStr, 10)
+          if (Number.isNaN(v)) throw new Error(`Ungültige Zahl: ${s.name_de}`)
        } else if (s.data_type === 'float') {
-          v = parseFloat(String(raw))
-          if (Number.isNaN(v)) continue
+          v = parseFloat(rawStr)
+          if (Number.isNaN(v)) throw new Error(`Ungültige Zahl: ${s.name_de}`)
        } else if (s.data_type === 'boolean') {
          v = !!raw
        } else {
-          v = String(raw)
+          v = rawStr
        }
        payload[col] = v
      }
--- a/frontend/src/pages/AdminCsvTemplateEditorPage.jsx
+++ b/frontend/src/pages/AdminCsvTemplateEditorPage.jsx
@ -297,10 +297,19 @@ export default function AdminCsvTemplateEditorPage() {
  const aggregateSleepImport = modMeta?.import_mode === 'apple_sleep_aggregate'
  const targetOptions = useMemo(() => {
    if (!modMeta?.fields || aggregateSleepImport) return []
-    return Object.entries(modMeta.fields).map(([key, meta]) => ({
-      value: key,
-      label: `${key}${meta.required ? ' *' : ''}`,
-    }))
+    const entries = Object.entries(modMeta.fields).map(([key, meta]) => {
+      const title = meta.label_de || meta.name_de || key
+      return {
+        value: key,
+        label: `${title}${meta.required ? ' *' : ''}`,
+        group: meta.from_training_parameter ? 'eav' : 'log',
+      }
+    })
+    entries.sort((a, b) => {
+      if (a.group !== b.group) return a.group === 'log' ? -1 : 1
+      return a.label.localeCompare(b.label, 'de')
+    })
+    return entries
  }, [modMeta, aggregateSleepImport])

  const requiredTargets = useMemo(() => {
@ -1025,11 +1034,23 @@ export default function AdminCsvTemplateEditorPage() {
                  }}
                >
                  <option value="-">— ignorieren</option>
-                  {targetOptions.map((o) => (
-                    <option key={o.value} value={o.value}>
-                      {o.label}
-                    </option>
-                  ))}
+                  {['log', 'eav'].map((g) => {
+                    const opts = targetOptions.filter((o) => o.group === g)
+                    if (!opts.length) return null
+                    const ogLabel =
+                      g === 'log'
+                        ? 'Activity — Kernfelder (activity_log)'
+                        : 'Trainingsparameter (EAV)'
+                    return (
+                      <optgroup key={g} label={ogLabel}>
+                        {opts.map((o) => (
+                          <option key={o.value} value={o.value}>
+                            {o.label}
+                          </option>
+                        ))}
+                      </optgroup>
+                    )
+                  })}
                </select>
              </div>
            ))}