From ca8cee990b7c7f816d7f2c08f9803c130f9180eb Mon Sep 17 00:00:00 2001 From: Lars Date: Wed, 15 Apr 2026 16:59:11 +0200 Subject: [PATCH 01/21] feat: Enhance activity metrics handling and documentation - Updated the README to include new activity production architecture and phases, improving clarity on the development roadmap. - Enhanced the `ACTIVITY_SESSION_METRICS_EAV_AGENT_GUIDE` with details on the target architecture and phase plan for production readiness. - Introduced a new function `merge_column_backed_and_eav_metrics` to streamline the merging of metrics from column-backed and EAV sources, ensuring data integrity and reducing duplication. - Refactored session metrics handling to eliminate deprecated synchronization methods, improving the overall efficiency of data processing. - Added unit tests for the new merging logic, ensuring robust validation of metrics handling. --- .claude/docs/README.md | 3 + ...VITY_PRODUCTION_ARCHITECTURE_AND_PHASES.md | 171 ++++++++++++++++ ...CTIVITY_SESSION_METRICS_EAV_AGENT_GUIDE.md | 23 ++- backend/csv_parser/module_registry.py | 11 +- backend/data_layer/activity_data_canon.py | 78 ++++++++ .../activity_persistence_orchestrator.py | 7 +- .../data_layer/activity_session_metrics.py | 188 ++++++++++++------ .../057_activity_eav_primary_canon.sql | 115 +++++++++++ .../tests/test_activity_session_metrics.py | 105 +++++++++- 9 files changed, 619 insertions(+), 82 deletions(-) create mode 100644 .claude/docs/technical/ACTIVITY_PRODUCTION_ARCHITECTURE_AND_PHASES.md create mode 100644 backend/data_layer/activity_data_canon.py create mode 100644 backend/migrations/057_activity_eav_primary_canon.sql diff --git a/.claude/docs/README.md b/.claude/docs/README.md index c2834d7..43f1596 100644 --- a/.claude/docs/README.md +++ b/.claude/docs/README.md @@ -55,6 +55,7 @@ _Dieser Ordner `.claude/docs/` ist per `.gitignore`-Ausnahme **versioniert** (Sp | Dashboard-Lab-Widgets | `technical/DASHBOARD_WIDGETS_AGENT_GUIDE.md` | Widget-Katalog + Registrierung (siehe Guide) | | Training Profiler / Resolver | `technical/TRAINING_PROFILE_RESOLVER_LAYER1.md`, `functional/TRAINING_TYPE_PROFILES.md` | Resolver-Module wie im Guide genannt | | Universal CSV Import | `technical/UNIVERSAL_CSV_IMPORT_AGENT_GUIDE.md` | `backend/csv_parser/`, `routers/csv_import.py`, `routers/admin_csv_templates.py` | +| Aktivität Produktionsreife | `technical/ACTIVITY_PRODUCTION_ARCHITECTURE_AND_PHASES.md` (+ EAV-Guide) | `backend/data_layer/activity_session_metrics.py`, `activity_metrics.py`, CSV-Orchestrierung | | Mitgliedschaft / Features | `technical/MEMBERSHIP_SYSTEM.md`, `architecture/FEATURE_ENFORCEMENT.md` | `backend/auth.py`, Feature-Logging, Router mit Enforcement | --- @@ -114,6 +115,8 @@ _Dieser Ordner `.claude/docs/` ist per `.gitignore`-Ausnahme **versioniert** (Sp | `TRAINING_TYPE_PROFILES_TECHNICAL.md` | Trainingsprofile technisch | | `UNIVERSAL_CSV_IMPORT_AGENT_GUIDE.md` | Universal CSV: Registry, Executor, Vorlagen, Agent-Checkliste | | `ACTIVITY_SESSION_METRICS_EAV_AGENT_GUIDE.md` | Session-Metriken EAV, Attributprofile, Layer-1, Prod-Migration | +| `ACTIVITY_PRODUCTION_ARCHITECTURE_AND_PHASES.md` | **Zielarchitektur** Aktivität (Spine/EAV/Composites/Import/Layer 1–2) + **Phasenplan A–F** Produktionsreife | +| *(Code)* `backend/data_layer/activity_data_canon.py` | **Kanon** activity CSV-Modul vs. EAV-primär; Legacy-Lesefallback | | `V9D_PHASE2_VITALS_SLEEP.md` | v9d Vitalwerte/Schlaf (Release-Bezug) | --- diff --git a/.claude/docs/technical/ACTIVITY_PRODUCTION_ARCHITECTURE_AND_PHASES.md b/.claude/docs/technical/ACTIVITY_PRODUCTION_ARCHITECTURE_AND_PHASES.md new file mode 100644 index 0000000..4ffb079 --- /dev/null +++ b/.claude/docs/technical/ACTIVITY_PRODUCTION_ARCHITECTURE_AND_PHASES.md @@ -0,0 +1,171 @@ +# Aktivität: Zielarchitektur & Phasenplan (Produktionsreife) + +**Stand:** 2026-04-14 +**Status:** Normative Zielrichtung für `activity_log`, EAV, Composites, Import, Layer 1/2. +**Ergänzt:** `ACTIVITY_SESSION_METRICS_EAV_AGENT_GUIDE.md` (Ist-Modell, APIs, Tests). + +--- + +## 1. Leitprinzipien + +| Prinzip | Bedeutung | +|---------|-----------| +| **Layer 1 = Single Source of Truth** | Alle Auswertungen (Charts, Scores, strukturierte Platzhalter) lesen **nur** über `data_layer` (kanonische Funktionen). Keine parallele SQL-Logik in Routern oder im Placeholder-Resolver für Aktivität. | +| **Eine semantische Größe, eine kanonische Quelle** | Kein Dauer-Sync derselben Bedeutung in `activity_log`-Spalte **und** EAV. Übergang: dokumentierte Abschaltung, nicht implizites Driften. | +| **Spine vs. Parameter** | `activity_log` trägt Identität, Zeit, Typ, Notizen, Audit + **heiße** universelle Skalare (siehe §2.2). Alles Typ-/Admin-Dynamische über EAV. | +| **Composites = Archetyp im Code, Konfiguration in der DB** | Struktur (7+2 Archetypen) und Validierung **versioniert im Repo**; Admin **wählt** Archetyp, **benennt** Slots, **bindet** Sportarten, **mappt** CSV → `(parameter_id, slot_key)`. Kein freies JSON-Schema im Admin. | +| **Import explizit** | Jede CSV-Spalte hat ein klares Ziel: Spine-Spalte, skalarer Parameter oder **Slot** eines Composite-Parameters. Typkonvertierung zentral (Executor / Converter), nicht verteilt. | + +--- + +## 2. Zielarchitektur (Gesamtbild) + +### 2.1 Schichtenmodell + +``` +[CSV / UI / API Write] + ↓ +Orchestrator & Router (Auth, Transaktionen, Feature-Checks) + ↓ +Persistenz: activity_log (Spine + heiße Skalare) + activity_session_metrics (EAV) + ↓ +Layer 1: data_layer (activity_session_metrics.py, activity_metrics.py, …) + ↓ +Layer 2a/2b: Platzhalter-Resolver (Formatierung), Chart-Endpoints (Chart.js-Shapes) + ↓ +KI / UI / Export +``` + +- **Orchestrator:** Schreibpfad, Konsistenz nach Write (kein zweites „Lesen der Wahrheit“ neben Layer 1; optional nur Post-Write-Hooks). +- **Resolver:** für Aktivität **kein** direkter DB-Zugriff; nur Aufruf von Layer 1. + +### 2.2 `activity_log` (Spine + heiße Skalare) + +**Maschinenlesbarer Kanon:** `backend/data_layer/activity_data_canon.py` (`ACTIVITY_MODULE_REGISTRY_FIELD_KEYS`, `ACTIVITY_EAV_PRIMARY_PARAMETER_KEYS`, Legacy-Lesefallback für EAV-primäre Parameter). + +**Immer (fachlich minimal + listenfähig):** `id`, `profile_id`, Kalender-/Zeitfenster (`date`, `started_at`/`ended_at`, ggf. `start_time`/`end_time` bis Konsolidierung), `duration_min`, `training_type_id` (+ ggf. denormalisierte Kategorie), Legacy `activity_type`, `notes`, `source`, `created`. + +**Heiße Skalare (CSV-Modul + `source_field` nach Migration 057):** u. a. `kcal_active`, `kcal_resting`, `distance_km`, `hr_avg`/`hr_max` (Parameter `avg_hr`/`max_hr`), `duration_min`, `rpe` – für Listen und Standard-Aggregate ohne EAV-Join. + +**EAV-primär (erweiterte Metriken):** z. B. Kadenz, Pace, Leistung, Höhe, Umgebung — `training_parameters.source_field` = NULL; Import schreibt EAV; bei leerem EAV optional Lesefallback auf bestehende `activity_log`-Spalte (Migration 057 + Merge-Logik). + +**Session-Qualität / Auswertungsblob:** z. B. `evaluation`, `quality_label`, `overall_score` – **kein** EAV-Parameter-Raster; semantisch „Ergebnis der Einheit“. + +**Nicht dauerhaft doppelt:** dieselbe Semantik nicht parallel pflegen; siehe entfallener Spalte→EAV-Schreib-Sync, Lesepfad `merge_column_backed_and_eav_metrics`. + +### 2.3 EAV (`activity_session_metrics`) + +- **Skalare:** ein `training_parameter`, genau eine `value_*`-Spalte (wie heute). +- **Composites:** ein `training_parameter` pro Composite-Instanz, **ein** gespeichertes Dokument pro Session (serialisiert z. B. in `value_text` als JSON **oder** künftig dedizierte JSONB-Spalte – technische Entscheidung in eigener Migration, Vertrag im Archetyp). +- **Merge-/Schema-Logik:** weiterhin zentral in `activity_session_metrics.py` (effektives Schema aus Kategorie + Typ-Overrides). + +### 2.4 Composite-Metamodell (Ziel) + +**Archetypen (Code, begrenzte Menge):** u. a. Band-/Zonenverteilung, Sequenz-/Übergangsprofil, Intervallblock-, Ereignis-/Aktions-, Kopplungs-/Effizienz-, Modellparameter-Profil; optional Technik-/Zyklus-, Readiness-/Recovery-Profil. + +**Pro Archetyp:** feste strukturelle Regeln (erlaubte Slots, Typen, Pflicht/Optional), Validator + Version. + +**In der DB (Admin):** Zuordnung „Parameter X hat Archetyp A“, Slot-Labels (DE/EN), Einheiten, Aktivierung pro Sportart/Kategorie, Sortierung. + +**Import:** CSV-Spalten → `(training_parameter_id, slot_key)` mit stabilen Keys (`z1_sec`, …), nie nur „Spaltenreihenfolge“. + +### 2.5 Universal CSV & Admin + +- Vorlagen: Mapping inkl. **Composite-Slots** und Typkonvertierung (vollständige Matrix Ziel). +- UI: Trennung **Kern activity_log** vs. **Parameter/EAV** vs. **Composite-Blöcke** (optisch/UX), um Doppel-Tabellen-Chaos zu vermeiden. + +### 2.6 Layer 2 (Platzhalter & Diagramme) + +- Datenbezug **nur** Layer 1. +- Registry-Einträge: `data_layer_module` / `data_layer_function` pflegen; Composite-Auswertung ggf. über Hilfsfunktionen, die JSON → normierte Struktur für Prompts/Charts liefern. + +--- + +## 3. Ist → Soll (Kurz) + +| Bereich | Ist (typisch) | Soll | +|---------|----------------|------| +| Schreibpfad | Teilweise Doppelhaltung Spalte ↔ EAV, Sync-Hooks | Kanon + gezielte Abschaltung; eine Quelle pro Semantik | +| Lesepfad | Layer 1 wächst; Legacy-Spalten noch relevant | `get_activity_session_logical_unit` / `activity_metrics` als alleinige Wahrheit für Consumer | +| Composites | Noch nicht im Einklang mit EAV-Metamodell | Archetypen + Slot-Admin + ein Dokument pro Parameter/Session | +| Import | Mapping teilweise; Typkonvertierung lückenhaft | Vollständige Konvertierung + Composite-Zusammenbau | +| Resolver | Aktivität sauber über Layer 1 | Profil/Focus ggf. später ebenfalls aus Layer 1 | + +--- + +## 4. Vorgehensmodell (Phasen) + +Phasen sind **sequentiell** wo „Abhängigkeit“ steht; Teile können parallel (z. B. UI-Polish) laufen, wenn der Kanon steht. + +### Phase A – Kanon & Abschaltplan (Grundlage) + +**Inhalt:** Schriftliche **Kanon-Tabelle**: pro Messgröße genau eine Quelle (`activity_log` | `eav_scalar` | `eav_composite` | `session_quality`). Liste der Keys, für die **Sync/Spiegelung** endet. + +**Definition of Done:** Review im Team; Referenz in diesem Dokument oder Verweis auf Gitea-Kommentar; keine Code-Änderung zwingend. + +**Erster konkreter Schritt:** Kanon-Tabelle als Checkliste (Spreadsheet oder Gitea-Issue) – **eine Zeile pro Semantik**. + +--- + +### Phase B – Lesepfad härten (Layer 1) + +**Inhalt:** Sicherstellen, dass **alle** relevanten Consumer (mind. `activity_metrics` für Platzhalter/Charts, Activity-Detail-API) dieselbe Merge-/Fallback-Logik nutzen; Legacy-Spalten nur noch als dokumentierter Fallback bis Enddatum. + +**Definition of Done:** Kurze Audit-Liste „Router/Resolver greifen nicht an Aktivität vorbei“; Tests oder manuelle Stichprobe für Detail + ein Chart + 2 Platzhalter. + +**Abhängigkeit:** Phase A für „welche Spalten noch Fallback sind“. + +--- + +### Phase C – Schreibpfad entschlacken + +**Inhalt:** Orchestrierung/CSV: kein Schreiben derselben Semantik an zwei Orten; `sync_column_backed_session_metrics` (o. ä.) **stufig abschalten** oder auf Notfall-Flag; Import schreibt gemäß Kanon. + +**Definition of Done:** Deploy auf Prod mit Monitoring; Stichprobe Import + manuelle Bearbeitung; keine Regression in Listenansicht. + +**Abhängigkeit:** Phase A + B (sonst Lücken beim Lesen). + +--- + +### Phase D – Composite MVP + +**Inhalt:** Ein Archetyp end-to-end (z. B. **Band-/Zonenverteilung**): Code-Validator, DB-Binding (Parameter + Slots), Admin-UI minimal, Import **5 Spalten → ein JSON-Dokument** mit festen Keys, Layer-1-Read (Roh + optional `expand_*`). + +**Definition of Done:** Eine Sportart/Kategorie befüllbar; Dokumentation des JSON-Vertrags im Repo; pytest für Validator/Zusammenbau wo möglich. + +**Abhängigkeit:** Phase A (Kanon „Composites nur als Dokument, nicht doppelt in Spalten“). + +--- + +### Phase E – Composite-Ausbau & Typkonvertierung Import + +**Inhalt:** Weitere Archetypen nach Priorität; Universal-CSV **vollständige** Typkonvertierung für alle gemappten Ziele; Dialog-/Mapping-Konzept (Kern vs. Parameter vs. Composite). + +**Definition of Done:** Matrix „Zieltyp × Converter“ gepflegt; Admin-Flow reviewt. + +--- + +### Phase F – Produktionshärtung + +**Inhalt:** Performance-Indizes bei Bedarf; Observability (Import-Fehler, Validierungs-Fails); Resolver/Profil optional komplett ohne `get_db` für domänische Daten; Doku + Gitea-Issues geschlossen/aktualisiert. + +--- + +## 5. Was zuerst? + +**Sofort (nächster Schritt):** **Phase A – Kanon-Tabelle** (eine Semantik pro Zeile, eine Quelle). Ohne diese Entscheidung riskieren Phase B/C falsche Abschaltungen. + +Direkt danach: **Phase B** (Lesepfad), dann **Phase C** (Schreibpfad), dann **Phase D** (ein Composite-MVP). + +--- + +## 6. Referenzen + +- `ACTIVITY_SESSION_METRICS_EAV_AGENT_GUIDE.md` – Tabellen, APIs, Tests, Backfill-Hinweise +- `UNIVERSAL_CSV_IMPORT_AGENT_GUIDE.md` – Executor, Vorlagen, Typen +- `PLACEHOLDER_REGISTRY_FRAMEWORK.md` – Layer-2-Registrierung +- `functional/DATA_ARCHITECTURE.md` – fachliche Datenarchitektur (Querschnitt) + +--- + +**Version:** 1.0 · Bei Meilensteinen Phasen A–F hier Status-Zeile ergänzen (Datum + kurz „erledigt/in Arbeit“). diff --git a/.claude/docs/technical/ACTIVITY_SESSION_METRICS_EAV_AGENT_GUIDE.md b/.claude/docs/technical/ACTIVITY_SESSION_METRICS_EAV_AGENT_GUIDE.md index 186f46c..604834f 100644 --- a/.claude/docs/technical/ACTIVITY_SESSION_METRICS_EAV_AGENT_GUIDE.md +++ b/.claude/docs/technical/ACTIVITY_SESSION_METRICS_EAV_AGENT_GUIDE.md @@ -4,6 +4,10 @@ **Status:** Kern-Backend (Migration 054, Layer 1, Admin- & Nutzer-API) umgesetzt; Admin-UI & CSV-Mapping folgen. **Ziel:** Sportspezifische **Attributprofile** (Kategorie + optional Trainingstyp-Override) administrierbar; Messwerte pro Session in **EAV**; **alle Auswertungen** sollen künftig über **Layer 1** (`data_layer`) laufen. +**Zielarchitektur, Phasenplan (Produktionsreife):** [`ACTIVITY_PRODUCTION_ARCHITECTURE_AND_PHASES.md`](./ACTIVITY_PRODUCTION_ARCHITECTURE_AND_PHASES.md) – Kanon `activity_log`/EAV, Composites, Import, Layer 1/2, Reihenfolge A–F. + +**Kanon (Code):** `backend/data_layer/activity_data_canon.py` (Repo-Root) — CSV-Modul `activity` vs. EAV-primär; Migration **057**. + --- ## 1. Produktions-Migrationen (Pflicht) @@ -41,7 +45,9 @@ | Modul | Pfad | Aufgabe | |-------|------|---------| -| Session-Metriken & Schema | `backend/data_layer/activity_session_metrics.py` | `resolve_activity_attribute_schema`, `fetch_activity_session_metrics`, `replace_activity_session_metrics`, `get_activity_session_logical_unit`, `enrich_sessions_with_metrics`. | +| Session-Metriken & Schema | `backend/data_layer/activity_session_metrics.py` | `resolve_activity_attribute_schema`, `fetch_activity_session_metrics`, `replace_activity_session_metrics`, `get_activity_session_logical_unit`, `enrich_sessions_with_metrics`, `merge_column_backed_and_eav_metrics`. | + +**Spalten vs. EAV (Lesepfad):** `merge_column_backed_and_eav_metrics` / `get_activity_session_logical_unit` / `enrich_sessions_with_metrics` werten Parameter mit `source_field` **primär aus `activity_log`** aus; EAV ist Fallback (z. B. Legacy) oder für Parameter ohne Spalte. **Kein** automatischer Spalte→EAV-Schreib-Sync mehr in `run_activity_post_write_hooks` / Import-Hooks (vermeidet Doppelhaltung). **Regeln für Agenten:** @@ -81,10 +87,21 @@ Router: `backend/routers/admin_training_parameters.py`, `backend/routers/admin_a ## 5. Agent-Checkliste (nächste Iterationen) +Siehe **Phasen A–F** in [`ACTIVITY_PRODUCTION_ARCHITECTURE_AND_PHASES.md`](./ACTIVITY_PRODUCTION_ARCHITECTURE_AND_PHASES.md). Kurz: + +- [ ] **Phase A:** Kanon-Tabelle (eine Quelle pro Semantik). +- [ ] **Phase B:** Lesepfad Layer 1 härten (Consumer-Audit). +- [ ] **Phase C:** Schreibpfad: Doppelhaltung / Sync stufenweise abschalten. +- [ ] **Phase D:** Composite-MVP (ein Archetyp E2E). +- [ ] **Phase E:** Archetypen ausbauen + CSV-Typkonvertierung vollständig + Mapping-UX. +- [ ] **Phase F:** Härtung Prod (Indizes, Observability, Doku). + +Legacy-Punkte: + - [x] Admin-UI: `frontend/src/pages/AdminActivityAttributeProfilesPage.jsx`, Route `/admin/activity-attribute-profiles`, Admin-Nav-Gruppe „Trainingstypen“. - [x] `/activity` Frontend: Bearbeiten lädt `GET /api/activity/{id}`, dynamische Felder + `PUT /api/activity/{id}/metrics`. -- [ ] Universal CSV: Mapping-Spalten → `training_parameters.key` + Schreiben in EAV (Executor). -- [ ] Optional: Backfill `activity_log.*` → `activity_session_metrics` nach `source_field`. +- [ ] Universal CSV: Mapping inkl. EAV/Composite-Ziele + Executor (fortlaufend). +- [ ] Optional: Backfill / Abschluss `source_field`-Pfad nach Kanon (Phase A/C). - [ ] Dedupe Polar/Apple: nach stabilen `started_at`/`ended_at` + Policy (eigenes Issue). --- diff --git a/backend/csv_parser/module_registry.py b/backend/csv_parser/module_registry.py index ab0b0f2..3786327 100644 --- a/backend/csv_parser/module_registry.py +++ b/backend/csv_parser/module_registry.py @@ -34,6 +34,8 @@ MODULE_DEFINITIONS: Dict[str, Dict[str, Any]] = { }, }, }, + # Kanon: nur Kern/spine + „heiße“ Metriken → activity_log. Erweiterte Parameter → training_parameters / EAV + # (siehe backend/data_layer/activity_data_canon.py). "activity": { "table": "activity_log", "fields": { @@ -63,16 +65,7 @@ MODULE_DEFINITIONS: Dict[str, Dict[str, Any]] = { "max": 220, "label_de": "Herzfrequenz max (bpm)", }, - "hr_min": {"type": "int", "required": False, "label_de": "Herzfrequenz min (bpm)"}, "rpe": {"type": "int", "required": False, "label_de": "RPE (1–10)"}, - "pace_min_per_km": {"type": "float", "required": False, "label_de": "Tempo (min/km)"}, - "cadence": {"type": "int", "required": False, "label_de": "Kadenz"}, - "avg_power": {"type": "int", "required": False, "label_de": "Leistung Ø (W)"}, - "elevation_gain": {"type": "int", "required": False, "label_de": "Höhenmeter / Aufstieg"}, - "temperature_celsius": {"type": "float", "required": False, "label_de": "Temperatur (°C)"}, - "humidity_percent": {"type": "int", "required": False, "label_de": "Luftfeuchtigkeit (%)"}, - "avg_hr_percent": {"type": "float", "required": False, "label_de": "HF Ø (% von max)"}, - "kcal_per_km": {"type": "float", "required": False, "label_de": "Kalorien pro km"}, "notes": {"type": "string", "required": False, "label_de": "Notiz"}, }, "derive_date_from_datetime_field": "start_time", diff --git a/backend/data_layer/activity_data_canon.py b/backend/data_layer/activity_data_canon.py new file mode 100644 index 0000000..17ec223 --- /dev/null +++ b/backend/data_layer/activity_data_canon.py @@ -0,0 +1,78 @@ +""" +Kanonische Aufteilung activity_log vs. EAV für Aktivitätssessions. + +Single Source für: welche Felder das CSV-/Registry-Modul „activity“ direkt in activity_log schreibt, +und welche training_parameters primär über EAV laufen (mit optionalem Lesefallback auf Legacy-Spalten). + +Normative Doku: .claude/docs/technical/ACTIVITY_PRODUCTION_ARCHITECTURE_AND_PHASES.md +""" +from __future__ import annotations + +from typing import Dict, Final + +# ── activity_log: Modul „activity“ (Universal-CSV-Kern) ─────────────────────── +# Nur diese Keys erscheinen in csv_parser.module_registry MODULE_DEFINITIONS["activity"].fields. +# Alles Weitere: training_parameters + EAV (Import über upsert_session_metrics_from_csv_mapped). +ACTIVITY_MODULE_REGISTRY_FIELD_KEYS: Final[frozenset[str]] = frozenset( + { + "date", + "start_time", + "end_time", + "activity_type", + "duration_min", + "kcal_active", + "kcal_resting", + "distance_km", + "hr_avg", + "hr_max", + "rpe", + "notes", + } +) + +# Parameter-Keys (training_parameters.key), die primär in EAV geführt werden; source_field nach Migration 057 NULL. +# Lesefallback: activity_log-Spalte unter ACTIVITY_LOG_LEGACY_COLUMN_FOR_EAV_PRIMARY_PARAM, falls EAV leer. +ACTIVITY_EAV_PRIMARY_PARAMETER_KEYS: Final[frozenset[str]] = frozenset( + { + "min_hr", + "pace_min_per_km", + "cadence", + "avg_power", + "elevation_gain", + "temperature_celsius", + "humidity_percent", + "avg_hr_percent", + "kcal_per_km", + } +) + +# Spaltenname activity_log für Legacy-Lesefallback (Merge), wenn EAV für den Parameter fehlt. +ACTIVITY_LOG_LEGACY_COLUMN_FOR_EAV_PRIMARY_PARAM: Final[Dict[str, str]] = { + "min_hr": "hr_min", + "pace_min_per_km": "pace_min_per_km", + "cadence": "cadence", + "avg_power": "avg_power", + "elevation_gain": "elevation_gain", + "temperature_celsius": "temperature_celsius", + "humidity_percent": "humidity_percent", + "avg_hr_percent": "avg_hr_percent", + "kcal_per_km": "kcal_per_km", +} + +# Spalten, die mit training_parameters.source_field (nach Migration 057) noch activity_log abbilden. +# Erweiterte Metriken sind EAV-primär — nicht hier auflisten. +ACTIVITY_LOG_PATCHABLE_COLUMNS: Final[frozenset[str]] = frozenset( + { + "start_time", + "end_time", + "activity_type", + "duration_min", + "kcal_active", + "kcal_resting", + "hr_avg", + "hr_max", + "distance_km", + "rpe", + "notes", + } +) diff --git a/backend/data_layer/activity_persistence_orchestrator.py b/backend/data_layer/activity_persistence_orchestrator.py index 56d7f04..6085550 100644 --- a/backend/data_layer/activity_persistence_orchestrator.py +++ b/backend/data_layer/activity_persistence_orchestrator.py @@ -15,7 +15,6 @@ from typing import Any, Dict, List, Mapping, Optional from models import ActivityEntry from csv_parser.module_registry import get_module_definition -from data_layer.activity_session_metrics import sync_column_backed_session_metrics logger = logging.getLogger(__name__) @@ -248,7 +247,7 @@ def insert_activity_csv_minimal( def run_activity_post_write_hooks(cur, profile_id: str, eid: str) -> None: - """Auto-Eval (falls aktiv) + EAV-Spiegel aus activity_log-Spalten.""" + """Auto-Eval (falls aktiv). Kein Spalte→EAV-Sync: Lesepfad merge_column_backed_and_eav_metrics.""" if _EVALUATION_AVAILABLE and _evaluate_and_save_activity: cur.execute( """ @@ -269,7 +268,6 @@ def run_activity_post_write_hooks(cur, profile_id: str, eid: str) -> None: _evaluate_and_save_activity(cur, eid, activity_dict, training_type_id, profile_id) except Exception as eval_error: logger.error("[AUTO-EVAL] activity %s: %s", eid, eval_error) - sync_column_backed_session_metrics(cur, str(profile_id), str(eid)) def run_activity_post_write_hooks_import( @@ -286,7 +284,7 @@ def run_activity_post_write_hooks_import( kcal_active: Any, kcal_resting: Any, ) -> None: - """Eval + EAV nach Legacy-Import mit vorgebautem Kontext-Dict.""" + """Auto-Eval nach Import. Kein Spalte→EAV-Sync (siehe run_activity_post_write_hooks).""" if _EVALUATION_AVAILABLE and training_type_id and _evaluate_and_save_activity: try: activity_dict = { @@ -308,7 +306,6 @@ def run_activity_post_write_hooks_import( _evaluate_and_save_activity(cur, eid, activity_dict, training_type_id, profile_id) except Exception as eval_err: logger.warning("[activity import] Auto-Eval fehlgeschlagen: %s", eval_err) - sync_column_backed_session_metrics(cur, str(profile_id), str(eid)) def merge_activity_csv_module_fields( diff --git a/backend/data_layer/activity_session_metrics.py b/backend/data_layer/activity_session_metrics.py index ab3c812..b5ef19d 100644 --- a/backend/data_layer/activity_session_metrics.py +++ b/backend/data_layer/activity_session_metrics.py @@ -10,36 +10,10 @@ from decimal import Decimal from typing import Any, Dict, List, Mapping, Optional, Sequence from csv_parser.module_registry import get_module_definition +from data_layer.activity_data_canon import ACTIVITY_LOG_LEGACY_COLUMN_FOR_EAV_PRIMARY_PARAM logger = logging.getLogger(__name__) -# activity_log-Spalten, die per training_parameters.source_field aus CSV (Parameter-Key) befüllt werden dürfen. -# Muss mit sync_column_backed_session_metrics übereinstimmen (inkl. Kernmetriken wie hr_avg). -ACTIVITY_LOG_PATCHABLE_COLUMNS = frozenset( - { - "start_time", - "end_time", - "activity_type", - "duration_min", - "kcal_active", - "kcal_resting", - "hr_avg", - "hr_max", - "hr_min", - "distance_km", - "rpe", - "pace_min_per_km", - "cadence", - "avg_power", - "elevation_gain", - "temperature_celsius", - "humidity_percent", - "avg_hr_percent", - "kcal_per_km", - "notes", - } -) - # Diese Spalten nicht aus CSV-Parameter-Zuordnung überschreiben (kommen aus Typ-Mapping / System). ACTIVITY_LOG_PATCH_FORBIDDEN = frozenset( { @@ -328,13 +302,87 @@ def upsert_session_metrics_from_csv_mapped( ) +def merge_column_backed_and_eav_metrics( + header: Mapping[str, Any], + schema: Sequence[Dict[str, Any]], + eav_metrics: Sequence[Dict[str, Any]], +) -> List[Dict[str, Any]]: + """ + Effektive Metrikliste: Pro Schema-Parameter mit source_field gilt activity_log als kanonisch, wenn + die Spalte befüllt und koerzierbar ist; sonst Fallback EAV. Reine EAV-Parameter (ohne Spalte oder + leere Spalte) kommen aus EAV. Verhindert doppelte Semantik ohne Schreib-Sync. + """ + eav_by_key = {m["key"]: m for m in eav_metrics} + merged: List[Dict[str, Any]] = [] + keys_handled: set[str] = set() + + for s in schema: + k = s["key"] + tid = s["training_parameter_id"] + dt = s["data_type"] + unit = s.get("unit") + sf = s.get("source_field") + + used_column = False + if sf and isinstance(sf, str) and str(sf).strip(): + col = str(sf).strip() + if col in header and header[col] is not None: + try: + val = _coerce_raw_value_for_parameter(dt, header[col]) + merged.append( + { + "training_parameter_id": tid, + "key": k, + "data_type": dt, + "unit": unit, + "value": val, + } + ) + used_column = True + keys_handled.add(k) + except (TypeError, ValueError): + pass + + if used_column: + continue + if k in eav_by_key: + merged.append(dict(eav_by_key[k])) + keys_handled.add(k) + continue + + legacy_col = ACTIVITY_LOG_LEGACY_COLUMN_FOR_EAV_PRIMARY_PARAM.get(k) + if legacy_col and legacy_col in header and header[legacy_col] is not None: + try: + val = _coerce_raw_value_for_parameter(dt, header[legacy_col]) + merged.append( + { + "training_parameter_id": tid, + "key": k, + "data_type": dt, + "unit": unit, + "value": val, + } + ) + keys_handled.add(k) + except (TypeError, ValueError): + pass + + for m in eav_metrics: + if m["key"] in keys_handled: + continue + merged.append(dict(m)) + + merged.sort(key=lambda x: x["key"]) + return merged + + def sync_column_backed_session_metrics(cur, profile_id: str, activity_log_id: str) -> None: """ - EAV-Zeilen für alle Schema-Parameter mit gesetztem source_field aus der activity_log-Zeile - schreiben (Upsert) bzw. bei NULL in der Quellspalte löschen. Reine Layer-1-Logik; keine Router-Abhängigkeit. + [Veraltet / nicht mehr in Schreibpfaden aufgerufen] - Synchron mit Übergangsphase: activity_log bleibt kanonisch für klassische Spalten; EAV spiegelt dieselben - Werte für Profil/Platzhalter/Detail-API, ohne replace_activity_session_metrics aufzurufen. + Früher: EAV spiegelte activity_log-Spalten für Parameter mit source_field. + Kanon: Spaltenwerte werden bei merge_column_backed_and_eav_metrics beim Lesen berücksichtigt; keine + doppelte Speicherung. Funktion bleibt für optionale Admin-/Reparatur-Skripte. """ cur.execute("SELECT * FROM activity_log WHERE id = %s", (activity_log_id,)) row = cur.fetchone() @@ -527,36 +575,7 @@ def get_activity_session_logical_unit(cur, profile_id: str, activity_log_id: str cur, header.get("training_category"), header.get("training_type_id") ) metrics = fetch_activity_session_metrics(cur, activity_log_id) - by_key = {m["key"]: m for m in metrics} - merged_metrics: List[Dict[str, Any]] = list(metrics) - for s in schema: - k = s["key"] - if k in by_key: - continue - sf = s.get("source_field") - if not sf or (isinstance(sf, str) and not str(sf).strip()): - continue - col = str(sf).strip() - if col not in header: - continue - raw = header.get(col) - if raw is None: - continue - dt = s["data_type"] - try: - val = _coerce_raw_value_for_parameter(dt, raw) - except (TypeError, ValueError): - continue - merged_metrics.append( - { - "training_parameter_id": s["training_parameter_id"], - "key": k, - "data_type": dt, - "unit": s.get("unit"), - "value": val, - } - ) - merged_metrics.sort(key=lambda x: x["key"]) + merged_metrics = merge_column_backed_and_eav_metrics(header, schema, metrics) return { "header": header, "schema": schema, @@ -565,17 +584,33 @@ def get_activity_session_logical_unit(cur, profile_id: str, activity_log_id: str def enrich_sessions_with_metrics(cur, sessions: List[Dict[str, Any]]) -> None: - """Mutates each session dict: adds key 'session_metrics' (list) when sessions non-empty.""" + """ + Mutates each session dict: adds key 'session_metrics' (list). + + Kombiniert EAV mit activity_log-Spalten für Parameter mit source_field (kanonisch: Spalte), + analog zu get_activity_session_logical_unit – ohne doppelte EAV-Speicherung beim Import. + """ if not sessions: return ids = [str(s["id"]) for s in sessions if s.get("id")] if not ids: return ph = ",".join(["%s"] * len(ids)) + + cur.execute( + f"SELECT * FROM activity_log WHERE id IN ({ph})", + ids, + ) + headers_by_id: Dict[str, Dict[str, Any]] = {} + for r in cur.fetchall(): + h = dict(r) + headers_by_id[str(h["id"])] = h + cur.execute( f""" SELECT m.activity_log_id, + m.training_parameter_id, tp.key, tp.data_type, tp.unit, @@ -603,8 +638,33 @@ def enrich_sessions_with_metrics(cur, sessions: List[Dict[str, Any]]) -> None: else: val = r["value_bool"] by_act.setdefault(aid, []).append( - {"key": r["key"], "data_type": dt, "unit": r["unit"], "value": val} + { + "training_parameter_id": r["training_parameter_id"], + "key": r["key"], + "data_type": dt, + "unit": r["unit"], + "value": val, + } ) + + schema_cache: Dict[tuple[Any, Any], List[Dict[str, Any]]] = {} + + def _schema(cat: Any, tid: Any) -> List[Dict[str, Any]]: + cache_key = (cat, tid) + if cache_key not in schema_cache: + schema_cache[cache_key] = resolve_activity_attribute_schema(cur, cat, tid) + return schema_cache[cache_key] + for s in sessions: aid = str(s.get("id")) - s["session_metrics"] = by_act.get(aid, []) + header = headers_by_id.get(aid) + if not header: + s["session_metrics"] = [] + continue + schema = _schema(header.get("training_category"), header.get("training_type_id")) + eav_list = by_act.get(aid, []) + merged = merge_column_backed_and_eav_metrics(header, schema, eav_list) + s["session_metrics"] = [ + {"key": m["key"], "data_type": m["data_type"], "unit": m["unit"], "value": m["value"]} + for m in merged + ] diff --git a/backend/migrations/057_activity_eav_primary_canon.sql b/backend/migrations/057_activity_eav_primary_canon.sql new file mode 100644 index 0000000..e3a74e1 --- /dev/null +++ b/backend/migrations/057_activity_eav_primary_canon.sql @@ -0,0 +1,115 @@ +-- Migration 057: Kanon EAV-primär für erweiterte Trainingsmetriken +-- Date: 2026-04-15 +-- activity_log-Spalten bleiben erhalten (Lesefallback / API); training_parameters.source_field +-- wird für diese Keys entfernt. Idempotenter EAV-Backfill aus Spalten (wie 055), dann source_field NULL. +-- Siehe: backend/data_layer/activity_data_canon.py + +-- min_hr (Spalte hr_min) +INSERT INTO activity_session_metrics ( + activity_log_id, training_parameter_id, + value_num, value_int, value_text, value_bool, updated_at +) +SELECT a.id, tp.id, NULL, a.hr_min, NULL, NULL, NOW() +FROM activity_log a +JOIN training_parameters tp ON tp.key = 'min_hr' AND tp.is_active = true +WHERE a.hr_min IS NOT NULL +ON CONFLICT (activity_log_id, training_parameter_id) DO NOTHING; + +INSERT INTO activity_session_metrics ( + activity_log_id, training_parameter_id, + value_num, value_int, value_text, value_bool, updated_at +) +SELECT a.id, tp.id, a.pace_min_per_km::double precision, NULL, NULL, NULL, NOW() +FROM activity_log a +JOIN training_parameters tp ON tp.key = 'pace_min_per_km' AND tp.is_active = true +WHERE a.pace_min_per_km IS NOT NULL +ON CONFLICT (activity_log_id, training_parameter_id) DO NOTHING; + +INSERT INTO activity_session_metrics ( + activity_log_id, training_parameter_id, + value_num, value_int, value_text, value_bool, updated_at +) +SELECT a.id, tp.id, NULL, a.cadence, NULL, NULL, NOW() +FROM activity_log a +JOIN training_parameters tp ON tp.key = 'cadence' AND tp.is_active = true +WHERE a.cadence IS NOT NULL +ON CONFLICT (activity_log_id, training_parameter_id) DO NOTHING; + +INSERT INTO activity_session_metrics ( + activity_log_id, training_parameter_id, + value_num, value_int, value_text, value_bool, updated_at +) +SELECT a.id, tp.id, NULL, a.avg_power, NULL, NULL, NOW() +FROM activity_log a +JOIN training_parameters tp ON tp.key = 'avg_power' AND tp.is_active = true +WHERE a.avg_power IS NOT NULL +ON CONFLICT (activity_log_id, training_parameter_id) DO NOTHING; + +INSERT INTO activity_session_metrics ( + activity_log_id, training_parameter_id, + value_num, value_int, value_text, value_bool, updated_at +) +SELECT a.id, tp.id, NULL, a.elevation_gain, NULL, NULL, NOW() +FROM activity_log a +JOIN training_parameters tp ON tp.key = 'elevation_gain' AND tp.is_active = true +WHERE a.elevation_gain IS NOT NULL +ON CONFLICT (activity_log_id, training_parameter_id) DO NOTHING; + +INSERT INTO activity_session_metrics ( + activity_log_id, training_parameter_id, + value_num, value_int, value_text, value_bool, updated_at +) +SELECT a.id, tp.id, a.temperature_celsius::double precision, NULL, NULL, NULL, NOW() +FROM activity_log a +JOIN training_parameters tp ON tp.key = 'temperature_celsius' AND tp.is_active = true +WHERE a.temperature_celsius IS NOT NULL +ON CONFLICT (activity_log_id, training_parameter_id) DO NOTHING; + +INSERT INTO activity_session_metrics ( + activity_log_id, training_parameter_id, + value_num, value_int, value_text, value_bool, updated_at +) +SELECT a.id, tp.id, NULL, a.humidity_percent, NULL, NULL, NOW() +FROM activity_log a +JOIN training_parameters tp ON tp.key = 'humidity_percent' AND tp.is_active = true +WHERE a.humidity_percent IS NOT NULL +ON CONFLICT (activity_log_id, training_parameter_id) DO NOTHING; + +INSERT INTO activity_session_metrics ( + activity_log_id, training_parameter_id, + value_num, value_int, value_text, value_bool, updated_at +) +SELECT a.id, tp.id, a.avg_hr_percent::double precision, NULL, NULL, NULL, NOW() +FROM activity_log a +JOIN training_parameters tp ON tp.key = 'avg_hr_percent' AND tp.is_active = true +WHERE a.avg_hr_percent IS NOT NULL +ON CONFLICT (activity_log_id, training_parameter_id) DO NOTHING; + +INSERT INTO activity_session_metrics ( + activity_log_id, training_parameter_id, + value_num, value_int, value_text, value_bool, updated_at +) +SELECT a.id, tp.id, a.kcal_per_km::double precision, NULL, NULL, NULL, NOW() +FROM activity_log a +JOIN training_parameters tp ON tp.key = 'kcal_per_km' AND tp.is_active = true +WHERE a.kcal_per_km IS NOT NULL +ON CONFLICT (activity_log_id, training_parameter_id) DO NOTHING; + +UPDATE training_parameters +SET source_field = NULL +WHERE key IN ( + 'min_hr', + 'pace_min_per_km', + 'cadence', + 'avg_power', + 'elevation_gain', + 'temperature_celsius', + 'humidity_percent', + 'avg_hr_percent', + 'kcal_per_km' +); + +DO $$ +BEGIN + RAISE NOTICE 'Migration 057: EAV-primary canon — backfill + source_field cleared for extended metrics'; +END $$; diff --git a/backend/tests/test_activity_session_metrics.py b/backend/tests/test_activity_session_metrics.py index 930ec21..b8d8bc2 100644 --- a/backend/tests/test_activity_session_metrics.py +++ b/backend/tests/test_activity_session_metrics.py @@ -1,12 +1,14 @@ """Unit tests for data_layer.activity_session_metrics (no DB for most cases).""" import uuid +from unittest.mock import patch import pytest from data_layer.activity_session_metrics import ( ActivitySessionMetricsError, enrich_sessions_with_metrics, + merge_column_backed_and_eav_metrics, merge_parameter_schema_rows, resolve_activity_attribute_schema, _row_value_tuple, @@ -171,22 +173,39 @@ def test_resolve_loads_category_from_training_type_id(): assert cur.executes[0][1] == (42,) -def test_enrich_sessions_batch(): +@patch("data_layer.activity_session_metrics.resolve_activity_attribute_schema", return_value=[]) +def test_enrich_sessions_batch(mock_resolve): aid = str(uuid.uuid4()) bid = str(uuid.uuid4()) class _Cur: def __init__(self): self.params = None + self._fetch_n = 0 def execute(self, sql, params=None): self.sql = sql self.params = params def fetchall(self): + self._fetch_n += 1 + if self._fetch_n == 1: + return [ + { + "id": uuid.UUID(aid), + "training_category": None, + "training_type_id": None, + }, + { + "id": uuid.UUID(bid), + "training_category": None, + "training_type_id": None, + }, + ] return [ { "activity_log_id": uuid.UUID(aid), + "training_parameter_id": 3, "key": "rpe", "data_type": "integer", "unit": None, @@ -202,3 +221,87 @@ def test_enrich_sessions_batch(): assert sessions[0]["session_metrics"][0]["value"] == 7 assert sessions[0]["session_metrics"][0]["key"] == "rpe" assert sessions[1]["session_metrics"] == [] + + +def test_merge_column_backed_prefers_column_over_stale_eav(): + schema = [ + { + "training_parameter_id": 1, + "key": "hr_avg", + "data_type": "float", + "unit": "bpm", + "validation_rules": {}, + "source_field": "hr_avg", + } + ] + eav = [ + { + "training_parameter_id": 1, + "key": "hr_avg", + "data_type": "float", + "unit": "bpm", + "value": 99.0, + } + ] + out = merge_column_backed_and_eav_metrics({"hr_avg": 140.0}, schema, eav) + assert len(out) == 1 + assert out[0]["value"] == 140.0 + + +def test_merge_falls_back_to_eav_when_column_empty(): + schema = [ + { + "training_parameter_id": 1, + "key": "hr_avg", + "data_type": "float", + "unit": "bpm", + "validation_rules": {}, + "source_field": "hr_avg", + } + ] + eav = [ + { + "training_parameter_id": 1, + "key": "hr_avg", + "data_type": "float", + "unit": "bpm", + "value": 99.0, + } + ] + out = merge_column_backed_and_eav_metrics({"hr_avg": None}, schema, eav) + assert len(out) == 1 + assert out[0]["value"] == 99.0 + + +def test_merge_keeps_eav_only_keys(): + schema = [] + eav = [ + { + "training_parameter_id": 2, + "key": "custom_param", + "data_type": "string", + "unit": None, + "value": "x", + } + ] + out = merge_column_backed_and_eav_metrics({}, schema, eav) + assert len(out) == 1 + assert out[0]["key"] == "custom_param" + + +def test_merge_eav_primary_falls_back_to_legacy_hr_min_column(): + """Kanon: min_hr ohne source_field / ohne EAV — Lesefallback Spalte hr_min.""" + schema = [ + { + "training_parameter_id": 9, + "key": "min_hr", + "data_type": "integer", + "unit": "bpm", + "validation_rules": {}, + "source_field": None, + } + ] + out = merge_column_backed_and_eav_metrics({"hr_min": 88}, schema, []) + assert len(out) == 1 + assert out[0]["key"] == "min_hr" + assert out[0]["value"] == 88 From cd29c7d433f607dfa99284542698ff93bd9ca893 Mon Sep 17 00:00:00 2001 From: Lars Date: Thu, 16 Apr 2026 07:25:39 +0200 Subject: [PATCH 02/21] feat: Enhance activity session metrics handling and CSV import logic - Updated the `ACTIVITY_SESSION_METRICS_EAV_AGENT_GUIDE` with new details on CSV import processes and EAV handling, improving documentation clarity. - Refactored the `_import_activity` function to utilize `apply_activity_mapped_column_aliases`, ensuring proper mapping of training parameters and reducing redundancy. - Introduced validation for numeric bounds in the `activity_csv_registry_updates_from_mapped` function, enhancing data integrity during CSV imports. - Added new utility functions to manage column aliasing and streamline the upsert process for session metrics, preventing duplicate entries. - Implemented unit tests to validate the new aliasing logic and ensure correct behavior during session metrics updates. --- ...CTIVITY_SESSION_METRICS_EAV_AGENT_GUIDE.md | 8 +++ backend/csv_parser/executor.py | 67 +++++------------- backend/csv_parser/module_registry.py | 8 ++- .../activity_persistence_orchestrator.py | 25 +++++-- .../data_layer/activity_session_metrics.py | 46 ++++++++++++- ...ndant_eav_for_column_backed_parameters.sql | 14 ++++ .../tests/test_activity_session_metrics.py | 69 +++++++++++++++++++ 7 files changed, 178 insertions(+), 59 deletions(-) create mode 100644 backend/migrations/058_remove_redundant_eav_for_column_backed_parameters.sql diff --git a/.claude/docs/technical/ACTIVITY_SESSION_METRICS_EAV_AGENT_GUIDE.md b/.claude/docs/technical/ACTIVITY_SESSION_METRICS_EAV_AGENT_GUIDE.md index 604834f..0b8edcc 100644 --- a/.claude/docs/technical/ACTIVITY_SESSION_METRICS_EAV_AGENT_GUIDE.md +++ b/.claude/docs/technical/ACTIVITY_SESSION_METRICS_EAV_AGENT_GUIDE.md @@ -10,6 +10,14 @@ --- +## 0. CSV-Import & Doppel-EAV (Kanon) + +- Vor Schreibzugriff: **`apply_activity_mapped_column_aliases`** kopiert Werte von `training_parameters.key` auf `source_field`-Spalte, wenn die Spalte leer ist (z. B. `avg_hr` → `hr_avg`). +- **`activity_csv_registry_updates_from_mapped`** ist die **einzige** Quelle für `activity_log`-Kernspalten aus dem Mapping (Keys = `module_registry.activity.fields`); der Executor **liest** keine parallelen `mapped.get("hr_avg")`-Pfade mehr. +- Plausible Zahlen: **`min`/`max`** in den Feld-Specs der Registry (keine HF-speziellen Key-Listen im Executor). +- **`upsert_session_metrics_from_csv_mapped`** schreibt **keine** EAV-Zeilen für Parameter mit gesetztem **`source_field`** (kanonisch `activity_log`). +- **Migration 058:** Entfernt bestehende redundante EAV-Zeilen für alle Parameter mit `source_field`. + ## 1. Produktions-Migrationen (Pflicht) - **Nur additive Änderungen** bis zur Stabilisierung: neue Tabellen/Spalten **nullable**, kein `DROP COLUMN` / `DELETE` von Altbestand in derselben Story. diff --git a/backend/csv_parser/executor.py b/backend/csv_parser/executor.py index 67c78c7..69f1c06 100644 --- a/backend/csv_parser/executor.py +++ b/backend/csv_parser/executor.py @@ -763,23 +763,6 @@ def _import_vitals_baseline( } -def _sf_act(val: Any) -> float | None: - try: - return round(float(val), 1) if val is not None else None - except (TypeError, ValueError): - return None - - -def _activity_hr_bpm(val: Any) -> float | None: - """Plausible Herzfrequenz (Import); größere Werte oft Fehlzuordnung (z. B. Schrittzahl) → NUMERIC-Overflow.""" - v = _sf_act(val) - if v is None: - return None - if v < 20 or v > 280: - return None - return v - - def _looks_like_time_only(s: str) -> bool: t = s.strip() if not t or " " in t: @@ -815,7 +798,10 @@ def _import_activity( run_activity_post_write_hooks_import, update_activity_columns, ) - from data_layer.activity_session_metrics import upsert_session_metrics_from_csv_mapped + from data_layer.activity_session_metrics import ( + apply_activity_mapped_column_aliases, + upsert_session_metrics_from_csv_mapped, + ) rows_total = 0 inserted = 0 @@ -873,19 +859,6 @@ def _import_activity( else: end_str = "" - duration_min = mapped.get("duration_min") - if duration_min is not None: - try: - duration_min = round(float(duration_min), 1) - except (TypeError, ValueError): - duration_min = None - - kcal_a = _sf_act(mapped.get("kcal_active")) - kcal_r = _sf_act(mapped.get("kcal_resting")) - hr_a = _activity_hr_bpm(mapped.get("hr_avg")) - hr_m = _activity_hr_bpm(mapped.get("hr_max")) - dist = _sf_act(mapped.get("distance_km")) - wtype = str(activity_type).strip() iso = date_d.isoformat() _, workout_start_t = normalize_activity_start(start_key) @@ -896,6 +869,8 @@ def _import_activity( training_type_id, training_category, training_subcategory = _resolve_training_type_for_activity( cur, wtype, profile_id ) + mapped = apply_activity_mapped_column_aliases(cur, dict(mapped), training_category, training_type_id) + # Nur Modul-Registry (Zielstruktur) + Mapping — keine parallelen hardcodierten CSV-Schlüssel. registry_updates = activity_csv_registry_updates_from_mapped(mapped) existing_id = find_activity_duplicate_id(cur, profile_id, iso, workout_start_t) @@ -904,12 +879,6 @@ def _import_activity( "start_time": workout_start_t, "end_time": end_str or None, "activity_type": wtype, - "duration_min": duration_min, - "kcal_active": kcal_a, - "kcal_resting": kcal_r, - "hr_avg": hr_a, - "hr_max": hr_m, - "distance_km": dist, "training_type_id": training_type_id, "training_category": training_category, "training_subcategory": training_subcategory, @@ -930,12 +899,12 @@ def _import_activity( start_time=workout_start_t, end_time=end_str or None, activity_type=wtype, - duration_min=duration_min, - kcal_active=kcal_a, - kcal_resting=kcal_r, - hr_avg=hr_a, - hr_max=hr_m, - distance_km=dist, + duration_min=registry_updates.get("duration_min"), + kcal_active=registry_updates.get("kcal_active"), + kcal_resting=registry_updates.get("kcal_resting"), + hr_avg=registry_updates.get("hr_avg"), + hr_max=registry_updates.get("hr_max"), + distance_km=registry_updates.get("distance_km"), training_type_id=training_type_id, training_category=training_category, training_subcategory=training_subcategory, @@ -954,12 +923,12 @@ def _import_activity( str(aid), workout_date=iso, training_type_id=training_type_id, - duration_min=duration_min, - hr_avg=hr_a, - hr_max=hr_m, - distance_km=dist, - kcal_active=kcal_a, - kcal_resting=kcal_r, + duration_min=registry_updates.get("duration_min"), + hr_avg=registry_updates.get("hr_avg"), + hr_max=registry_updates.get("hr_max"), + distance_km=registry_updates.get("distance_km"), + kcal_active=registry_updates.get("kcal_active"), + kcal_resting=registry_updates.get("kcal_resting"), ) upsert_session_metrics_from_csv_mapped( cur, diff --git a/backend/csv_parser/module_registry.py b/backend/csv_parser/module_registry.py index 3786327..5564fc2 100644 --- a/backend/csv_parser/module_registry.py +++ b/backend/csv_parser/module_registry.py @@ -65,7 +65,13 @@ MODULE_DEFINITIONS: Dict[str, Dict[str, Any]] = { "max": 220, "label_de": "Herzfrequenz max (bpm)", }, - "rpe": {"type": "int", "required": False, "label_de": "RPE (1–10)"}, + "rpe": { + "type": "int", + "required": False, + "min": 1, + "max": 10, + "label_de": "RPE (1–10)", + }, "notes": {"type": "string", "required": False, "label_de": "Notiz"}, }, "derive_date_from_datetime_field": "start_time", diff --git a/backend/data_layer/activity_persistence_orchestrator.py b/backend/data_layer/activity_persistence_orchestrator.py index 6085550..46feac4 100644 --- a/backend/data_layer/activity_persistence_orchestrator.py +++ b/backend/data_layer/activity_persistence_orchestrator.py @@ -83,11 +83,20 @@ def activity_csv_registry_updates_from_mapped(mapped: Mapping[str, Any]) -> Dict except (TypeError, ValueError): return None - def _hr(v: Any) -> float | None: - x = _sf(v) - if x is None or x < 20 or x > 280: - return None - return x + def _within_num_bounds(v: float | int, spec: dict, *, as_float: bool) -> bool: + mn = spec.get("min") + mx = spec.get("max") + if mn is not None: + if as_float and v < float(mn): + return False + if not as_float and v < int(mn): + return False + if mx is not None: + if as_float and v > float(mx): + return False + if not as_float and v > int(mx): + return False + return True for key, spec in fields.items(): if key in _ACTIVITY_CSV_REGISTRY_EXCLUDE: @@ -101,11 +110,15 @@ def activity_csv_registry_updates_from_mapped(mapped: Mapping[str, Any]) -> Dict continue typ = spec.get("type", "string") if typ == "float": - v = _hr(raw) if key in ("hr_avg", "hr_max") else _sf(raw) + v = _sf(raw) + if v is not None and not _within_num_bounds(v, spec, as_float=True): + v = None if v is not None: out[key] = v elif typ == "int": v = _si(raw) + if v is not None and not _within_num_bounds(v, spec, as_float=False): + v = None if v is not None: out[key] = v elif typ == "datetime": diff --git a/backend/data_layer/activity_session_metrics.py b/backend/data_layer/activity_session_metrics.py index b5ef19d..8ca9c88 100644 --- a/backend/data_layer/activity_session_metrics.py +++ b/backend/data_layer/activity_session_metrics.py @@ -241,6 +241,43 @@ def _coerce_raw_value_for_parameter(data_type: str, raw: Any) -> Any: raise ValueError(data_type) +def apply_activity_mapped_column_aliases_from_schema( + mapped: Mapping[str, Any], + schema: Sequence[Dict[str, Any]], +) -> Dict[str, Any]: + """ + training_parameters.key weicht oft von activity_log-Spalte ab (z. B. avg_hr → hr_avg). + Kopiert Werte auf die Spalte, wenn die Spalte leer ist, damit CSV/Registry activity_log befüllt. + """ + m = dict(mapped) + for s in schema: + sf = s.get("source_field") + if not sf or not str(sf).strip(): + continue + col = str(sf).strip() + pkey = s["key"] + if pkey == col: + continue + col_v = m.get(col) + if col_v is not None and col_v != "": + continue + pk_v = m.get(pkey) + if pk_v is None or pk_v == "": + continue + m[col] = pk_v + return m + + +def apply_activity_mapped_column_aliases( + cur, + mapped: Mapping[str, Any], + training_category: Optional[str], + training_type_id: Optional[int], +) -> Dict[str, Any]: + schema = resolve_activity_attribute_schema(cur, training_category, training_type_id) + return apply_activity_mapped_column_aliases_from_schema(mapped, schema) + + def upsert_session_metrics_from_csv_mapped( cur, profile_id: str, @@ -250,10 +287,10 @@ def upsert_session_metrics_from_csv_mapped( training_type_id: Optional[int], ) -> None: """ - EAV für Trainingsparameter aus CSV (nur Keys, die nicht im activity-Modul-Registry liegen). + EAV für Trainingsparameter aus CSV (nur Keys ohne activity_log-Spalte / ohne source_field). - Kernfelder (Datum, Start, Distanz, HF, …) schreibt der Executor nach activity_log; - hier keine doppelten EAV-Zeilen für dieselben Registry-Keys. + Parameter mit gesetztem source_field sind kanonisch in activity_log — kein EAV-Schreiben (vermeidet + Doppelung zu avg_hr vs. hr_avg o. Ä.). Keys im activity-CSV-Modul werden ebenfalls übersprungen. """ cur.execute( "SELECT profile_id FROM activity_log WHERE id = %s", @@ -274,6 +311,9 @@ def upsert_session_metrics_from_csv_mapped( continue if pkey in activity_registry_keys: continue + sf_raw = spec.get("source_field") + if sf_raw is not None and str(sf_raw).strip(): + continue tid = spec["training_parameter_id"] dt = spec["data_type"] rules = _validation_rules_dict(spec["validation_rules"]) diff --git a/backend/migrations/058_remove_redundant_eav_for_column_backed_parameters.sql b/backend/migrations/058_remove_redundant_eav_for_column_backed_parameters.sql new file mode 100644 index 0000000..c50b312 --- /dev/null +++ b/backend/migrations/058_remove_redundant_eav_for_column_backed_parameters.sql @@ -0,0 +1,14 @@ +-- Migration 058: EAV-Zeilen entfernen, die nur activity_log-Spalten spiegeln (source_field gesetzt). +-- Kanon: merge_column_backed_and_eav_metrics liest diese Werte aus activity_log; Doppelzeilen vermeiden. +-- Date: 2026-04-15 + +DELETE FROM activity_session_metrics asm +USING training_parameters tp +WHERE asm.training_parameter_id = tp.id + AND tp.source_field IS NOT NULL + AND trim(tp.source_field) <> ''; + +DO $$ +BEGIN + RAISE NOTICE 'Migration 058: removed EAV rows for column-backed training_parameters (source_field set)'; +END $$; diff --git a/backend/tests/test_activity_session_metrics.py b/backend/tests/test_activity_session_metrics.py index b8d8bc2..02dfe2d 100644 --- a/backend/tests/test_activity_session_metrics.py +++ b/backend/tests/test_activity_session_metrics.py @@ -7,10 +7,12 @@ import pytest from data_layer.activity_session_metrics import ( ActivitySessionMetricsError, + apply_activity_mapped_column_aliases_from_schema, enrich_sessions_with_metrics, merge_column_backed_and_eav_metrics, merge_parameter_schema_rows, resolve_activity_attribute_schema, + upsert_session_metrics_from_csv_mapped, _row_value_tuple, _validate_single_value, ) @@ -289,6 +291,73 @@ def test_merge_keeps_eav_only_keys(): assert out[0]["key"] == "custom_param" +def test_apply_mapped_aliases_copies_avg_hr_to_hr_avg(): + schema = [ + { + "key": "avg_hr", + "training_parameter_id": 1, + "source_field": "hr_avg", + "data_type": "integer", + "unit": "bpm", + "validation_rules": {}, + } + ] + out = apply_activity_mapped_column_aliases_from_schema({"avg_hr": 118}, schema) + assert out["avg_hr"] == 118 + assert out["hr_avg"] == 118 + + +def test_apply_mapped_aliases_does_not_overwrite_existing_column(): + schema = [ + { + "key": "avg_hr", + "training_parameter_id": 1, + "source_field": "hr_avg", + "data_type": "integer", + "unit": "bpm", + "validation_rules": {}, + } + ] + out = apply_activity_mapped_column_aliases_from_schema({"avg_hr": 999, "hr_avg": 120}, schema) + assert out["hr_avg"] == 120 + + +@patch("data_layer.activity_session_metrics.resolve_activity_attribute_schema") +def test_upsert_csv_skips_parameter_with_source_field(mock_schema): + """Kein INSERT in activity_session_metrics für Spalten-Parameter (avg_hr → hr_avg).""" + mock_schema.return_value = [ + { + "key": "avg_hr", + "training_parameter_id": 42, + "data_type": "integer", + "validation_rules": {"min": 30, "max": 220}, + "source_field": "hr_avg", + } + ] + + class Cur: + def __init__(self): + self.asm_inserts = 0 + + def execute(self, sql, params=None): + if "INSERT INTO activity_session_metrics" in sql: + self.asm_inserts += 1 + + def fetchone(self): + return {"profile_id": "00000000-0000-0000-0000-000000000001"} + + cur = Cur() + upsert_session_metrics_from_csv_mapped( + cur, + "00000000-0000-0000-0000-000000000001", + "00000000-0000-0000-0000-000000000002", + {"avg_hr": 130}, + "cardio", + 1, + ) + assert cur.asm_inserts == 0 + + def test_merge_eav_primary_falls_back_to_legacy_hr_min_column(): """Kanon: min_hr ohne source_field / ohne EAV — Lesefallback Spalte hr_min.""" schema = [ From 5cda48545883ceb36a50da418581de2b3ec37d37 Mon Sep 17 00:00:00 2001 From: Lars Date: Thu, 16 Apr 2026 10:35:08 +0200 Subject: [PATCH 03/21] feat: Refactor activity data handling and improve CSV import logic - Updated `ACTIVITY_PRODUCTION_ARCHITECTURE_AND_PHASES.md` to clarify the derivation of `ACTIVITY_MODULE_REGISTRY_FIELD_KEYS` from `csv_parser.module_registry`. - Enhanced `activity_data_canon.py` to eliminate hardcoded key lists, ensuring all registry fields are derived dynamically. - Refactored the `_import_activity` function to remove redundant parameters and streamline the import process. - Improved the `insert_activity_csv_minimal` function to handle metrics exclusively through `update_activity_columns`, preventing hardcoded values. - Updated frontend components to manage editable activity log fields more effectively, ensuring proper handling of metrics during CSV imports. - Added unit tests to validate the new logic and ensure consistency in activity session metrics handling. --- ...VITY_PRODUCTION_ARCHITECTURE_AND_PHASES.md | 4 +- ...CTIVITY_SESSION_METRICS_EAV_AGENT_GUIDE.md | 4 +- backend/csv_parser/executor.py | 13 --- backend/data_layer/activity_data_canon.py | 55 +++++------- .../activity_persistence_orchestrator.py | 83 ++++++++----------- .../data_layer/activity_session_metrics.py | 29 +++++-- backend/routers/activity.py | 34 ++++---- .../tests/test_activity_session_metrics.py | 71 ++++++++++++++++ frontend/src/pages/ActivityPage.jsx | 33 ++++++-- 9 files changed, 191 insertions(+), 135 deletions(-) diff --git a/.claude/docs/technical/ACTIVITY_PRODUCTION_ARCHITECTURE_AND_PHASES.md b/.claude/docs/technical/ACTIVITY_PRODUCTION_ARCHITECTURE_AND_PHASES.md index 4ffb079..2547d2e 100644 --- a/.claude/docs/technical/ACTIVITY_PRODUCTION_ARCHITECTURE_AND_PHASES.md +++ b/.claude/docs/technical/ACTIVITY_PRODUCTION_ARCHITECTURE_AND_PHASES.md @@ -41,7 +41,7 @@ KI / UI / Export ### 2.2 `activity_log` (Spine + heiße Skalare) -**Maschinenlesbarer Kanon:** `backend/data_layer/activity_data_canon.py` (`ACTIVITY_MODULE_REGISTRY_FIELD_KEYS`, `ACTIVITY_EAV_PRIMARY_PARAMETER_KEYS`, Legacy-Lesefallback für EAV-primäre Parameter). +**Maschinenlesbarer Kanon:** `backend/data_layer/activity_data_canon.py` — `ACTIVITY_MODULE_REGISTRY_FIELD_KEYS` wird aus `csv_parser.module_registry` (`activity.fields`) abgeleitet; zusätzlich `ACTIVITY_EAV_PRIMARY_PARAMETER_KEYS` und Legacy-Lesefallback für EAV-primäre Parameter. **Immer (fachlich minimal + listenfähig):** `id`, `profile_id`, Kalender-/Zeitfenster (`date`, `started_at`/`ended_at`, ggf. `start_time`/`end_time` bis Konsolidierung), `duration_min`, `training_type_id` (+ ggf. denormalisierte Kategorie), Legacy `activity_type`, `notes`, `source`, `created`. @@ -101,7 +101,7 @@ Phasen sind **sequentiell** wo „Abhängigkeit“ steht; Teile können parallel **Inhalt:** Schriftliche **Kanon-Tabelle**: pro Messgröße genau eine Quelle (`activity_log` | `eav_scalar` | `eav_composite` | `session_quality`). Liste der Keys, für die **Sync/Spiegelung** endet. -**Definition of Done:** Review im Team; Referenz in diesem Dokument oder Verweis auf Gitea-Kommentar; keine Code-Änderung zwingend. +**Definition of Done:** Review im Team; Referenz in diesem Dokument oder Verweis auf Gitea-Kommentar. **Code (2026-04-16):** Spine-Keys des CSV-Moduls `activity` sind nur noch die Registry-Keys (`get_activity_module_registry_field_keys`); CSV-Minimal-Insert + `update_activity_columns` + DB-Read im Import-Eval-Hook — keine duplizierte hr_avg-Verdrahtung im Executor. **Erster konkreter Schritt:** Kanon-Tabelle als Checkliste (Spreadsheet oder Gitea-Issue) – **eine Zeile pro Semantik**. diff --git a/.claude/docs/technical/ACTIVITY_SESSION_METRICS_EAV_AGENT_GUIDE.md b/.claude/docs/technical/ACTIVITY_SESSION_METRICS_EAV_AGENT_GUIDE.md index 0b8edcc..a12d5a9 100644 --- a/.claude/docs/technical/ACTIVITY_SESSION_METRICS_EAV_AGENT_GUIDE.md +++ b/.claude/docs/technical/ACTIVITY_SESSION_METRICS_EAV_AGENT_GUIDE.md @@ -6,7 +6,7 @@ **Zielarchitektur, Phasenplan (Produktionsreife):** [`ACTIVITY_PRODUCTION_ARCHITECTURE_AND_PHASES.md`](./ACTIVITY_PRODUCTION_ARCHITECTURE_AND_PHASES.md) – Kanon `activity_log`/EAV, Composites, Import, Layer 1/2, Reihenfolge A–F. -**Kanon (Code):** `backend/data_layer/activity_data_canon.py` (Repo-Root) — CSV-Modul `activity` vs. EAV-primär; Migration **057**. +**Kanon (Code):** `backend/data_layer/activity_data_canon.py` — Spine-Keys **nur** aus `csv_parser.module_registry` (`get_activity_module_registry_field_keys()` → `ACTIVITY_MODULE_REGISTRY_FIELD_KEYS`); kein paralleles Hardcoding. EAV-primär + Migration **057** unverändert. --- @@ -97,7 +97,7 @@ Router: `backend/routers/admin_training_parameters.py`, `backend/routers/admin_a Siehe **Phasen A–F** in [`ACTIVITY_PRODUCTION_ARCHITECTURE_AND_PHASES.md`](./ACTIVITY_PRODUCTION_ARCHITECTURE_AND_PHASES.md). Kurz: -- [ ] **Phase A:** Kanon-Tabelle (eine Quelle pro Semantik). +- [x] **Phase A (Code-Kanon):** Spine-Felder = Registry `activity.fields`; `insert_activity_csv_minimal` nur Kopf, Metriken via `update_activity_columns` / `activity_csv_registry_updates_from_mapped`; Import-Eval liest Session aus DB. *(Spreadsheet „eine Semantik pro Zeile“ weiterhin fachlich empfohlen.)* - [ ] **Phase B:** Lesepfad Layer 1 härten (Consumer-Audit). - [ ] **Phase C:** Schreibpfad: Doppelhaltung / Sync stufenweise abschalten. - [ ] **Phase D:** Composite-MVP (ein Archetyp E2E). diff --git a/backend/csv_parser/executor.py b/backend/csv_parser/executor.py index 69f1c06..03a30ed 100644 --- a/backend/csv_parser/executor.py +++ b/backend/csv_parser/executor.py @@ -899,12 +899,6 @@ def _import_activity( start_time=workout_start_t, end_time=end_str or None, activity_type=wtype, - duration_min=registry_updates.get("duration_min"), - kcal_active=registry_updates.get("kcal_active"), - kcal_resting=registry_updates.get("kcal_resting"), - hr_avg=registry_updates.get("hr_avg"), - hr_max=registry_updates.get("hr_max"), - distance_km=registry_updates.get("distance_km"), training_type_id=training_type_id, training_category=training_category, training_subcategory=training_subcategory, @@ -921,14 +915,7 @@ def _import_activity( cur, profile_id, str(aid), - workout_date=iso, training_type_id=training_type_id, - duration_min=registry_updates.get("duration_min"), - hr_avg=registry_updates.get("hr_avg"), - hr_max=registry_updates.get("hr_max"), - distance_km=registry_updates.get("distance_km"), - kcal_active=registry_updates.get("kcal_active"), - kcal_resting=registry_updates.get("kcal_resting"), ) upsert_session_metrics_from_csv_mapped( cur, diff --git a/backend/data_layer/activity_data_canon.py b/backend/data_layer/activity_data_canon.py index 17ec223..9f3ad81 100644 --- a/backend/data_layer/activity_data_canon.py +++ b/backend/data_layer/activity_data_canon.py @@ -5,30 +5,31 @@ Single Source für: welche Felder das CSV-/Registry-Modul „activity“ direkt und welche training_parameters primär über EAV laufen (mit optionalem Lesefallback auf Legacy-Spalten). Normative Doku: .claude/docs/technical/ACTIVITY_PRODUCTION_ARCHITECTURE_AND_PHASES.md + +Phase A: Keine zweite hartcodierte Key-Liste — Registry-Felder kommen ausschließlich aus +``csv_parser.module_registry.MODULE_DEFINITIONS["activity"].fields``. """ from __future__ import annotations from typing import Dict, Final +from csv_parser.module_registry import get_module_definition + + +def get_activity_module_registry_field_keys() -> frozenset[str]: + """Keys des Universal-CSV-Moduls ``activity`` (= Spine-Spalten-Namen in activity_log).""" + mod = get_module_definition("activity") + if not mod: + return frozenset() + return frozenset((mod.get("fields") or {}).keys()) + + # ── activity_log: Modul „activity“ (Universal-CSV-Kern) ─────────────────────── -# Nur diese Keys erscheinen in csv_parser.module_registry MODULE_DEFINITIONS["activity"].fields. -# Alles Weitere: training_parameters + EAV (Import über upsert_session_metrics_from_csv_mapped). -ACTIVITY_MODULE_REGISTRY_FIELD_KEYS: Final[frozenset[str]] = frozenset( - { - "date", - "start_time", - "end_time", - "activity_type", - "duration_min", - "kcal_active", - "kcal_resting", - "distance_km", - "hr_avg", - "hr_max", - "rpe", - "notes", - } -) +# Ableitung aus module_registry — bei neuen Registry-Feldern hier kein manuelles Update nötig. +ACTIVITY_MODULE_REGISTRY_FIELD_KEYS: Final[frozenset[str]] = get_activity_module_registry_field_keys() + +# Teil-UPDATEs (Import): alle Registry-Kernfelder außer ``date`` (Identität/Duplikat-Key). +ACTIVITY_LOG_PATCHABLE_COLUMNS: Final[frozenset[str]] = ACTIVITY_MODULE_REGISTRY_FIELD_KEYS - {"date"} # Parameter-Keys (training_parameters.key), die primär in EAV geführt werden; source_field nach Migration 057 NULL. # Lesefallback: activity_log-Spalte unter ACTIVITY_LOG_LEGACY_COLUMN_FOR_EAV_PRIMARY_PARAM, falls EAV leer. @@ -58,21 +59,3 @@ ACTIVITY_LOG_LEGACY_COLUMN_FOR_EAV_PRIMARY_PARAM: Final[Dict[str, str]] = { "avg_hr_percent": "avg_hr_percent", "kcal_per_km": "kcal_per_km", } - -# Spalten, die mit training_parameters.source_field (nach Migration 057) noch activity_log abbilden. -# Erweiterte Metriken sind EAV-primär — nicht hier auflisten. -ACTIVITY_LOG_PATCHABLE_COLUMNS: Final[frozenset[str]] = frozenset( - { - "start_time", - "end_time", - "activity_type", - "duration_min", - "kcal_active", - "kcal_resting", - "hr_avg", - "hr_max", - "distance_km", - "rpe", - "notes", - } -) diff --git a/backend/data_layer/activity_persistence_orchestrator.py b/backend/data_layer/activity_persistence_orchestrator.py index 46feac4..4971d58 100644 --- a/backend/data_layer/activity_persistence_orchestrator.py +++ b/backend/data_layer/activity_persistence_orchestrator.py @@ -15,6 +15,7 @@ from typing import Any, Dict, List, Mapping, Optional from models import ActivityEntry from csv_parser.module_registry import get_module_definition +from data_layer.activity_data_canon import get_activity_module_registry_field_keys logger = logging.getLogger(__name__) @@ -50,10 +51,8 @@ _ACTIVITY_CSV_REGISTRY_EXCLUDE = frozenset({"date", "start_time", "end_time", "a def activity_registry_field_keys() -> frozenset[str]: - mod = get_module_definition("activity") - if not mod: - return frozenset() - return frozenset((mod.get("fields") or {}).keys()) + """Gleiche Menge wie ``ACTIVITY_MODULE_REGISTRY_FIELD_KEYS`` (Phase A: eine Quelle).""" + return get_activity_module_registry_field_keys() def activity_csv_registry_updates_from_mapped(mapped: Mapping[str, Any]) -> Dict[str, Any]: @@ -217,18 +216,17 @@ def insert_activity_csv_minimal( start_time: Any, end_time: Any, activity_type: str, - duration_min: Any, - kcal_active: Any, - kcal_resting: Any, - hr_avg: Any, - hr_max: Any, - distance_km: Any, training_type_id: Any, training_category: Any, training_subcategory: Any, source: str, ) -> None: - """INSERT minimale activity_log-Zeile (Universal-CSV).""" + """ + INSERT Kopfzeile für Universal-CSV / Legacy-Import. + + Metriken aus ``activity_csv_registry_updates_from_mapped`` (oder manuelles Dict) — + ausschließlich via ``update_activity_columns``; keine fest verdrahteten hr_avg-Parameter. + """ cur.execute( """ INSERT INTO activity_log ( @@ -236,7 +234,7 @@ def insert_activity_csv_minimal( kcal_active, kcal_resting, hr_avg, hr_max, distance_km, source, training_type_id, training_category, training_subcategory, created ) - VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,CURRENT_TIMESTAMP) + VALUES (%s,%s,%s,%s,%s,%s,NULL,NULL,NULL,NULL,NULL,NULL,%s,%s,%s,%s,CURRENT_TIMESTAMP) """, ( eid, @@ -245,12 +243,6 @@ def insert_activity_csv_minimal( start_time, end_time, activity_type, - duration_min, - kcal_active, - kcal_resting, - hr_avg, - hr_max, - distance_km, source, training_type_id, training_category, @@ -288,37 +280,32 @@ def run_activity_post_write_hooks_import( profile_id: str, eid: str, *, - workout_date: str, - training_type_id: Optional[int], - duration_min: Any, - hr_avg: Any, - hr_max: Any, - distance_km: Any, - kcal_active: Any, - kcal_resting: Any, + training_type_id: Optional[int] = None, ) -> None: - """Auto-Eval nach Import. Kein Spalte→EAV-Sync (siehe run_activity_post_write_hooks).""" - if _EVALUATION_AVAILABLE and training_type_id and _evaluate_and_save_activity: - try: - activity_dict = { - "id": eid, - "profile_id": profile_id, - "date": workout_date, - "training_type_id": training_type_id, - "duration_min": duration_min, - "hr_avg": hr_avg, - "hr_max": hr_max, - "distance_km": distance_km, - "kcal_active": kcal_active, - "kcal_resting": kcal_resting, - "rpe": None, - "pace_min_per_km": None, - "cadence": None, - "elevation_gain": None, - } - _evaluate_and_save_activity(cur, eid, activity_dict, training_type_id, profile_id) - except Exception as eval_err: - logger.warning("[activity import] Auto-Eval fehlgeschlagen: %s", eval_err) + """Auto-Eval nach Import — liest die Session aus der DB (gleiche Felder wie REST-Hook).""" + if not _EVALUATION_AVAILABLE or not _evaluate_and_save_activity: + return + cur.execute( + """ + SELECT id, profile_id, date, training_type_id, duration_min, + hr_avg, hr_max, distance_km, kcal_active, kcal_resting, + rpe, pace_min_per_km, cadence, elevation_gain + FROM activity_log + WHERE id = %s AND profile_id = %s + """, + (eid, profile_id), + ) + row = cur.fetchone() + if not row: + return + activity_dict = dict(row) + tid = training_type_id if training_type_id is not None else activity_dict.get("training_type_id") + if not tid: + return + try: + _evaluate_and_save_activity(cur, eid, activity_dict, int(tid), profile_id) + except Exception as eval_err: + logger.warning("[activity import] Auto-Eval fehlgeschlagen: %s", eval_err) def merge_activity_csv_module_fields( diff --git a/backend/data_layer/activity_session_metrics.py b/backend/data_layer/activity_session_metrics.py index 8ca9c88..89dcd94 100644 --- a/backend/data_layer/activity_session_metrics.py +++ b/backend/data_layer/activity_session_metrics.py @@ -9,8 +9,10 @@ import logging from decimal import Decimal from typing import Any, Dict, List, Mapping, Optional, Sequence -from csv_parser.module_registry import get_module_definition -from data_layer.activity_data_canon import ACTIVITY_LOG_LEGACY_COLUMN_FOR_EAV_PRIMARY_PARAM +from data_layer.activity_data_canon import ( + ACTIVITY_LOG_LEGACY_COLUMN_FOR_EAV_PRIMARY_PARAM, + ACTIVITY_MODULE_REGISTRY_FIELD_KEYS, +) logger = logging.getLogger(__name__) @@ -29,6 +31,16 @@ ACTIVITY_LOG_PATCH_FORBIDDEN = frozenset( ) +def _parameter_value_stored_in_eav_only(spec: Mapping[str, Any], parameter_key: str) -> bool: + """False = kanonisch activity_log (Modul-Registry oder training_parameters.source_field).""" + if parameter_key in ACTIVITY_MODULE_REGISTRY_FIELD_KEYS: + return False + sf = spec.get("source_field") + if sf is not None and str(sf).strip(): + return False + return True + + class ActivitySessionMetricsError(Exception): """Raised by Layer 1; routers map to HTTP (404/400).""" @@ -299,8 +311,6 @@ def upsert_session_metrics_from_csv_mapped( row = cur.fetchone() if not row or str(row["profile_id"]) != str(profile_id): return - mod = get_module_definition("activity") or {} - activity_registry_keys = frozenset((mod.get("fields") or {}).keys()) schema = resolve_activity_attribute_schema(cur, training_category, training_type_id) for spec in schema: pkey = spec["key"] @@ -309,10 +319,7 @@ def upsert_session_metrics_from_csv_mapped( raw = mapped[pkey] if raw is None or raw == "": continue - if pkey in activity_registry_keys: - continue - sf_raw = spec.get("source_field") - if sf_raw is not None and str(sf_raw).strip(): + if not _parameter_value_stored_in_eav_only(spec, pkey): continue tid = spec["training_parameter_id"] dt = spec["data_type"] @@ -568,6 +575,8 @@ def replace_activity_session_metrics( if not s["required"]: continue itk = s["key"] + if not _parameter_value_stored_in_eav_only(s, itk): + continue hit = payload_by_key.get(itk) if hit is None or hit.get("value") is None: raise ActivitySessionMetricsError(400, f"Pflichtfeld fehlt: {itk}") @@ -580,9 +589,11 @@ def replace_activity_session_metrics( for item in metrics: k = str(item["parameter_key"]).strip() spec = by_key[k] + if not _parameter_value_stored_in_eav_only(spec, k): + continue val = item.get("value") if val is None: - if spec["required"]: + if spec["required"] and _parameter_value_stored_in_eav_only(spec, k): raise ActivitySessionMetricsError(400, f"Pflichtfeld fehlt: {k}") continue rules = _validation_rules_dict(spec["validation_rules"]) diff --git a/backend/routers/activity.py b/backend/routers/activity.py index 852fc8e..168d685 100644 --- a/backend/routers/activity.py +++ b/backend/routers/activity.py @@ -641,14 +641,7 @@ async def import_activity_csv(file: UploadFile=File(...), x_profile_id: Optional cur, pid, str(existing_id), - workout_date=workout_date, training_type_id=training_type_id, - duration_min=duration_min, - hr_avg=hr_av, - hr_max=hr_mx, - distance_km=dist_km, - kcal_active=kcal_a, - kcal_resting=kcal_r, ) else: new_id = new_activity_id() @@ -660,30 +653,31 @@ async def import_activity_csv(file: UploadFile=File(...), x_profile_id: Optional start_time=workout_start_t, end_time=row.get("End", "") or None, activity_type=wtype, - duration_min=duration_min, - kcal_active=kcal_a, - kcal_resting=kcal_r, - hr_avg=hr_av, - hr_max=hr_mx, - distance_km=dist_km, training_type_id=training_type_id, training_category=training_category, training_subcategory=training_subcategory, source="apple_health", ) + apple_metrics = { + k: v + for k, v in { + "duration_min": duration_min, + "kcal_active": kcal_a, + "kcal_resting": kcal_r, + "hr_avg": hr_av, + "hr_max": hr_mx, + "distance_km": dist_km, + }.items() + if v is not None + } + if apple_metrics: + update_activity_columns(cur, pid, new_id, apple_metrics) inserted += 1 run_activity_post_write_hooks_import( cur, pid, new_id, - workout_date=workout_date, training_type_id=training_type_id, - duration_min=duration_min, - hr_avg=hr_av, - hr_max=hr_mx, - distance_km=dist_km, - kcal_active=kcal_a, - kcal_resting=kcal_r, ) except Exception as e: logger.warning(f"Import row failed: {e}") diff --git a/backend/tests/test_activity_session_metrics.py b/backend/tests/test_activity_session_metrics.py index 02dfe2d..135b8d3 100644 --- a/backend/tests/test_activity_session_metrics.py +++ b/backend/tests/test_activity_session_metrics.py @@ -11,6 +11,7 @@ from data_layer.activity_session_metrics import ( enrich_sessions_with_metrics, merge_column_backed_and_eav_metrics, merge_parameter_schema_rows, + replace_activity_session_metrics, resolve_activity_attribute_schema, upsert_session_metrics_from_csv_mapped, _row_value_tuple, @@ -358,6 +359,61 @@ def test_upsert_csv_skips_parameter_with_source_field(mock_schema): assert cur.asm_inserts == 0 +@patch("data_layer.activity_session_metrics.fetch_activity_session_metrics") +@patch("data_layer.activity_session_metrics.resolve_activity_attribute_schema") +def test_replace_metrics_skips_column_backed_kcal(mock_schema, mock_fetch): + """PUT /metrics: keine EAV-Zeile für kcal_active (liegt in activity_log).""" + pid = str(uuid.uuid4()) + eid = str(uuid.uuid4()) + mock_schema.return_value = [ + { + "training_parameter_id": 1, + "key": "kcal_active", + "data_type": "float", + "validation_rules": {}, + "source_field": "kcal_active", + "required": False, + }, + { + "training_parameter_id": 2, + "key": "custom_reps", + "data_type": "integer", + "validation_rules": {"min": 0}, + "source_field": None, + "required": False, + }, + ] + mock_fetch.return_value = [] + + class Cur: + def __init__(self): + self.asm_inserts = 0 + + def execute(self, sql, params=None): + if "INSERT INTO activity_session_metrics" in sql: + self.asm_inserts += 1 + + def fetchone(self): + return { + "profile_id": pid, + "training_category": "strength", + "training_type_id": 1, + } + + cur = Cur() + replace_activity_session_metrics( + cur, + pid, + eid, + [ + {"parameter_key": "kcal_active", "value": 450.0}, + {"parameter_key": "custom_reps", "value": 12}, + ], + ) + assert cur.asm_inserts == 1 + mock_fetch.assert_called_once_with(cur, eid) + + def test_merge_eav_primary_falls_back_to_legacy_hr_min_column(): """Kanon: min_hr ohne source_field / ohne EAV — Lesefallback Spalte hr_min.""" schema = [ @@ -374,3 +430,18 @@ def test_merge_eav_primary_falls_back_to_legacy_hr_min_column(): assert len(out) == 1 assert out[0]["key"] == "min_hr" assert out[0]["value"] == 88 + + +def test_activity_module_registry_field_keys_match_csv_module_definition(): + """Phase A: Kanon-Spine = module_registry „activity“.fields (keine zweite Liste).""" + from csv_parser.module_registry import get_module_definition + from data_layer.activity_data_canon import ( + ACTIVITY_MODULE_REGISTRY_FIELD_KEYS, + get_activity_module_registry_field_keys, + ) + + mod = get_module_definition("activity") + assert mod is not None + expected = frozenset((mod.get("fields") or {}).keys()) + assert get_activity_module_registry_field_keys() == expected + assert ACTIVITY_MODULE_REGISTRY_FIELD_KEYS == expected diff --git a/frontend/src/pages/ActivityPage.jsx b/frontend/src/pages/ActivityPage.jsx index 50ea792..195475b 100644 --- a/frontend/src/pages/ActivityPage.jsx +++ b/frontend/src/pages/ActivityPage.jsx @@ -96,6 +96,16 @@ const ACTIVITY_LOG_PAYLOAD_KEYS = new Set([ 'training_subcategory', ]) +/** activity_log-Spalten, die im EntryForm editiert werden (nicht aus metricDraft überschreiben). */ +const ACTIVITY_ENTRY_FORM_COLUMNS = new Set([ + 'duration_min', + 'kcal_active', + 'hr_avg', + 'hr_max', + 'rpe', + 'notes', +]) + function empty() { return { date: dayjs().format('YYYY-MM-DD'), @@ -113,6 +123,9 @@ function empty() { function buildMetricsPayload(schema, draft) { const out = [] for (const s of schema) { + if (s.source_field && ACTIVITY_LOG_PAYLOAD_KEYS.has(String(s.source_field))) { + continue + } const raw = draft[s.key] if (s.data_type === 'boolean') { if (raw === '' || raw === null || raw === undefined) { @@ -148,14 +161,23 @@ function SessionMetricsFields({ schema, values, setValues, metrics }) { const schemaList = Array.isArray(schema) ? schema : [] const metricRows = Array.isArray(metrics) ? metrics : [] const schemaKeys = new Set(schemaList.map((s) => s.key)) - const orphanMetrics = metricRows.filter((row) => row && row.key && !schemaKeys.has(row.key)) + const isHeadColumnMetric = (s) => + s && s.source_field && ACTIVITY_LOG_PAYLOAD_KEYS.has(String(s.source_field)) + const schemaForProfileOnly = schemaList.filter((s) => !isHeadColumnMetric(s)) + const orphanMetrics = metricRows.filter( + (row) => + row && + row.key && + !schemaKeys.has(row.key) && + !ACTIVITY_LOG_PAYLOAD_KEYS.has(row.key) + ) - if (schemaList.length === 0 && orphanMetrics.length === 0) return null + if (schemaForProfileOnly.length === 0 && orphanMetrics.length === 0) return null const set = (k, v) => setValues((prev) => ({ ...prev, [k]: v })) return (
Weitere Kennwerte (Profil)
- {schemaList.map((s) => ( + {schemaForProfileOnly.map((s) => (