diff --git a/.claude/docs/working/issue-21-seed-migration-example.sql b/.claude/docs/working/issue-21-seed-migration-example.sql
new file mode 100644
index 0000000..efb9362
--- /dev/null
+++ b/.claude/docs/working/issue-21-seed-migration-example.sql
@@ -0,0 +1,460 @@
+-- Migration XXX: CSV Parser - System Templates Seed Data
+-- Creates the default import configurations for well-known CSV formats.
+-- These templates are available to every user (is_system = true, profile_id = NULL).
+
+-- ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+-- NUTRITION
+-- ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+
+-- 1. FDDB export (German): semicolon-delimited, energy given in kJ
+INSERT INTO csv_field_mappings (
+    profile_id, is_system, module, mapping_name, description,
+    column_signature, delimiter, encoding, has_header,
+    field_mappings, type_conversions
+) VALUES (
+    NULL,  -- profile_id: NULL for system templates
+    TRUE,  -- is_system
+    'nutrition',
+    'FDDB Export (Standard)',
+    'Standard-Format für FDDB.de CSV-Exporte (Deutsch). Delimiter Semikolon, kJ → kcal Konvertierung.',
+    CAST(ARRAY['datum_tag_monat_jahr_stunde_minute', 'fett_g', 'kh_g', 'kj', 'protein_g'] AS TEXT[]),  -- column_signature: sorted column names
+    ';',  -- delimiter
+    'utf-8',  -- encoding
+    TRUE,  -- has_header
+    CAST('{
+        "datum_tag_monat_jahr_stunde_minute": "date",
+        "kj": "kcal",
+        "fett_g": "fat_g",
+        "kh_g": "carbs_g",
+        "protein_g": "protein_g"
+    }' AS JSONB),  -- field_mappings: CSV column -> target field
+    CAST('{
+        "date": {
+            "type": "date",
+            "format": "dd.mm.yyyy HH:MM",
+            "extract": "date_only"
+        },
+        "kcal": {
+            "type": "float",
+            "source_unit": "kJ",
+            "target_unit": "kcal",
+            "conversion_factor": 0.239,
+            "decimal_separator": ","
+        },
+        "fat_g": {
+            "type": "float",
+            "decimal_separator": ","
+        },
+        "carbs_g": {
+            "type": "float",
+            "decimal_separator": ","
+        },
+        "protein_g": {
+            "type": "float",
+            "decimal_separator": ","
+        }
+    }' AS JSONB)  -- type_conversions: parsing rules keyed by target field
+);
+
+-- 2. 
MyFitnessPal Export (English)
+INSERT INTO csv_field_mappings (
+    profile_id, is_system, module, mapping_name, description,
+    column_signature, delimiter, encoding, has_header,
+    field_mappings, type_conversions
+) VALUES (
+    NULL,  -- profile_id: NULL for system templates
+    TRUE,  -- is_system
+    'nutrition',
+    'MyFitnessPal Export',
+    'Standard CSV export from MyFitnessPal (English)',
+    CAST(ARRAY['Calories', 'Carbohydrates (g)', 'Date', 'Fat (g)', 'Protein (g)'] AS TEXT[]),  -- column_signature: must stay sorted ('Calories' sorts before 'Carbohydrates (g)'); array equality is order-sensitive
+    ',',  -- delimiter
+    'utf-8',  -- encoding
+    TRUE,  -- has_header
+    CAST('{
+        "Date": "date",
+        "Calories": "kcal",
+        "Fat (g)": "fat_g",
+        "Carbohydrates (g)": "carbs_g",
+        "Protein (g)": "protein_g"
+    }' AS JSONB),  -- field_mappings: CSV column -> target field
+    CAST('{
+        "date": {
+            "type": "date",
+            "format": "yyyy-mm-dd"
+        },
+        "kcal": {
+            "type": "float",
+            "decimal_separator": "."
+        },
+        "fat_g": {
+            "type": "float",
+            "decimal_separator": "."
+        },
+        "carbs_g": {
+            "type": "float",
+            "decimal_separator": "."
+        },
+        "protein_g": {
+            "type": "float",
+            "decimal_separator": "."
+        }
+    }' AS JSONB)  -- type_conversions: parsing rules keyed by target field
+);
+
+-- 3. Cronometer Export
+INSERT INTO csv_field_mappings (
+    profile_id, is_system, module, mapping_name, description,
+    column_signature, delimiter, encoding, has_header,
+    field_mappings, type_conversions
+) VALUES (
+    NULL,  -- profile_id: NULL for system templates
+    TRUE,  -- is_system
+    'nutrition',
+    'Cronometer Export',
+    'Cronometer daily nutrition export (English)',
+    CAST(ARRAY['Day', 'Energy (kcal)', 'Fat (g)', 'Net Carbs (g)', 'Protein (g)'] AS TEXT[]),  -- column_signature: sorted column names
+    ',',  -- delimiter
+    'utf-8',  -- encoding
+    TRUE,  -- has_header
+    CAST('{
+        "Day": "date",
+        "Energy (kcal)": "kcal",
+        "Fat (g)": "fat_g",
+        "Net Carbs (g)": "carbs_g",
+        "Protein (g)": "protein_g"
+    }' AS JSONB),  -- field_mappings: CSV column -> target field
+    CAST('{
+        "date": {
+            "type": "date",
+            "format": "yyyy-mm-dd"
+        },
+        "kcal": {"type": "float", "decimal_separator": "."},
+        "fat_g": {"type": "float", "decimal_separator": "."},
+        "carbs_g": {"type": "float", "decimal_separator": "."},
+        "protein_g": {"type": "float", "decimal_separator": "."}
+    }' AS JSONB)  -- type_conversions: parsing rules keyed by target field
+);
+
+-- ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+-- ACTIVITY
+-- ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+
+-- 1. 
Apple Health Workout Export (English)
+INSERT INTO csv_field_mappings (
+    profile_id, is_system, module, mapping_name, description,
+    column_signature, delimiter, encoding, has_header,
+    field_mappings, type_conversions
+) VALUES (
+    NULL,  -- profile_id: NULL for system templates
+    TRUE,  -- is_system
+    'activity',
+    'Apple Health Workout Export (English)',
+    'Apple Health CSV-Export für Workouts (English). Automatisches Training-Type-Mapping.',
+    CAST(ARRAY['Active Energy (kcal)', 'Distance (km)', 'Duration', 'End', 'Heart Rate Average (bpm)', 'Start', 'Workout Type'] AS TEXT[]),  -- column_signature: sorted column names
+    ',',  -- delimiter
+    'utf-8',  -- encoding
+    TRUE,  -- has_header
+    CAST('{
+        "Workout Type": "activity_type",
+        "Start": "start_time",
+        "End": "end_time",
+        "Duration": "duration_min",
+        "Distance (km)": "distance_km",
+        "Active Energy (kcal)": "kcal_active",
+        "Heart Rate Average (bpm)": "hr_avg"
+    }' AS JSONB),  -- field_mappings: CSV column -> target field
+    CAST('{
+        "start_time": {
+            "type": "datetime",
+            "format": "yyyy-mm-dd HH:MM:SS",
+            "extract": "date_and_time"
+        },
+        "end_time": {
+            "type": "datetime",
+            "format": "yyyy-mm-dd HH:MM:SS"
+        },
+        "duration_min": {
+            "type": "duration",
+            "format": "HH:MM:SS",
+            "target_unit": "minutes"
+        },
+        "distance_km": {
+            "type": "float",
+            "decimal_separator": "."
+        },
+        "kcal_active": {
+            "type": "float",
+            "decimal_separator": "."
+        },
+        "hr_avg": {
+            "type": "int"
+        }
+    }' AS JSONB)  -- type_conversions; NOTE(review): no column maps to a "date" field - presumably derived from start_time via extract=date_and_time, confirm
+);
+
+-- 2. Apple Health Workout Export (German)
+INSERT INTO csv_field_mappings (
+    profile_id, is_system, module, mapping_name, description,
+    column_signature, delimiter, encoding, has_header,
+    field_mappings, type_conversions
+) VALUES (
+    NULL,  -- profile_id: NULL for system templates
+    TRUE,  -- is_system
+    'activity',
+    'Apple Health Workout Export (Deutsch)',
+    'Apple Health CSV-Export für Workouts (Deutsch). Automatisches Training-Type-Mapping.',
+    CAST(ARRAY['Aktive Energie (kcal)', 'Dauer', 'Durchschnittliche Herzfrequenz (bpm)', 'Ende', 'Start', 'Strecke (km)', 'Trainingsart'] AS TEXT[]),  -- column_signature: sorted column names
+    ',',  -- delimiter; NOTE(review): same character as the decimal separator below - presumably numeric fields are quoted, confirm against a real export
+    'utf-8',  -- encoding
+    TRUE,  -- has_header
+    CAST('{
+        "Trainingsart": "activity_type",
+        "Start": "start_time",
+        "Ende": "end_time",
+        "Dauer": "duration_min",
+        "Strecke (km)": "distance_km",
+        "Aktive Energie (kcal)": "kcal_active",
+        "Durchschnittliche Herzfrequenz (bpm)": "hr_avg"
+    }' AS JSONB),  -- field_mappings: CSV column -> target field
+    CAST('{
+        "start_time": {
+            "type": "datetime",
+            "format": "yyyy-mm-dd HH:MM:SS",
+            "extract": "date_and_time"
+        },
+        "end_time": {
+            "type": "datetime",
+            "format": "yyyy-mm-dd HH:MM:SS"
+        },
+        "duration_min": {
+            "type": "duration",
+            "format": "HH:MM:SS",
+            "target_unit": "minutes"
+        },
+        "distance_km": {
+            "type": "float",
+            "decimal_separator": ","
+        },
+        "kcal_active": {
+            "type": "float",
+            "decimal_separator": ","
+        },
+        "hr_avg": {
+            "type": "int"
+        }
+    }' AS JSONB)  -- type_conversions: parsing rules keyed by target field (decimal comma)
+);
+
+-- 3. Garmin Connect Export
+INSERT INTO csv_field_mappings (
+    profile_id, is_system, module, mapping_name, description,
+    column_signature, delimiter, encoding, has_header,
+    field_mappings, type_conversions
+) VALUES (
+    NULL,  -- profile_id: NULL for system templates
+    TRUE,  -- is_system
+    'activity',
+    'Garmin Connect Export',
+    'Garmin Connect activity CSV export (English)',
+    CAST(ARRAY['Activity Type', 'Avg HR', 'Calories', 'Date', 'Distance', 'Duration', 'Time'] AS TEXT[]),  -- column_signature: sorted column names
+    ',',  -- delimiter
+    'utf-8',  -- encoding
+    TRUE,  -- has_header
+    CAST('{
+        "Activity Type": "activity_type",
+        "Date": "date",
+        "Time": "start_time",
+        "Duration": "duration_min",
+        "Distance": "distance_km",
+        "Calories": "kcal_active",
+        "Avg HR": "hr_avg"
+    }' AS JSONB),  -- field_mappings; NOTE(review): verify the "Time"/"Duration" column semantics against a real Garmin Connect export
+    CAST('{
+        "date": {
+            "type": "date",
+            "format": "yyyy-mm-dd"
+        },
+        "start_time": {
+            "type": "time",
+            "format": "HH:MM:SS"
+        },
+        "duration_min": {
+            "type": "duration",
+            "format": "HH:MM:SS",
+            "target_unit": "minutes"
+        },
+        "distance_km": {
+            "type": "float",
+            "decimal_separator": "."
+        },
+        "kcal_active": {
+            "type": "float",
+            "decimal_separator": "."
+        },
+        "hr_avg": {
+            "type": "int"
+        }
+    }' AS JSONB)  -- type_conversions: parsing rules keyed by target field
+);
+
+-- ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+-- BLOOD PRESSURE
+-- ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+
+-- 1. Omron Export (German)
+INSERT INTO csv_field_mappings (
+    profile_id, is_system, module, mapping_name, description,
+    column_signature, delimiter, encoding, has_header,
+    field_mappings, type_conversions
+) VALUES (
+    NULL,  -- profile_id: NULL for system templates
+    TRUE,  -- is_system
+    'blood_pressure',
+    'Omron Export (Deutsch)',
+    'Omron Blutdruckmessgerät CSV-Export (Deutsch)',
+    CAST(ARRAY['Datum', 'Diastolisch (mmHg)', 'Puls (bpm)', 'Systolisch (mmHg)', 'Zeit'] AS TEXT[]),  -- column_signature: sorted column names
+    ',',  -- delimiter
+    'utf-8',  -- encoding
+    TRUE,  -- has_header
+    CAST('{
+        "Datum": "measured_date",
+        "Zeit": "measured_time",
+        "Systolisch (mmHg)": "systolic",
+        "Diastolisch (mmHg)": "diastolic",
+        "Puls (bpm)": "pulse"
+    }' AS JSONB),  -- field_mappings: CSV column -> target field
+    CAST('{
+        "measured_date": {
+            "type": "date",
+            "format": "dd.mm.yyyy"
+        },
+        "measured_time": {
+            "type": "time",
+            "format": "HH:MM"
+        },
+        "systolic": {"type": "int"},
+        "diastolic": {"type": "int"},
+        "pulse": {"type": "int"}
+    }' AS JSONB)  -- type_conversions: parsing rules keyed by target field
+);
+
+-- 2. 
Omron Export (English)
+INSERT INTO csv_field_mappings (
+    profile_id, is_system, module, mapping_name, description,
+    column_signature, delimiter, encoding, has_header,
+    field_mappings, type_conversions
+) VALUES (
+    NULL,  -- profile_id: NULL for system templates
+    TRUE,  -- is_system
+    'blood_pressure',
+    'Omron Export (English)',
+    'Omron blood pressure monitor CSV export (English)',
+    CAST(ARRAY['Date', 'Diastolic (mmHg)', 'Pulse (bpm)', 'Systolic (mmHg)', 'Time'] AS TEXT[]),  -- column_signature: sorted column names
+    ',',  -- delimiter
+    'utf-8',  -- encoding
+    TRUE,  -- has_header
+    CAST('{
+        "Date": "measured_date",
+        "Time": "measured_time",
+        "Systolic (mmHg)": "systolic",
+        "Diastolic (mmHg)": "diastolic",
+        "Pulse (bpm)": "pulse"
+    }' AS JSONB),  -- field_mappings: CSV column -> target field
+    CAST('{
+        "measured_date": {
+            "type": "date",
+            "format": "mm/dd/yyyy"
+        },
+        "measured_time": {
+            "type": "time",
+            "format": "HH:MM"
+        },
+        "systolic": {"type": "int"},
+        "diastolic": {"type": "int"},
+        "pulse": {"type": "int"}
+    }' AS JSONB)  -- type_conversions: parsing rules keyed by target field (US-style month/day/year date)
+);
+
+-- ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+-- WEIGHT
+-- ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+
+-- 1. Apple Health Weight Export
+INSERT INTO csv_field_mappings (
+    profile_id, is_system, module, mapping_name, description,
+    column_signature, delimiter, encoding, has_header,
+    field_mappings, type_conversions
+) VALUES (
+    NULL,  -- profile_id: NULL for system templates
+    TRUE,  -- is_system
+    'weight',
+    'Apple Health Weight Export',
+    'Apple Health body mass CSV export',
+    CAST(ARRAY['Body Mass (kg)', 'Start'] AS TEXT[]),  -- column_signature: sorted column names
+    ',',  -- delimiter
+    'utf-8',  -- encoding
+    TRUE,  -- has_header
+    CAST('{
+        "Start": "date",
+        "Body Mass (kg)": "weight"
+    }' AS JSONB),  -- field_mappings: CSV column -> target field
+    CAST('{
+        "date": {
+            "type": "datetime",
+            "format": "yyyy-mm-dd HH:MM:SS",
+            "extract": "date_only"
+        },
+        "weight": {
+            "type": "float",
+            "decimal_separator": "."
+        }
+    }' AS JSONB)  -- type_conversions: the timestamp in "Start" is reduced to its date part
+);
+
+-- 2. 
Withings Export
+INSERT INTO csv_field_mappings (
+    profile_id, is_system, module, mapping_name, description,
+    column_signature, delimiter, encoding, has_header,
+    field_mappings, type_conversions
+) VALUES (
+    NULL,  -- profile_id: NULL for system templates
+    TRUE,  -- is_system
+    'weight',
+    'Withings Export',
+    'Withings smart scale CSV export (weight, body fat, muscle mass)',
+    CAST(ARRAY['Body Fat (%)', 'Date', 'Muscle Mass (kg)', 'Weight (kg)'] AS TEXT[]),  -- column_signature: lists all four columns, although only two are mapped below
+    ',',  -- delimiter
+    'utf-8',  -- encoding
+    TRUE,  -- has_header
+    CAST('{
+        "Date": "date",
+        "Weight (kg)": "weight"
+    }' AS JSONB),  -- field_mappings: body-fat and muscle-mass columns have no target field here
+    CAST('{
+        "date": {
+            "type": "date",
+            "format": "yyyy-mm-dd"
+        },
+        "weight": {
+            "type": "float",
+            "decimal_separator": "."
+        }
+    }' AS JSONB)  -- type_conversions: parsing rules keyed by target field
+);
+
+-- ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+-- SUMMARY: prints informational notices only, changes no data
+-- ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+
+DO $$
+DECLARE
+    system_template_total INTEGER;
+BEGIN
+    SELECT COUNT(*) INTO system_template_total FROM csv_field_mappings WHERE is_system;  -- counts every system template in the table, not only rows added by this file
+    RAISE NOTICE '✓ CSV Parser: % System-Templates created', system_template_total;
+    RAISE NOTICE ' - Nutrition: 3 (FDDB, MyFitnessPal, Cronometer)';  -- per-module lines are fixed text, not computed
+    RAISE NOTICE ' - Activity: 3 (Apple Health DE/EN, Garmin)';
+    RAISE NOTICE ' - Blood Pressure: 2 (Omron DE/EN)';
+    RAISE NOTICE ' - Weight: 2 (Apple Health, Withings)';
+END $$;
diff --git a/.claude/docs/working/issue-21-universal-csv-parser-analysis.md b/.claude/docs/working/issue-21-universal-csv-parser-analysis.md
new file mode 100644
index 0000000..e7f5617
--- /dev/null
+++ b/.claude/docs/working/issue-21-universal-csv-parser-analysis.md
@@ -0,0 +1,1035 @@
+# Issue #21: Universeller CSV-Parser – Anforderungsanalyse & Konzept
+
+**Stand:** 2026-04-09
+**Autor:** Claude Code Agent
+**Status:** Konzeptphase (Wartet auf User-Approval)
+
+---
+
+## 1. 
Ausgangslage + +### 1.1 Bestehende CSV-Import-Implementierungen + +Aktuell existieren **4 separate CSV-Import-Funktionen**: + +| Modul | Datei | Format | Besonderheiten | +|-------|-------|--------|----------------| +| **Nutrition** | `nutrition.py:34` | FDDB | Delimiter `;`, hardcoded Spalten, Aggregierung nach Tag | +| **Activity** | `activity.py:344` | Apple Health | **Lernendes Mapping** via `activity_type_mappings`, Update-or-Insert | +| **Blood Pressure** | `blood_pressure.py:293` | Omron | Multiple Spaltennamen-Varianten (DE/EN), Context-Tagging | +| **ZIP Import** | `importdata.py:30` | Eigenes Format | Profile.json + CSV-Bundle | + +### 1.2 Gemeinsame Patterns (bereits vorhanden) + +✅ **Encoding-Detection:** +```python +try: text = raw.decode('utf-8') +except: text = raw.decode('latin-1') +if text.startswith('\ufeff'): text = text[1:] # BOM-Handling +``` + +✅ **Duplikat-Erkennung:** +- Nutrition: `ON CONFLICT (profile_id, date) DO UPDATE` +- Activity: `SELECT WHERE profile_id=%s AND date=%s AND start_time=%s` +- Blood Pressure: Timestamp-basiert + +✅ **Type-Conversion** (scattered): +- Datumsformate: FDDB (`dd.mm.yyyy`), Apple Health (ISO), Omron (mehrere) +- Dezimaltrennzeichen: `,` → `.` +- Einheiten: kJ → kcal + +❌ **Fehlende Patterns:** +- Kein **einheitliches Mapping-System** (außer Activity) +- Kein **User-Interface für Mapping-Anpassung** +- Keine **automatische Format-Erkennung** +- Keine **Vorschläge für unbekannte Spalten** + +--- + +## 2. 
Anforderungen (aus User-Request) + +### 2.1 Funktionale Anforderungen + +| # | Anforderung | Priorität | +|---|-------------|-----------| +| **F1** | **Universeller Parser:** Ein Parser für alle Module (Nutrition, Activity, Weight, Circumference, Caliper, Vitals, Sleep) | MUST | +| **F2** | **Lernendes System:** Automatische Erkennung bekannter CSV-Strukturen basierend auf Spalten-Signaturen | MUST | +| **F3** | **User-anpassbares Mapping:** UI zur manuellen Zuordnung von CSV-Spalten zu DB-Feldern | MUST | +| **F4** | **Intelligente Vorschläge:** System schlägt Mappings vor basierend auf Spalten-Namen, Sample-Daten, Statistiken | SHOULD | +| **F5** | **Type-Conversion:** Automatische Konvertierung von Datumsformaten, Dezimaltrennzeichen, Text→Zahl, Einheiten | MUST | +| **F6** | **Mapping-Persistenz:** Gespeicherte Mappings können wiederverwendet werden (pro User, pro Modul, global) | MUST | +| **F7** | **Format-Templates:** Vordefinierte Templates für bekannte Formate (FDDB, Apple Health, Omron, Garmin, etc.) | SHOULD | +| **F8** | **Validierung:** Vor-Import-Validierung mit Fehler-Report und Preview (erste 5 Zeilen) | SHOULD | +| **F9** | **Rollback:** Fehlerhafte Imports können rückgängig gemacht werden | NICE | + +### 2.2 Nicht-funktionale Anforderungen + +| # | Anforderung | Priorität | +|---|-------------|-----------| +| **NF1** | **Backward-Kompatibilität:** Bestehende CSV-Import-Endpoints bleiben funktionsfähig (Wrapper um neuen Parser) | MUST | +| **NF2** | **Performance:** Import von 1000 Zeilen < 5 Sekunden | SHOULD | +| **NF3** | **Erweiterbarkeit:** Neue Module/Felder können ohne Code-Änderung hinzugefügt werden (Registry-Pattern) | MUST | +| **NF4** | **Security:** User können nur eigene Mappings sehen/ändern (außer Admin) | MUST | + +--- + +## 3. 
Datenmodell + +### 3.1 Neue DB-Tabellen + +#### **`csv_field_mappings`** (Zentrale Mapping-Registry) + +```sql +CREATE TABLE csv_field_mappings ( + id SERIAL PRIMARY KEY, + profile_id INTEGER REFERENCES profiles(id), -- NULL = System-Template + is_system BOOLEAN DEFAULT false, -- true = read-only Template + module VARCHAR(50) NOT NULL, -- 'nutrition', 'activity', etc. + mapping_name VARCHAR(100) NOT NULL, -- "FDDB Export", "Apple Health" + description TEXT, -- "Standard-Format für FDDB CSV-Exporte" + + -- CSV-Signatur (für Auto-Detection) + column_signature TEXT[], -- Spalten-Namen (sortiert, normalisiert) + delimiter VARCHAR(10) DEFAULT ',', -- CSV-Delimiter + encoding VARCHAR(20) DEFAULT 'utf-8', + has_header BOOLEAN DEFAULT true, + + -- Mapping-Definition (JSONB) + field_mappings JSONB NOT NULL, -- { "csv_column": "db_field" } + type_conversions JSONB, -- { "db_field": {"type": "date", "format": "dd.mm.yyyy"} } + + -- Statistik (für Ranking) + usage_count INTEGER DEFAULT 0, + last_used_at TIMESTAMP, + success_rate FLOAT DEFAULT 1.0, -- Erfolgreiche Imports / Gesamt + + created_at TIMESTAMP DEFAULT NOW(), + updated_at TIMESTAMP DEFAULT NOW(), + + UNIQUE(profile_id, module, mapping_name), + CHECK ( + -- System-Templates haben profile_id = NULL + (is_system = true AND profile_id IS NULL) OR + (is_system = false AND profile_id IS NOT NULL) OR + (is_system = false AND profile_id IS NULL) + ) +); + +CREATE INDEX idx_csv_mappings_lookup ON csv_field_mappings(module, profile_id); +CREATE INDEX idx_csv_mappings_signature ON csv_field_mappings USING GIN (column_signature); +CREATE INDEX idx_csv_mappings_system ON csv_field_mappings(is_system, module) WHERE is_system = true; + +COMMENT ON TABLE csv_field_mappings IS 'Mapping-Registry: System-Templates (is_system=true) + User-Mappings (profile_id NOT NULL)'; +COMMENT ON COLUMN csv_field_mappings.is_system IS 'System-Templates sind read-only und für alle User verfügbar'; +COMMENT ON COLUMN csv_field_mappings.profile_id IS 
'NULL = System-Template, NOT NULL = User-spezifisches Mapping'; +``` + +**Beispiel-Entries:** + +**System-Template (für alle User verfügbar):** +```json +{ + "id": 1, + "profile_id": null, + "is_system": true, + "module": "nutrition", + "mapping_name": "FDDB Export (Standard)", + "description": "Standard-Format für FDDB.de CSV-Exporte (Deutsch)", + "column_signature": ["datum_tag_monat_jahr_stunde_minute", "fett_g", "kh_g", "kj", "protein_g"], + "delimiter": ";", + "encoding": "utf-8", + "has_header": true, + "field_mappings": { + "datum_tag_monat_jahr_stunde_minute": "date", + "kj": "kcal", + "fett_g": "fat_g", + "kh_g": "carbs_g", + "protein_g": "protein_g" + }, + "type_conversions": { + "date": { + "type": "date", + "format": "dd.mm.yyyy HH:MM", + "extract": "date_only" + }, + "kcal": { + "type": "float", + "source_unit": "kJ", + "target_unit": "kcal", + "conversion_factor": 0.239 + }, + "fat_g": { + "type": "float", + "decimal_separator": "," + } + }, + "usage_count": 1523, + "success_rate": 0.99 +} +``` + +**User-spezifisches Mapping (nur für User ID 42):** +```json +{ + "id": 123, + "profile_id": 42, + "is_system": false, + "module": "nutrition", + "mapping_name": "Mein FDDB Export (angepasst)", + "description": "FDDB Export mit Notiz-Spalte", + "column_signature": ["datum_tag_monat_jahr_stunde_minute", "fett_g", "kh_g", "kj", "protein_g", "notiz"], + "delimiter": ";", + "encoding": "utf-8", + "has_header": true, + "field_mappings": { + "datum_tag_monat_jahr_stunde_minute": "date", + "kj": "kcal", + "fett_g": "fat_g", + "kh_g": "carbs_g", + "protein_g": "protein_g", + "notiz": "note" + }, + "type_conversions": { + "date": { + "type": "date", + "format": "dd.mm.yyyy HH:MM", + "extract": "date_only" + }, + "kcal": { + "type": "float", + "source_unit": "kJ", + "target_unit": "kcal", + "conversion_factor": 0.239 + } + }, + "usage_count": 8, + "success_rate": 1.0 +} +``` + +#### **`csv_import_log`** (Import-Historie für Rollback) + +```sql +CREATE TABLE 
csv_import_log ( + id SERIAL PRIMARY KEY, + profile_id INTEGER REFERENCES profiles(id), + mapping_id INTEGER REFERENCES csv_field_mappings(id), + module VARCHAR(50) NOT NULL, + + filename VARCHAR(255), + rows_total INTEGER, + rows_imported INTEGER, + rows_updated INTEGER, + rows_skipped INTEGER, + rows_errors INTEGER, + + error_details JSONB, -- [{"row": 5, "error": "Invalid date"}] + + started_at TIMESTAMP DEFAULT NOW(), + finished_at TIMESTAMP, + status VARCHAR(20) DEFAULT 'running', -- 'running', 'success', 'failed' + + -- Für Rollback + affected_ids JSONB -- {"nutrition_log": [123, 456, ...]} +); + +CREATE INDEX idx_csv_import_profile ON csv_import_log(profile_id, module); +``` + +### 3.2 System-Templates (Seed-Data) + +**Bei Installation/Migration werden folgende System-Templates angelegt:** + +#### **Nutrition (Ernährung)** + +1. **FDDB Export (Standard)** + - Delimiter: `;` + - Encoding: `utf-8` + - Spalten: `datum_tag_monat_jahr_stunde_minute`, `kj`, `fett_g`, `kh_g`, `protein_g` + - Besonderheit: kJ → kcal Konvertierung + +2. **MyFitnessPal Export** + - Delimiter: `,` + - Encoding: `utf-8` + - Spalten: `Date`, `Calories`, `Carbohydrates (g)`, `Fat (g)`, `Protein (g)` + +3. **Cronometer Export** + - Delimiter: `,` + - Encoding: `utf-8` + - Spalten: `Day`, `Energy (kcal)`, `Protein (g)`, `Net Carbs (g)`, `Fat (g)` + +#### **Activity (Aktivität)** + +1. **Apple Health Workout Export (English)** + - Delimiter: `,` + - Encoding: `utf-8` + - Spalten: `Workout Type`, `Start`, `End`, `Duration`, `Distance (km)`, `Active Energy (kcal)`, `Heart Rate Average (bpm)` + - Besonderheit: Automatisches Training-Type-Mapping + +2. **Apple Health Workout Export (Deutsch)** + - Delimiter: `,` + - Encoding: `utf-8` + - Spalten: `Trainingsart`, `Start`, `Ende`, `Dauer`, `Strecke (km)`, `Aktive Energie (kcal)`, `Durchschnittliche Herzfrequenz (bpm)` + +3. 
**Garmin Connect Export** + - Delimiter: `,` + - Encoding: `utf-8` + - Spalten: `Activity Type`, `Date`, `Time`, `Duration`, `Distance`, `Calories`, `Avg HR` + +#### **Blood Pressure (Blutdruck)** + +1. **Omron Export (Deutsch)** + - Delimiter: `,` + - Encoding: `utf-8` + - Spalten: `Datum`, `Zeit`, `Systolisch (mmHg)`, `Diastolisch (mmHg)`, `Puls (bpm)` + +2. **Omron Export (English)** + - Delimiter: `,` + - Encoding: `utf-8` + - Spalten: `Date`, `Time`, `Systolic (mmHg)`, `Diastolic (mmHg)`, `Pulse (bpm)` + +#### **Vitals (Vitalwerte)** + +1. **Apple Health Vitals Export** + - Delimiter: `,` + - Encoding: `utf-8` + - Spalten: `Start`, `Resting Heart Rate (bpm)`, `Heart Rate Variability (ms)`, `Respiratory Rate (breaths/min)`, `Oxygen Saturation (%)` + +#### **Weight (Gewicht)** + +1. **Apple Health Weight Export** + - Delimiter: `,` + - Encoding: `utf-8` + - Spalten: `Start`, `Body Mass (kg)` + +2. **Withings Export** + - Delimiter: `,` + - Encoding: `utf-8` + - Spalten: `Date`, `Weight (kg)`, `Body Fat (%)`, `Muscle Mass (kg)` + +**GESAMT:** ~12-15 System-Templates initial + +**Migration:** `backend/migrations/XXX_csv_parser_seed_templates.sql` + +### 3.3 Modul-Registry (Backend Code) + +**`backend/csv_parser/module_registry.py`** + +Definiert für jedes Modul: +- Verfügbare DB-Felder +- Datentypen +- Validierung +- Erforderliche Felder +- Duplikat-Strategie + +```python +MODULE_DEFINITIONS = { + "nutrition": { + "table": "nutrition_log", + "fields": { + "date": {"type": "date", "required": True}, + "kcal": {"type": "float", "required": True, "min": 0, "max": 10000}, + "protein_g": {"type": "float", "required": False, "min": 0}, + "fat_g": {"type": "float", "required": False, "min": 0}, + "carbs_g": {"type": "float", "required": False, "min": 0}, + "note": {"type": "string", "required": False, "max_length": 500} + }, + "duplicate_key": ["profile_id", "date"], # ON CONFLICT + "duplicate_strategy": "update" # "update" | "skip" | "error" + }, + "activity": { + 
"table": "activity_log", + "fields": { + "date": {"type": "date", "required": True}, + "start_time": {"type": "time", "required": False}, + "activity_type": {"type": "string", "required": True}, + "duration_min": {"type": "float", "required": True, "min": 0}, + "kcal_active": {"type": "float", "required": False}, + "distance_km": {"type": "float", "required": False}, + "hr_avg": {"type": "int", "required": False, "min": 30, "max": 220} + }, + "duplicate_key": ["profile_id", "date", "start_time"], + "duplicate_strategy": "update" + }, + # ... weitere Module +} +``` + +--- + +## 4. Architektur + +### 4.1 System-Komponenten + +``` +┌─────────────────────────────────────────────────────────────┐ +│ Frontend (React) │ +├─────────────────────────────────────────────────────────────┤ +│ 1. CSV-Upload-Komponente │ +│ - Datei-Upload + Format-Detection │ +│ - Preview (erste 5 Zeilen) │ +│ │ +│ 2. Mapping-Editor │ +│ - Spalten-zu-Feld-Zuordnung (Drag & Drop) │ +│ - Type-Conversion-Konfiguration │ +│ - Vorschau der konvertierten Werte │ +│ │ +│ 3. Mapping-Bibliothek │ +│ - Gespeicherte Mappings anzeigen/auswählen │ +│ - Templates (FDDB, Apple Health, etc.) │ +└─────────────────────────────────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────────┐ +│ Backend (FastAPI) │ +├─────────────────────────────────────────────────────────────┤ +│ 1. CSV-Parser-Engine │ +│ - Encoding-Detection (UTF-8, Latin-1, etc.) │ +│ - Delimiter-Detection (`,` `;` `\t`) │ +│ - Column-Signature-Berechnung │ +│ │ +│ 2. Mapping-Engine │ +│ - Auto-Detection (Spalten → bekannte Mappings) │ +│ - Intelligent Suggestions (Fuzzy-Match, Sample-Analyse) │ +│ - Mapping-Persistenz (DB speichern/laden) │ +│ │ +│ 3. Type-Converter │ +│ - Date-Parser (20+ Formate) │ +│ - Number-Parser (Dezimaltrennzeichen, Tausender) │ +│ - Unit-Converter (kJ↔kcal, km↔mi, etc.) │ +│ - Text-Normalizer (Trim, Lowercase, etc.) │ +│ │ +│ 4. 
Validator │ +│ - Type-Validation (INT, FLOAT, DATE, etc.) │ +│ - Range-Validation (min/max) │ +│ - Required-Field-Check │ +│ - Custom-Validators pro Modul │ +│ │ +│ 5. Import-Executor │ +│ - Batch-Insert mit Transaction │ +│ - Duplikat-Handling (Update/Skip/Error) │ +│ - Rollback bei Fehler │ +│ - Progress-Tracking (für große Files) │ +└─────────────────────────────────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────────┐ +│ PostgreSQL │ +├─────────────────────────────────────────────────────────────┤ +│ - csv_field_mappings (Mapping-Registry) │ +│ - csv_import_log (Import-Historie) │ +│ - nutrition_log, activity_log, ... (Daten-Tabellen) │ +└─────────────────────────────────────────────────────────────┘ +``` + +### 4.2 Workflow (Happy Path) + +``` +1. User wählt Datei + ↓ +2. Frontend: POST /api/csv/analyze + - Datei hochladen + - Backend: Encoding + Delimiter erkennen + - Backend: Column-Signature berechnen + - Backend: Auto-Detection + 1. Suche in User-Mappings (profile_id = current_user) + 2. Suche in System-Templates (is_system = true) + ↓ +3. Backend antwortet: + { + "detected_mapping": { + "id": 1, + "name": "FDDB Export (Standard)", + "is_system": true, + "confidence": 0.98, + "match_type": "exact_signature" + }, + "columns": ["date", "kcal", "protein"], + "sample_rows": [...], + "suggestions": { + "date": ["date", "created_at"], // Vorschläge + "kcal": ["kcal", "energy"] + } + } + ↓ +4. Frontend: Mapping-Editor + - User sieht: "System-Template erkannt: FDDB Export (Standard)" + - User kann Mapping anpassen (erstellt dann automatisch User-Copy) + - User testet Type-Conversion (Preview) + ↓ +5. Frontend: POST /api/csv/import + { + "mapping_id": 1, // Verwende bestehendes Mapping, ODER: + "mapping": {...}, // Custom-Mapping + "module": "nutrition", + "save_mapping": true, // Als User-Mapping speichern? + "mapping_name": "MyFitnessPal Export" + } + ↓ +6. 
Backend: Import ausführen + - Validierung + - Transaction starten + - Row-by-Row importieren + - Bei Fehler: Rollback + - Bei Erfolg: usage_count++ für verwendetes Mapping + ↓ +7. Backend: Antwort + { + "success": true, + "imported": 100, + "updated": 5, + "skipped": 2, + "errors": [{"row": 7, "error": "Invalid date"}], + "import_log_id": 456 // Für Rollback + } +``` + +### 4.3 System-Templates vs. User-Mappings + +**Hierarchie (Auto-Detection-Reihenfolge):** + +1. **User-Mappings** (profile_id = current_user) + - Höchste Priorität + - Exact Match → sofort verwenden + - Partial Match → als Vorschlag + +2. **System-Templates** (is_system = true, profile_id = NULL) + - Fallback wenn kein User-Mapping passt + - Read-only (User kann nicht ändern) + - User kann aber **Kopie erstellen** und anpassen + +**Permissions:** + +| Aktion | User-Mappings | System-Templates | +|--------|---------------|------------------| +| **Anzeigen** | ✅ Eigene | ✅ Alle | +| **Verwenden** | ✅ Eigene | ✅ Alle | +| **Erstellen** | ✅ Ja | ❌ Nur Admin/Migration | +| **Ändern** | ✅ Eigene | ❌ Nein (Kopie erstellen) | +| **Löschen** | ✅ Eigene | ❌ Nein | +| **Kopieren** | ✅ Ja | ✅ Ja → User-Mapping | + +**Workflow "System-Template anpassen":** + +``` +User wählt System-Template "FDDB Export (Standard)" + → User ändert Mapping (z.B. fügt Spalte hinzu) + → Frontend fragt: "System-Template kann nicht geändert werden. + Kopie erstellen? [Ja] [Abbrechen]" + → User klickt [Ja] + → Neues User-Mapping mit is_system=false, profile_id=current_user +``` + +--- + +## 5. Intelligente Features + +### 5.1 Auto-Detection (Spalten-Signatur-Matching) + +**Algorithmus:** + +1. **Exakte Signatur:** Spalten-Namen (normalisiert, sortiert) → 100% Match + ``` + ["date", "kcal", "protein_g"] → Mapping-ID 123 + ``` + +2. **Partial Match:** ≥70% Überlappung → Vorschlag + ``` + CSV: ["date", "kcal", "protein_g", "notiz"] + DB: ["date", "kcal", "protein_g"] + → Match: 75% (3 von 4 Spalten) → Mapping-ID 123 als Vorschlag + ``` + +3. 
**Fuzzy-Match:** Levenshtein-Distanz < 3 (case-insensitiv) + ``` + "Datum" → "date" (Distance: 2) + "Kalorien" → "kcal" (kein Fuzzy-Match: Distanz ≥ 3) + ``` + +### 5.2 Intelligente Vorschläge + +**Sample-basierte Analyse:** + +1. **Date-Detection:** Regex-Patterns für 20+ Formate + ```python + SAMPLES = ["01.01.2024", "02.01.2024", "03.01.2024"] + → Pattern: dd.mm.yyyy + → Vorschlag: Spalte "Datum" → Feld "date" + ``` + +2. **Number-Detection:** Statistik über Sample-Werte + ```python + SAMPLES = ["1500,5", "2000,3", "1800,0"] + → Decimal-Separator: "," + → Range: 1000-3000 → passt zu "kcal" + ``` + +3. **Unit-Detection:** Keyword-Search in Spalten-Namen + ```python + "Active Energy (kJ)" → Einheit: kJ → Feld: kcal (mit Conversion) + ``` + +### 5.3 Type-Conversion (20+ Formate) + +**Date-Formate:** +```python +DATE_PATTERNS = [ + "%Y-%m-%d", # 2024-01-15 (ISO) + "%d.%m.%Y", # 15.01.2024 (DE) + "%d/%m/%Y", # 15/01/2024 (UK) + "%m/%d/%Y", # 01/15/2024 (US) + "%Y-%m-%d %H:%M:%S", # Full timestamp + "%d.%m.%Y %H:%M", # FDDB format + # ... 15 weitere +] +``` + +**Number-Conversion:** +```python +def parse_number(value: str, decimal_sep=',', thousands_sep='.') -> float: + # "1.500,50" → 1500.50 + value = value.replace(thousands_sep, '') + value = value.replace(decimal_sep, '.') + return float(value) +``` + +**Unit-Conversion:** +```python +UNIT_CONVERSIONS = { + ("kJ", "kcal"): lambda x: x / 4.184, + ("kcal", "kJ"): lambda x: x * 4.184, + ("km", "mi"): lambda x: x * 0.621371, + ("mi", "km"): lambda x: x * 1.60934, + ("kg", "lb"): lambda x: x * 2.20462, + ("lb", "kg"): lambda x: x * 0.453592, +} +``` + +--- + +## 6. API-Endpoints + +### 6.1 Neue Endpoints + +#### **POST /api/csv/analyze** + +Analysiert hochgeladene CSV-Datei und schlägt Mappings vor. 
+ +**Request:** +``` +Content-Type: multipart/form-data + +file: +module: "nutrition" +``` + +**Response:** +```json +{ + "encoding": "utf-8", + "delimiter": ";", + "columns": ["Datum", "Kalorien (kJ)", "Protein (g)", "Fett (g)"], + "sample_rows": [ + {"Datum": "01.01.2024", "Kalorien (kJ)": "8000", "Protein (g)": "80", "Fett (g)": "60"}, + {"Datum": "02.01.2024", "Kalorien (kJ)": "9000", "Protein (g)": "90", "Fett (g)": "70"} + ], + "detected_mappings": [ + { + "mapping_id": 123, + "mapping_name": "FDDB Export", + "confidence": 0.95, + "match_type": "exact_signature" + } + ], + "suggestions": { + "Datum": { + "suggested_field": "date", + "confidence": 0.98, + "type": "date", + "detected_format": "dd.mm.yyyy", + "sample_conversions": ["2024-01-01", "2024-01-02"] + }, + "Kalorien (kJ)": { + "suggested_field": "kcal", + "confidence": 0.85, + "type": "float", + "requires_conversion": true, + "source_unit": "kJ", + "target_unit": "kcal", + "sample_conversions": [1912.0, 2151.0] + } + }, + "available_fields": { + "date": {"type": "date", "required": true}, + "kcal": {"type": "float", "required": true, "min": 0, "max": 10000}, + "protein_g": {"type": "float", "required": false}, + "fat_g": {"type": "float", "required": false}, + "carbs_g": {"type": "float", "required": false} + } +} +``` + +#### **POST /api/csv/import** + +Führt Import mit bestätigtem Mapping aus. 
+ +**Request:** +```json +{ + "file_data": "", // Oder file_id aus /analyze + "module": "nutrition", + "mapping": { + "field_mappings": { + "Datum": "date", + "Kalorien (kJ)": "kcal", + "Protein (g)": "protein_g" + }, + "type_conversions": { + "date": {"type": "date", "format": "dd.mm.yyyy"}, + "kcal": {"type": "float", "source_unit": "kJ", "conversion_factor": 0.239} + } + }, + "save_mapping": true, + "mapping_name": "FDDB Export 2024" +} +``` + +**Response:** +```json +{ + "success": true, + "import_log_id": 456, + "stats": { + "total_rows": 100, + "imported": 95, + "updated": 3, + "skipped": 2, + "errors": 0 + }, + "error_details": [], + "duration_ms": 1234 +} +``` + +#### **GET /api/csv/mappings** + +Liste gespeicherter Mappings (User + System-Templates). + +**Query-Params:** +- `module`: Filter nach Modul (optional) + +**Response:** +```json +{ + "system_templates": [ + { + "id": 1, + "module": "nutrition", + "name": "FDDB Export (Standard)", + "description": "Standard-Format für FDDB.de CSV-Exporte", + "is_system": true, + "usage_count": 1523, + "success_rate": 0.99, + "created_at": "2024-01-01T00:00:00" + }, + { + "id": 2, + "module": "activity", + "name": "Apple Health Workout Export", + "description": "Apple Health CSV-Export (English)", + "is_system": true, + "usage_count": 5043, + "success_rate": 0.98, + "created_at": "2024-01-01T00:00:00" + } + ], + "user_mappings": [ + { + "id": 123, + "module": "nutrition", + "name": "Mein FDDB Export (angepasst)", + "description": "FDDB mit Notizen", + "is_system": false, + "usage_count": 8, + "success_rate": 1.0, + "last_used_at": "2024-01-15T10:30:00", + "created_at": "2024-01-10T12:00:00" + } + ] +} +``` + +**Sortierung:** +- System-Templates: nach `usage_count DESC` (beliebteste zuerst) +- User-Mappings: nach `last_used_at DESC` (neueste zuerst) + +#### **POST /api/csv/mappings/{mapping_id}/copy** + +Erstellt User-Kopie eines System-Templates (für Anpassungen). 
+ +**Response:** +```json +{ + "new_mapping_id": 124, + "message": "Kopie erstellt: 'FDDB Export (Standard)' → 'FDDB Export (Standard) - Kopie'" +} +``` + +#### **DELETE /api/csv/mappings/{mapping_id}** + +Löscht gespeichertes Mapping. + +**Permissions:** +- User können nur **eigene** Mappings löschen (profile_id = current_user) +- System-Templates (is_system = true) können **nicht** gelöscht werden +- Admin kann alle löschen (außer System-Templates) + +#### **POST /api/csv/rollback/{import_log_id}** + +Macht einen Import rückgängig (löscht importierte Einträge). + +**NICE-TO-HAVE:** Nur wenn Zeit bleibt. + +### 6.2 Bestehende Endpoints (Wrapper) + +Die bestehenden Endpoints **bleiben funktional** als dünner Wrapper: + +```python +# backend/routers/nutrition.py + +@router.post("/import-csv") +async def import_nutrition_csv(file: UploadFile, ...): + """ + LEGACY: FDDB-spezifischer Import (Backward-Kompatibilität). + Nutzt intern den Universal-Parser mit vordefiniertem FDDB-Template. + """ + # Wrapper um Universal-Parser: + from csv_parser import universal_import + + mapping = get_predefined_mapping("nutrition", "fddb") + result = await universal_import( + file=file, + module="nutrition", + mapping=mapping, + profile_id=pid + ) + + # Legacy Response-Format beibehalten: + return { + "imported": result["stats"]["imported"], + "skipped": result["stats"]["skipped"] + } +``` + +--- + +## 7. 
Frontend-UI (Skizze) + +### 7.1 CSV-Upload-Seite + +``` +┌─────────────────────────────────────────────────────────┐ +│ Daten importieren › CSV-Upload │ +├─────────────────────────────────────────────────────────┤ +│ │ +│ Schritt 1: Datei hochladen │ +│ ┌─────────────────────────────────────────────────┐ │ +│ │ [📁 Datei auswählen] nutrition-export.csv │ │ +│ └─────────────────────────────────────────────────┘ │ +│ │ +│ Schritt 2: Modul auswählen │ +│ ○ Ernährung ○ Aktivität ○ Gewicht ○ Vitalwerte │ +│ │ +│ [Weiter →] │ +└─────────────────────────────────────────────────────────┘ +``` + +### 7.2 Mapping-Editor + +``` +┌──────────────────────────────────────────────────────────────┐ +│ CSV-Import › Mapping bearbeiten │ +├──────────────────────────────────────────────────────────────┤ +│ │ +│ ✓ Format erkannt: FDDB Export (95% Übereinstimmung) │ +│ │ +│ Spalten-Zuordnung: │ +│ ┌────────────────────────────────────────────────────────┐ │ +│ │ CSV-Spalte → Datenbank-Feld │ │ +│ ├────────────────────────────────────────────────────────┤ │ +│ │ "Datum" → [date ▼] ✓ │ │ +│ │ "Kalorien (kJ)" → [kcal ▼] ⚠️ │ │ +│ │ └─ Umrechnung: kJ → kcal (÷4.184) │ │ +│ │ "Protein (g)" → [protein_g ▼] ✓ │ │ +│ │ "Fett (g)" → [fat_g ▼] ✓ │ │ +│ │ "Produkt" → [—nicht zuordnen—] │ │ +│ └────────────────────────────────────────────────────────┘ │ +│ │ +│ Vorschau (erste 3 Zeilen): │ +│ ┌────────────────────────────────────────────────────────┐ │ +│ │ date │ kcal │ protein_g │ fat_g │ │ │ +│ ├────────────────────────────────────────────────────────┤ │ +│ │ 2024-01-01 │ 1912.6 │ 80.0 │ 60.0 │ │ │ +│ │ 2024-01-02 │ 2151.7 │ 90.0 │ 70.0 │ │ │ +│ └────────────────────────────────────────────────────────┘ │ +│ │ +│ ☐ Mapping speichern als: [FDDB Export 2024________] │ +│ │ +│ [← Zurück] [Import starten →] │ +└──────────────────────────────────────────────────────────────┘ +``` + +### 7.3 Import-Fortschritt + +``` +┌─────────────────────────────────────────────────────────┐ +│ CSV-Import läuft... 
│ +├─────────────────────────────────────────────────────────┤ +│ │ +│ ████████████████████░░░░░░░░ 80% (80/100 Zeilen) │ +│ │ +│ ✓ 75 Einträge importiert │ +│ ↻ 3 Einträge aktualisiert │ +│ ⊗ 2 Fehler │ +│ │ +│ [Abbrechen] │ +└─────────────────────────────────────────────────────────┘ +``` + +--- + +## 8. Implementierungs-Phasen + +### Phase 1: Foundation (Woche 1) **← START HIER** + +**Ziel:** Parser-Engine + Modul-Registry + System-Templates + +- [ ] **Migration:** + - `XXX_csv_parser_tables.sql` – `csv_field_mappings`, `csv_import_log` Tabellen + - `XXX_csv_parser_seed_templates.sql` – 12-15 System-Templates anlegen +- [ ] **Backend:** + - `csv_parser/core.py` – Encoding/Delimiter-Detection + - `csv_parser/module_registry.py` – Modul-Definitionen + - `csv_parser/type_converter.py` – Date/Number/Unit-Converter (20+ Formate) + - `csv_parser/permissions.py` – System-Template Read-Only-Check +- [ ] **Testing:** Unit-Tests für Type-Converter + System-Template-Seed + +**Output:** +- Funktionierender Parser (ohne Auto-Detection, ohne UI) +- 12-15 System-Templates in DB verfügbar +- User können Templates laden (aber nicht ändern) + +--- + +### Phase 2: Mapping-System (Woche 2) + +**Ziel:** Auto-Detection + Mapping-Persistenz + +- [ ] **Backend:** + - `csv_parser/mapping_engine.py` – Auto-Detection, Fuzzy-Match + - `csv_parser/suggestions.py` – Intelligente Vorschläge + - API: `/api/csv/analyze`, `/api/csv/mappings`, `/api/csv/mappings/{id}/copy` +- [ ] **Permissions:** System-Template Read-Only-Enforcement +- [ ] **Testing:** + - Auto-Detection-Tests mit realen CSV-Files (alle System-Templates) + - User vs. 
System Permissions (User kann nicht System-Template ändern) + - Copy-Workflow (System-Template → User-Mapping) + +**Output:** +- Auto-Detection funktioniert (User-Mappings > System-Templates) +- User können System-Templates kopieren und anpassen +- Permissions korrekt (Read-Only für System-Templates) + +--- + +### Phase 3: Import-Executor + API (Woche 2-3) + +**Ziel:** Import-Workflow komplett + +- [ ] **Backend:** + - `csv_parser/executor.py` – Batch-Insert, Validation, Rollback + - API: `/api/csv/import`, `/api/csv/mappings` +- [ ] **Migration:** Bestehende Import-Endpoints auf Wrapper umstellen +- [ ] **Testing:** End-to-End-Tests (Nutrition, Activity) + +**Output:** Import funktioniert via API, Legacy-Endpoints funktional + +--- + +### Phase 4: Frontend (Woche 3-4) + +**Ziel:** User-Interface für Mapping-Editor + +- [ ] **Frontend:** + - `CSVUploadPage.jsx` – Upload + Modul-Auswahl + - `CSVMappingEditor.jsx` – Spalten-zu-Feld-Zuordnung + - `CSVImportProgress.jsx` – Fortschritts-Anzeige + - `CSVMappingLibrary.jsx` – Gespeicherte Mappings anzeigen/auswählen +- [ ] **UX:** Drag & Drop für Spalten-Zuordnung +- [ ] **Testing:** E2E-Tests (Playwright) + +**Output:** Vollständige UI, User kann eigene Mappings erstellen + +--- + +### Phase 5: Rollout (Woche 4) + +**Ziel:** Alle Module migriert, Legacy-Code entfernt + +- [ ] Alle Module auf Universal-Parser migriert (Weight, Circumference, Caliper, Sleep) +- [ ] Legacy-Import-Code entfernt (nach Deprecation-Phase) +- [ ] Dokumentation aktualisiert +- [ ] Gitea Issue #21 geschlossen + +--- + +## 9. Offene Fragen (für User-Approval) + +1. **Scope:** Alle Module sofort oder schrittweise? (Empfehlung: Start mit Nutrition + Activity) +2. **Rollback:** Wichtig genug für Phase 1-3? Oder NICE-TO-HAVE? +3. **UI-Komplexität:** Drag & Drop oder simple Dropdowns? (Empfehlung: Dropdowns zuerst, D&D später) +4. **Performance:** Import-Limit pro File? (Empfehlung: 10.000 Zeilen, dann Batch-Upload) +5. 
**Migration:** Legacy-Endpoints sofort wrappen oder parallel laufen lassen? + +--- + +## 10. Aufwandsschätzung + +| Phase | Aufwand | Komponenten | +|-------|---------|-------------| +| **Phase 1** | 8-12h | Parser-Engine, Type-Converter, Migrations | +| **Phase 2** | 6-8h | Auto-Detection, Mapping-Engine, Suggestions | +| **Phase 3** | 8-10h | Import-Executor, API-Endpoints, Wrapper | +| **Phase 4** | 12-16h | Frontend UI (3-4 Komponenten) | +| **Phase 5** | 4-6h | Migration aller Module, Cleanup | +| **GESAMT** | **38-52h** | ~5-7 Arbeitstage | + +**Kritischer Pfad:** Phase 1 → Phase 2 → Phase 3 (Backend muss komplett sein vor Frontend) + +--- + +## 11. Risiken & Mitigations + +| Risiko | Wahrscheinlichkeit | Impact | Mitigation | +|--------|-------------------|--------|------------| +| **Date-Format-Vielfalt:** 20+ Formate schwer zu parsen | HOCH | MITTEL | Fallback auf Manual-Input, User kann Format angeben | +| **Performance:** Große Files (>10k Zeilen) langsam | MITTEL | MITTEL | Batch-Processing + Background-Job (Celery) | +| **Backward-Compatibility:** Legacy-Code bricht | NIEDRIG | HOCH | Parallel-Betrieb + Feature-Flag | +| **UX-Komplexität:** Mapping-Editor zu komplex | MITTEL | NIEDRIG | Wizard-Flow, Step-by-Step, gute Defaults | + +--- + +## 12. 
Erfolgskriterien + +✅ **User kann CSV-File hochladen ohne Code-Kenntnisse** +✅ **System erkennt bekannte Formate automatisch (≥80% Accuracy)** +✅ **User kann eigene Mappings speichern und wiederverwenden** +✅ **Import-Fehlerrate < 5% bei validen Daten** +✅ **Performance: 1000 Zeilen in < 5 Sekunden** +✅ **Alle bestehenden CSV-Imports funktionieren weiter (Wrapper)** + +--- + +**Nächster Schritt:** User-Approval für Konzept + Start Phase 1 (Foundation) + +**Geschätzter Start-to-Finish:** 5-7 Arbeitstage (bei Fokus-Arbeit ohne Unterbrechungen) diff --git a/.claude/rules/ARCHITECTURE_old.md b/.claude/rules/ARCHITECTURE_old.md new file mode 100644 index 0000000..13a5ea5 --- /dev/null +++ b/.claude/rules/ARCHITECTURE_old.md @@ -0,0 +1,339 @@ +# Architektur-Regeln – Mitai Jinkendo + +> **PFLICHTLEKTÜRE für Claude Code vor jeder Implementierung.** +> Diese Regeln sind verbindlich und dürfen nicht ohne explizite +> Genehmigung des Nutzers abgeändert werden. + +--- + +## 1. Router-Architektur + +### 1.1 Ein Modul = Ein Router +Jedes fachliche Modul hat genau eine Router-Datei in `backend/routers/`. + +``` +backend/routers/ +├── auth.py # Authentifizierung +├── profiles.py # Nutzerprofile +├── weight.py # Gewichts-Tracking +├── sleep.py # Schlaf-Modul +├── training_types.py # Trainingstypen + HF +└── ... # je neues Modul = neue Datei +``` + +**Regeln:** +- Kein Endpoint darf außerhalb seines thematischen Routers definiert werden +- Neue Module immer als neue Router-Datei anlegen, nie in bestehende einfügen +- Router in `main.py` registrieren: `app.include_router(modul.router, prefix="/api")` +- Router-Datei-Name = Modul-Name in `version.py` MODULE_VERSIONS + +### 1.2 API-First Prinzip +Jede Funktion ist zuerst als API-Endpoint implementiert – die UI nutzt ausschließlich +diese Endpoints über `api.js`. Keine Business-Logik im Frontend. 
+ +```python +# ✅ Richtig: Logik im Backend-Endpoint +@router.get("/sleep/stats") +def get_sleep_stats(session=Depends(require_auth)): + # Berechnung hier + return {"avg_duration": ..., "sleep_debt": ...} + +# ❌ Falsch: Berechnung im Frontend +const sleepDebt = entries.reduce((sum, e) => sum + (goal - e.duration), 0) +``` + +### 1.3 Einheitliche Fehlerbehandlung +```python +# ✅ Immer dieses Format: +raise HTTPException(status_code=404, detail="Eintrag nicht gefunden") +# Response: {"detail": "Eintrag nicht gefunden"} + +# ❌ Nie eigene Formate: +return {"error": "not found"} +return {"message": "Fehler", "success": False} +``` + +--- + +## 2. Versionskontrollsystem + +### 2.1 Versionierungsschema +**Semantic Versioning: `MAJOR.MINOR.PATCH`** + +| Typ | Wann | Beispiel | +|-----|------|---------| +| MAJOR | Breaking Change, DB-Migration inkompatibel | 9.0.0 → 10.0.0 | +| MINOR | Neues Feature, neues Modul | 9.2.0 → 9.3.0 | +| PATCH | Bugfix, kleine Änderung, Refactor | 9.3.0 → 9.3.1 | + +### 2.2 Versions-Dateien + +**Backend: `backend/version.py`** +```python +APP_VERSION = "9.3.0" +BUILD_DATE = "2026-03-22" + +MODULE_VERSIONS = { + "auth": "1.2.0", + "profiles": "1.1.0", + "weight": "1.0.3", + "circumference": "1.0.1", + "caliper": "1.0.1", + "activity": "1.1.0", + "nutrition": "1.0.2", + "photos": "1.0.0", + "insights": "1.3.0", + "prompts": "1.1.0", + "admin": "1.2.0", + "stats": "1.0.1", + "exportdata": "1.1.0", + "importdata": "1.0.0", + "membership": "2.1.0", +} + +CHANGELOG = [ + { + "version": "9.3.0", + "date": "2026-03-22", + "changes": [ + "Feature: Sleep Module (sleep_log, JSONB-Segmente)", + "Feature: Vitalwerte-Seite in Navigation", + "Feature: Trainingstypen-Kategorisierung", + ] + }, + { + "version": "9.2.1", + "date": "2026-03-20", + "changes": [ + "Fix: Feature-Enforcement Rollback", + "Fix: Erholungsstatus-Gewichtung korrigiert", + ] + }, +] +``` + +**Frontend: `frontend/src/version.js`** +```javascript +export const APP_VERSION = "9.3.0" +export 
const BUILD_DATE = "2026-03-22" + +export const PAGE_VERSIONS = { + Dashboard: "1.3.0", + LoginScreen: "1.1.0", + WeightPage: "1.0.3", + ActivityPage: "1.2.0", + NutritionPage: "1.1.0", + AnalysisPage: "1.3.0", + SettingsPage: "1.4.0", + AdminPanel: "1.2.0", + SubscriptionPage: "1.0.0", + // Neue Seiten hier eintragen +} +``` + +### 2.3 Versions-Endpoint + +**`GET /api/version`** – öffentlich (kein Auth erforderlich) + +```json +{ + "app_version": "9.3.0", + "build_date": "2026-03-22", + "backend_version": "9.3.0", + "modules": { + "auth": "1.2.0", + "sleep": "1.0.0" + }, + "db_schema_version": "20260322", + "environment": "production" +} +``` + +Dieser Endpoint wird in `backend/routers/version.py` implementiert und liest +direkt aus `version.py`. + +### 2.4 Versions-Anzeige in der App + +**Settings-Seite – Versions-Panel:** +``` +System-Versionen +───────────────────────────────────── +App (gesamt) 9.3.0 +Backend 9.3.0 ✓ erreichbar +Frontend 9.3.0 ✓ geladen +DB-Schema 20260322 +Umgebung production +───────────────────────────────────── +Module +auth 1.2.0 +sleep 1.0.0 +membership 2.1.0 +[alle Module...] +───────────────────────────────────── +[Changelog] [Cache leeren] +``` + +Frontend ruft beim Laden der Settings-Seite `/api/version` ab und vergleicht +mit der eigenen `APP_VERSION` aus `version.js`. Bei Abweichung: Warnung anzeigen. + +### 2.5 Pflicht-Regel: Versions-Bump bei jedem Commit + +**Jede Code-Änderung erfordert:** +1. Versions-Bump in `backend/version.py` (APP_VERSION + betroffenes MODULE_VERSION) +2. Versions-Bump in `frontend/src/version.js` (APP_VERSION + betroffene PAGE_VERSION) +3. Changelog-Eintrag in `backend/version.py` CHANGELOG + +**Claude Code prüft das im `/deploy` Command automatisch.** + +Kein Commit ohne Versions-Bump – keine Ausnahme. 
+ +### 2.6 DB-Schema-Version + +Format: `YYYYMMDD` (Datum der letzten Migration) + +Gespeichert in `backend/version.py`: +```python +DB_SCHEMA_VERSION = "20260322" +``` + +Bei jeder Schema-Änderung (ALTER TABLE, neue Tabelle) → DB_SCHEMA_VERSION aktualisieren. + +--- + +## 3. Datenbankregeln + +### 3.1 Pflichtfelder für neue Tabellen +```sql +-- Jede neue Tabelle braucht: +id SERIAL PRIMARY KEY, +created_at TIMESTAMP DEFAULT NOW(), +updated_at TIMESTAMP DEFAULT NOW() +``` + +### 3.2 Source-Tracking bei Import-Daten +Tabellen die Daten aus externen Quellen empfangen brauchen: +```sql +source VARCHAR(50) DEFAULT 'manual' +-- Werte: 'manual' | 'apple_health' | 'garmin' | 'withings' +``` + +Manuelle Einträge (`source = 'manual'`) haben IMMER Vorrang bei Reimport: +```sql +-- Reimport überschreibt nur nicht-manuelle Einträge: +INSERT INTO sleep_log (...) ON CONFLICT (profile_id, date) +DO UPDATE SET ... WHERE sleep_log.source != 'manual' +``` + +### 3.3 Profile-ID Isolation +Jede Tabelle mit Nutzerdaten hat `profile_id` als Foreign Key. +Kein Endpoint gibt Daten eines anderen Profils zurück. +Profile-ID kommt IMMER aus der Session, nie aus Request-Parametern. + +### 3.4 Boolean-Werte +```sql +-- PostgreSQL Boolean (nicht SQLite 0/1): +WHERE active = true ✓ +WHERE active = 1 ✗ +``` + +--- + +## 4. Frontend-Regeln + +### 4.1 Alle API-Calls über api.js +```javascript +// ✅ Richtig: +import { api } from '../utils/api' +const data = await api.listSleep() + +// ❌ Falsch: +const r = await fetch('/api/sleep') +``` + +### 4.2 Neue Seite = Eintrag in PAGE_VERSIONS +Jede neue Seite in `frontend/src/version.js` registrieren. 
+ +### 4.3 CSS-Variablen statt Hardcoded-Farben +```javascript +// ✅ Richtig: +style={{color: 'var(--accent)'}} + +// ❌ Falsch: +style={{color: '#1D9E75'}} +``` + +### 4.4 Fehlerbehandlung in allen async Funktionen +```javascript +try { + const data = await api.meinEndpoint() + setData(data) +} catch(e) { + setError(e.message) +} finally { + setLoading(false) +} +``` + +--- + +## 5. Git & Deployment-Regeln + +### 5.1 Nie direkt auf main pushen +Immer über Pull Request in Gitea: develop → main. +develop Branch niemals löschen. + +### 5.2 Commit-Message Format +``` +feat: neues Feature oder Modul +fix: Bugfix +refactor: Umbau ohne Funktionsänderung +docs: Dokumentation +version: Versions-Bump +ci: CI/CD Änderungen +chore: Maintenance +``` + +### 5.3 Versions-Bump im Commit +``` +feat: Sleep Module v1.0.0 + +- sleep_log Tabelle mit JSONB-Segmenten +- Import aus Apple Health CSV +- Korrelationen Schlaf <-> Ruhepuls + +version: 9.3.0 (backend + frontend) +module: sleep 1.0.0 +``` + +--- + +## 6. Dokumentations-Regeln + +### 6.1 Neue Module dokumentieren +Bei jedem neuen Modul: +1. Fachliche Spec: `.claude/docs/functional/MODUL_NAME.md` +2. Technische Spec: `.claude/docs/technical/MODUL_NAME.md` +3. Nach Fertigstellung: `.claude/library/` aktualisieren + +### 6.2 CLAUDE.md aktuell halten +Nach größeren Änderungen CLAUDE.md Versions-Tabelle aktualisieren. 
+ +### 6.3 Lessons Learned dokumentieren +Jeder Rollback oder schwerer Bug → Eintrag in `.claude/rules/LESSONS_LEARNED.md` + +--- + +## Zusammenfassung: Checkliste vor jedem Commit + +``` +[ ] Versions-Bump in backend/version.py (APP_VERSION + MODULE) +[ ] Versions-Bump in frontend/src/version.js (APP_VERSION + PAGE) +[ ] Changelog-Eintrag in backend/version.py +[ ] DB_SCHEMA_VERSION aktualisiert (wenn Schema geändert) +[ ] Neues Modul in PAGE_VERSIONS / MODULE_VERSIONS eingetragen +[ ] Auth auf alle neuen Endpoints (require_auth) +[ ] Fehlerformat einheitlich (HTTPException mit detail) +[ ] Neue Tabellen haben created_at + updated_at +[ ] Import-Tabellen haben source-Feld +[ ] api.js für alle Frontend API-Calls +``` diff --git a/backend/csv_parser/__init__.py b/backend/csv_parser/__init__.py new file mode 100644 index 0000000..86786d8 --- /dev/null +++ b/backend/csv_parser/__init__.py @@ -0,0 +1,27 @@ +"""Universal CSV import foundation (Issue #21).""" + +from csv_parser.core import ( + decode_raw_bytes, + sniff_delimiter, + parse_csv_sample, + column_signature, + normalize_header_for_signature, +) +from csv_parser.module_registry import MODULE_DEFINITIONS, get_module_definition, list_modules +from csv_parser.type_converter import convert_value, build_row_after_mapping +from csv_parser.permissions import user_may_delete_mapping, user_may_edit_mapping_row + +__all__ = [ + "decode_raw_bytes", + "sniff_delimiter", + "parse_csv_sample", + "column_signature", + "normalize_header_for_signature", + "MODULE_DEFINITIONS", + "get_module_definition", + "list_modules", + "convert_value", + "build_row_after_mapping", + "user_may_delete_mapping", + "user_may_edit_mapping_row", +] diff --git a/backend/csv_parser/core.py b/backend/csv_parser/core.py new file mode 100644 index 0000000..dd4f225 --- /dev/null +++ b/backend/csv_parser/core.py @@ -0,0 +1,137 @@ +""" +CSV bytes → text, delimiter sniffing, strukturierte Erstzeilen für Analyse (Issue #21). 
# Candidate delimiters, in tie-break order (the first one wins on equal counts).
_DEFAULT_DELIMS = [",", ";", "\t"]


def decode_raw_bytes(raw: bytes) -> str:
    """Decode raw CSV bytes to text.

    UTF-8 is tried first and a leading BOM is removed. Bytes that are not
    valid UTF-8 are decoded as Latin-1 instead. Empty input yields "".

    Args:
        raw: File content as bytes.

    Returns:
        The decoded text without a leading byte-order mark.
    """
    if not raw:
        return ""
    try:
        # "utf-8-sig" also decodes BOM-less UTF-8, so no separate "utf-8" pass is needed.
        text = raw.decode("utf-8-sig")
    except UnicodeDecodeError:
        # Latin-1 maps every byte to a code point, so this fallback cannot fail.
        text = raw.decode("latin-1")
    # Drop a BOM that survived decoding (e.g. a doubled BOM).
    if text.startswith("\ufeff"):
        text = text[1:]
    return text


def sniff_delimiter(sample_line: str) -> str:
    """Guess the delimiter of a CSV line by counting candidate characters.

    Deliberately avoids csv.Sniffer, which is less robust on short lines.
    Text inside double-quoted fields is ignored so that commas or semicolons
    that belong to a value are not mistaken for separators.

    Args:
        sample_line: One line of the file (typically the first one).

    Returns:
        The most frequent candidate from _DEFAULT_DELIMS; "," for blank input
        or when no candidate occurs at all.
    """
    if not sample_line or not sample_line.strip():
        return ","
    # Remove quoted sections before counting; an unbalanced quote is left as is.
    unquoted = re.sub(r'"[^"]*"', "", sample_line)
    best = ","
    best_count = -1
    for candidate in _DEFAULT_DELIMS:
        count = unquoted.count(candidate)
        if count > best_count:
            best_count = count
            best = candidate
    return best


def _split_first_lines(text: str, max_lines: int = 5) -> List[str]:
    """Return the first non-blank lines of ``text``.

    Collection stops as soon as ``max_lines`` lines have been gathered; lines
    are returned unstripped.
    """
    lines: List[str] = []
    for line in text.splitlines():
        if line.strip():
            lines.append(line)
        if len(lines) >= max_lines:
            break
    return lines
def normalize_header_for_signature(name: str) -> str:
    """Normalize a CSV header for signature comparison.

    The name is trimmed and lower-cased, whitespace runs become "_", every
    run of characters outside ``a-z 0-9 _ ä ö ü ß ( ) . % -`` becomes "_",
    and leading/trailing underscores are removed.
    """
    s = name.strip().lower()
    s = re.sub(r"\s+", "_", s)
    s = re.sub(r"[^a-z0-9_äöüß().%-]+", "_", s)
    return s.strip("_")


def column_signature(headers: List[str]) -> List[str]:
    """Build the sorted, de-duplicated signature of normalized column names.

    ``None`` entries, blank headers and headers that normalize to an empty
    string (e.g. a header consisting only of symbols) are left out, so they
    cannot distort the overlap score of an otherwise matching file.
    """
    normalized = {
        normalize_header_for_signature(str(h)) for h in headers if h is not None
    }
    # A header made only of stripped characters normalizes to "" - not a real column name.
    normalized.discard("")
    return sorted(normalized)


def headers_signature_match_score(sig_csv: List[str], sig_template: List[str]) -> float:
    """Return the Jaccard overlap (0..1) of two column signatures.

    Two empty signatures count as a perfect match (1.0); exactly one empty
    signature yields 0.0.
    """
    a, b = set(sig_csv), set(sig_template)
    if not a and not b:
        return 1.0
    if not a or not b:
        return 0.0
    # Both sets are non-empty here, so the union is never empty.
    return len(a & b) / len(a | b)
# Target modules for CSV import: table, fields, required flags, duplicate strategy.
#
# blood_pressure: the DB column is measured_at; the logical fields
# measured_date + measured_time are merged into measured_at by the executor.
# activity: date may be derived from start_time (ISO datetime) when only
# start_time is set.
MODULE_DEFINITIONS: Dict[str, Dict[str, Any]] = {
    "nutrition": {
        "table": "nutrition_log",
        "fields": {
            "date": {"type": "date", "required": True},
            "kcal": {"type": "float", "required": False},
            "protein_g": {"type": "float", "required": False, "min": 0},
            "fat_g": {"type": "float", "required": False, "min": 0},
            "carbs_g": {"type": "float", "required": False, "min": 0},
        },
        "duplicate_key": ["profile_id", "date"],
        "duplicate_strategy": "update",
    },
    "activity": {
        "table": "activity_log",
        "fields": {
            "date": {"type": "date", "required": True},
            "start_time": {"type": "time", "required": False},
            "end_time": {"type": "time", "required": False},
            "activity_type": {"type": "string", "required": True},
            "duration_min": {"type": "float", "required": False, "min": 0},
            "kcal_active": {"type": "float", "required": False},
            "distance_km": {"type": "float", "required": False},
            "hr_avg": {"type": "float", "required": False, "min": 30, "max": 220},
        },
        "derive_date_from_datetime_field": "start_time",
        "duplicate_key": ["profile_id", "date", "start_time"],
        "duplicate_strategy": "update",
    },
    "blood_pressure": {
        "table": "blood_pressure_log",
        "fields": {
            "measured_date": {"type": "date", "required": True},
            "measured_time": {"type": "time", "required": True},
            "systolic": {"type": "int", "required": True},
            "diastolic": {"type": "int", "required": True},
            "pulse": {"type": "int", "required": False},
        },
        "logical_to_db": "blood_pressure_composite_measured_at",
        "duplicate_key": ["profile_id", "measured_at"],
        "duplicate_strategy": "update",
    },
    "weight": {
        "table": "weight_log",
        "fields": {
            "date": {"type": "date", "required": True},
            "weight": {"type": "float", "required": True, "min": 20, "max": 400},
            "note": {"type": "string", "required": False, "max_length": 2000},
        },
        "duplicate_key": ["profile_id", "date"],
        "duplicate_strategy": "update",
    },
}

# Target values meaning "do not import this column". Kept in line with the
# markers build_row_after_mapping() skips, so a mapping the converter accepts
# also passes validation.
_UNMAPPED_MARKERS = ("", None, "-", "—", "_skip")


def get_module_definition(module: str) -> Dict[str, Any] | None:
    """Return the definition for ``module`` or None if the module is unknown."""
    return MODULE_DEFINITIONS.get(module)


def list_modules() -> list[str]:
    """Return all registered module names in alphabetical order."""
    return sorted(MODULE_DEFINITIONS.keys())


def validate_field_mappings(module: str, field_mappings: dict) -> None:
    """Check that every mapped target field exists in the module definition.

    Columns mapped to an "unmapped" marker (empty, None, "-", "—", "_skip")
    are ignored.

    Raises:
        ValueError: if the module is unknown or a target field is not part of
            the module's field list.
    """
    mod = get_module_definition(module)
    if not mod:
        raise ValueError(f"Unbekanntes Modul: {module}")
    allowed = set(cast(dict, mod["fields"]).keys())
    for db_field in field_mappings.values():
        if db_field in _UNMAPPED_MARKERS:
            continue
        if db_field not in allowed:
            raise ValueError(f"Ungültiges Zielfeld '{db_field}' für Modul '{module}'")
# Format alias -> strptime pattern.
# convert_value() resolves aliases with fmt_key.lower(), so every alias needs
# an all-lowercase key. Without "dd.mm.yyyy hh:mm" the FDDB format
# "dd.mm.yyyy HH:MM" was never found and every date failed to convert.
# The mixed-case spellings stay for code that indexes this table directly.
DATE_FORMAT_STRPTIME: dict[str, str] = {
    "yyyy-mm-dd": "%Y-%m-%d",
    "mm/dd/yyyy": "%m/%d/%Y",
    "dd/mm/yyyy": "%d/%m/%Y",
    "dd.mm.yyyy": "%d.%m.%Y",
    "dd.mm.yyyy hh:mm": "%d.%m.%Y %H:%M",
    "dd.mm.yyyy HH:MM": "%d.%m.%Y %H:%M",
    "yyyy-mm-dd hh:mm:ss": "%Y-%m-%d %H:%M:%S",
    "yyyy-mm-dd HH:MM:SS": "%Y-%m-%d %H:%M:%S",
}
+ """ + if spec is None: + return raw.strip() if raw else None + if raw is None: + return None + s = raw.strip() + if s == "": + return None + + t = spec.get("type", "string") + if t == "string": + return s + + if t in ("float", "number"): + dec = spec.get("decimal_separator", ".") + v = _parse_float(s, dec) + factor = spec.get("conversion_factor") + if factor is not None: + v = float(v) * float(factor) + return v + + if t == "int": + return _parse_int(s) + + if t == "date": + fmt_key = str(spec.get("format", "yyyy-mm-dd")) + fmt = DATE_FORMAT_STRPTIME.get(fmt_key.lower()) + if not fmt: + raise ValueError(f"Unbekanntes Datumsformat: {fmt_key}") + part = dt.datetime.strptime(s, fmt) + extract = spec.get("extract", "date_only") + if extract == "date_only": + return part.date() + return part + + if t == "time": + fmt_key = str(spec.get("format", "HH:MM")) + fmt = TIME_FORMAT_STRPTIME.get(fmt_key, fmt_key) + part = dt.datetime.strptime(s, fmt) + return part.time() + + if t == "datetime": + fmt_key = str(spec.get("format", "yyyy-mm-dd HH:MM:SS")) + fmt = DATE_FORMAT_STRPTIME.get(fmt_key.lower()) + if not fmt: + raise ValueError(f"Unbekanntes Datetime-Format: {fmt_key}") + return dt.datetime.strptime(s, fmt) + + if t == "duration": + # z. B. 
def build_row_after_mapping(
    csv_row: Mapping[str, str],
    field_mappings: Mapping[str, str],
    type_conversions: Mapping[str, Any] | None,
) -> dict[str, Any]:
    """Apply the csv_column -> db_field mapping and the type conversion to one row.

    Columns without a mapping, or mapped to an "unmapped" marker
    ("", "-", "—", "_skip"), are left out of the result.

    Args:
        csv_row: Raw cell strings keyed by CSV column name.
        field_mappings: CSV column name -> target DB field.
        type_conversions: Target DB field -> conversion spec; may be None.

    Returns:
        Converted values keyed by DB field. A cell that cannot be converted is
        stored as None instead of aborting the whole row.
    """
    out: dict[str, Any] = {}
    tc = type_conversions or {}
    for csv_col, raw in csv_row.items():
        db_field = field_mappings.get(csv_col)
        # "—" is the marker the docstring has always promised to skip; the code
        # used to check only the ASCII hyphen and "_skip".
        if not db_field or db_field in ("-", "—", "_skip"):
            continue
        spec = tc.get(db_field)
        try:
            out[db_field] = convert_value(raw, db_field, spec if isinstance(spec, dict) else None)
        except Exception:
            # Deliberate best effort: a bad cell becomes None so that later
            # validation can decide about the row as a whole.
            out[db_field] = None
    return out
app.include_router(reference_values.router) # /api/reference-value-types, /api/profile-reference-values app.include_router(admin_reference_value_types.router) # /api/admin/reference-value-types app.include_router(app_dashboard.router) # /api/app/dashboard-layout +app.include_router(csv_import.router) # /api/csv/* (Issue #21) +app.include_router(admin_csv_templates.router) # /api/admin/csv-templates/* (Issue #21) # ── Health Check ────────────────────────────────────────────────────────────── @app.get("/") diff --git a/backend/migrations/042_csv_parser_tables.sql b/backend/migrations/042_csv_parser_tables.sql new file mode 100644 index 0000000..3a2303a --- /dev/null +++ b/backend/migrations/042_csv_parser_tables.sql @@ -0,0 +1,75 @@ +-- Migration 042: Universal CSV Parser – Mapping-Registry & Import-Log (Issue #21) +-- Tabellen für System-Templates (profile_id NULL, is_system true) und User-Mappings. + +CREATE TABLE IF NOT EXISTS csv_field_mappings ( + id SERIAL PRIMARY KEY, + profile_id UUID REFERENCES profiles(id) ON DELETE CASCADE, + is_system BOOLEAN NOT NULL DEFAULT false, + module VARCHAR(50) NOT NULL, + mapping_name VARCHAR(100) NOT NULL, + description TEXT, + column_signature TEXT[] NOT NULL DEFAULT '{}', + delimiter VARCHAR(10) NOT NULL DEFAULT ',', + encoding VARCHAR(20) NOT NULL DEFAULT 'utf-8', + has_header BOOLEAN NOT NULL DEFAULT true, + field_mappings JSONB NOT NULL DEFAULT '{}', + type_conversions JSONB, + usage_count INTEGER NOT NULL DEFAULT 0, + last_used_at TIMESTAMPTZ, + success_rate REAL NOT NULL DEFAULT 1.0, + created_at TIMESTAMPTZ NOT NULL DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMPTZ NOT NULL DEFAULT CURRENT_TIMESTAMP, + CONSTRAINT csv_field_mappings_system_profile CHECK ( + (is_system = true AND profile_id IS NULL) + OR (is_system = false AND profile_id IS NOT NULL) + ) +); + +COMMENT ON TABLE csv_field_mappings IS 'CSV Import: System-Templates + User-Mappings (Issue #21)'; +COMMENT ON COLUMN csv_field_mappings.is_system IS 'true = 
globales Template (nur Admin pflegbar), false = User-Mapping'; + +CREATE UNIQUE INDEX IF NOT EXISTS idx_csv_field_mappings_system_module_name + ON csv_field_mappings (module, mapping_name) + WHERE is_system = true AND profile_id IS NULL; + +CREATE UNIQUE INDEX IF NOT EXISTS idx_csv_field_mappings_user_module_name + ON csv_field_mappings (profile_id, module, mapping_name) + WHERE is_system = false; + +CREATE INDEX IF NOT EXISTS idx_csv_field_mappings_module_profile + ON csv_field_mappings (module, profile_id); + +CREATE INDEX IF NOT EXISTS idx_csv_field_mappings_system_module + ON csv_field_mappings (module) + WHERE is_system = true; + +CREATE TABLE IF NOT EXISTS csv_import_log ( + id SERIAL PRIMARY KEY, + profile_id UUID NOT NULL REFERENCES profiles(id) ON DELETE CASCADE, + mapping_id INTEGER REFERENCES csv_field_mappings(id) ON DELETE SET NULL, + module VARCHAR(50) NOT NULL, + filename VARCHAR(255), + rows_total INTEGER, + rows_imported INTEGER, + rows_updated INTEGER, + rows_skipped INTEGER, + rows_errors INTEGER, + error_details JSONB, + started_at TIMESTAMPTZ NOT NULL DEFAULT CURRENT_TIMESTAMP, + finished_at TIMESTAMPTZ, + status VARCHAR(20) NOT NULL DEFAULT 'running', + affected_ids JSONB +); + +CREATE INDEX IF NOT EXISTS idx_csv_import_log_profile_module + ON csv_import_log (profile_id, module DESC, started_at DESC); + +COMMENT ON COLUMN csv_import_log.affected_ids IS 'Pro Import gesammelte Primärschlüssel je Tabelle (Rollback / Bereinigung)'; + +INSERT INTO system_config (key, value, updated_at) +VALUES ( + 'csv_import', + '{"max_rows_per_file": 50000, "max_file_bytes": 52428800}'::jsonb, + CURRENT_TIMESTAMP +) +ON CONFLICT (key) DO NOTHING; diff --git a/backend/migrations/043_csv_parser_seed_templates.sql b/backend/migrations/043_csv_parser_seed_templates.sql new file mode 100644 index 0000000..93f8016 --- /dev/null +++ b/backend/migrations/043_csv_parser_seed_templates.sql @@ -0,0 +1,314 @@ +-- Migration 043: CSV Parser – System-Templates (Issue #21) +-- 
Idempotent: pro Template nur einfügen, wenn noch kein System-Eintrag für module+mapping_name existiert. + +INSERT INTO csv_field_mappings ( + profile_id, is_system, module, mapping_name, description, + column_signature, delimiter, encoding, has_header, + field_mappings, type_conversions +) +SELECT + NULL, + true, + 'nutrition', + 'FDDB Export (Standard)', + 'Standard-Format für FDDB.de CSV-Exporte (Deutsch). Delimiter Semikolon, kJ → kcal Konvertierung.', + ARRAY['datum_tag_monat_jahr_stunde_minute', 'fett_g', 'kh_g', 'kj', 'protein_g']::TEXT[], + ';', + 'utf-8', + true, + '{ + "datum_tag_monat_jahr_stunde_minute": "date", + "kj": "kcal", + "fett_g": "fat_g", + "kh_g": "carbs_g", + "protein_g": "protein_g" + }'::JSONB, + '{ + "date": { + "type": "date", + "format": "dd.mm.yyyy HH:MM", + "extract": "date_only" + }, + "kcal": { + "type": "float", + "source_unit": "kJ", + "target_unit": "kcal", + "conversion_factor": 0.239, + "decimal_separator": "," + }, + "fat_g": {"type": "float", "decimal_separator": ","}, + "carbs_g": {"type": "float", "decimal_separator": ","}, + "protein_g": {"type": "float", "decimal_separator": ","} + }'::JSONB +WHERE NOT EXISTS ( + SELECT 1 FROM csv_field_mappings f + WHERE f.is_system AND f.profile_id IS NULL + AND f.module = 'nutrition' AND f.mapping_name = 'FDDB Export (Standard)' +); + +INSERT INTO csv_field_mappings ( + profile_id, is_system, module, mapping_name, description, + column_signature, delimiter, encoding, has_header, + field_mappings, type_conversions +) +SELECT + NULL, true, 'nutrition', 'MyFitnessPal Export', + 'Standard CSV export from MyFitnessPal (English)', + ARRAY['Carbohydrates (g)', 'Calories', 'Date', 'Fat (g)', 'Protein (g)']::TEXT[], + ',', 'utf-8', true, + '{ + "Date": "date", + "Calories": "kcal", + "Fat (g)": "fat_g", + "Carbohydrates (g)": "carbs_g", + "Protein (g)": "protein_g" + }'::JSONB, + '{ + "date": {"type": "date", "format": "yyyy-mm-dd"}, + "kcal": {"type": "float", "decimal_separator": "."}, + 
"fat_g": {"type": "float", "decimal_separator": "."}, + "carbs_g": {"type": "float", "decimal_separator": "."}, + "protein_g": {"type": "float", "decimal_separator": "."} + }'::JSONB +WHERE NOT EXISTS ( + SELECT 1 FROM csv_field_mappings f + WHERE f.is_system AND f.profile_id IS NULL + AND f.module = 'nutrition' AND f.mapping_name = 'MyFitnessPal Export' +); + +INSERT INTO csv_field_mappings ( + profile_id, is_system, module, mapping_name, description, + column_signature, delimiter, encoding, has_header, + field_mappings, type_conversions +) +SELECT + NULL, true, 'nutrition', 'Cronometer Export', + 'Cronometer daily nutrition export (English)', + ARRAY['Day', 'Energy (kcal)', 'Fat (g)', 'Net Carbs (g)', 'Protein (g)']::TEXT[], + ',', 'utf-8', true, + '{ + "Day": "date", + "Energy (kcal)": "kcal", + "Fat (g)": "fat_g", + "Net Carbs (g)": "carbs_g", + "Protein (g)": "protein_g" + }'::JSONB, + '{ + "date": {"type": "date", "format": "yyyy-mm-dd"}, + "kcal": {"type": "float", "decimal_separator": "."}, + "fat_g": {"type": "float", "decimal_separator": "."}, + "carbs_g": {"type": "float", "decimal_separator": "."}, + "protein_g": {"type": "float", "decimal_separator": "."} + }'::JSONB +WHERE NOT EXISTS ( + SELECT 1 FROM csv_field_mappings f + WHERE f.is_system AND f.profile_id IS NULL + AND f.module = 'nutrition' AND f.mapping_name = 'Cronometer Export' +); + +INSERT INTO csv_field_mappings ( + profile_id, is_system, module, mapping_name, description, + column_signature, delimiter, encoding, has_header, + field_mappings, type_conversions +) +SELECT + NULL, true, 'activity', 'Apple Health Workout Export (English)', + 'Apple Health CSV-Export für Workouts (English). 
Automatisches Training-Type-Mapping.', + ARRAY['Active Energy (kcal)', 'Distance (km)', 'Duration', 'End', 'Heart Rate Average (bpm)', 'Start', 'Workout Type']::TEXT[], + ',', 'utf-8', true, + '{ + "Workout Type": "activity_type", + "Start": "start_time", + "End": "end_time", + "Duration": "duration_min", + "Distance (km)": "distance_km", + "Active Energy (kcal)": "kcal_active", + "Heart Rate Average (bpm)": "hr_avg" + }'::JSONB, + '{ + "start_time": {"type": "datetime", "format": "yyyy-mm-dd HH:MM:SS", "extract": "date_and_time"}, + "end_time": {"type": "datetime", "format": "yyyy-mm-dd HH:MM:SS"}, + "duration_min": {"type": "duration", "format": "HH:MM:SS", "target_unit": "minutes"}, + "distance_km": {"type": "float", "decimal_separator": "."}, + "kcal_active": {"type": "float", "decimal_separator": "."}, + "hr_avg": {"type": "int"} + }'::JSONB +WHERE NOT EXISTS ( + SELECT 1 FROM csv_field_mappings f + WHERE f.is_system AND f.profile_id IS NULL + AND f.module = 'activity' AND f.mapping_name = 'Apple Health Workout Export (English)' +); + +INSERT INTO csv_field_mappings ( + profile_id, is_system, module, mapping_name, description, + column_signature, delimiter, encoding, has_header, + field_mappings, type_conversions +) +SELECT + NULL, true, 'activity', 'Apple Health Workout Export (Deutsch)', + 'Apple Health CSV-Export für Workouts (Deutsch). 
Automatisches Training-Type-Mapping.', + ARRAY['Aktive Energie (kcal)', 'Dauer', 'Durchschnittliche Herzfrequenz (bpm)', 'Ende', 'Start', 'Strecke (km)', 'Trainingsart']::TEXT[], + ',', 'utf-8', true, + '{ + "Trainingsart": "activity_type", + "Start": "start_time", + "Ende": "end_time", + "Dauer": "duration_min", + "Strecke (km)": "distance_km", + "Aktive Energie (kcal)": "kcal_active", + "Durchschnittliche Herzfrequenz (bpm)": "hr_avg" + }'::JSONB, + '{ + "start_time": {"type": "datetime", "format": "yyyy-mm-dd HH:MM:SS", "extract": "date_and_time"}, + "end_time": {"type": "datetime", "format": "yyyy-mm-dd HH:MM:SS"}, + "duration_min": {"type": "duration", "format": "HH:MM:SS", "target_unit": "minutes"}, + "distance_km": {"type": "float", "decimal_separator": ","}, + "kcal_active": {"type": "float", "decimal_separator": ","}, + "hr_avg": {"type": "int"} + }'::JSONB +WHERE NOT EXISTS ( + SELECT 1 FROM csv_field_mappings f + WHERE f.is_system AND f.profile_id IS NULL + AND f.module = 'activity' AND f.mapping_name = 'Apple Health Workout Export (Deutsch)' +); + +INSERT INTO csv_field_mappings ( + profile_id, is_system, module, mapping_name, description, + column_signature, delimiter, encoding, has_header, + field_mappings, type_conversions +) +SELECT + NULL, true, 'activity', 'Garmin Connect Export', + 'Garmin Connect activity CSV export (English)', + ARRAY['Activity Type', 'Avg HR', 'Calories', 'Date', 'Distance', 'Duration', 'Time']::TEXT[], + ',', 'utf-8', true, + '{ + "Activity Type": "activity_type", + "Date": "date", + "Time": "start_time", + "Duration": "duration_min", + "Distance": "distance_km", + "Calories": "kcal_active", + "Avg HR": "hr_avg" + }'::JSONB, + '{ + "date": {"type": "date", "format": "yyyy-mm-dd"}, + "start_time": {"type": "time", "format": "HH:MM:SS"}, + "duration_min": {"type": "duration", "format": "HH:MM:SS", "target_unit": "minutes"}, + "distance_km": {"type": "float", "decimal_separator": "."}, + "kcal_active": {"type": "float", 
"decimal_separator": "."}, + "hr_avg": {"type": "int"} + }'::JSONB +WHERE NOT EXISTS ( + SELECT 1 FROM csv_field_mappings f + WHERE f.is_system AND f.profile_id IS NULL + AND f.module = 'activity' AND f.mapping_name = 'Garmin Connect Export' +); + +INSERT INTO csv_field_mappings ( + profile_id, is_system, module, mapping_name, description, + column_signature, delimiter, encoding, has_header, + field_mappings, type_conversions +) +SELECT + NULL, true, 'blood_pressure', 'Omron Export (Deutsch)', + 'Omron Blutdruckmessgerät CSV-Export (Deutsch)', + ARRAY['Datum', 'Diastolisch (mmHg)', 'Puls (bpm)', 'Systolisch (mmHg)', 'Zeit']::TEXT[], + ',', 'utf-8', true, + '{ + "Datum": "measured_date", + "Zeit": "measured_time", + "Systolisch (mmHg)": "systolic", + "Diastolisch (mmHg)": "diastolic", + "Puls (bpm)": "pulse" + }'::JSONB, + '{ + "measured_date": {"type": "date", "format": "dd.mm.yyyy"}, + "measured_time": {"type": "time", "format": "HH:MM"}, + "systolic": {"type": "int"}, + "diastolic": {"type": "int"}, + "pulse": {"type": "int"} + }'::JSONB +WHERE NOT EXISTS ( + SELECT 1 FROM csv_field_mappings f + WHERE f.is_system AND f.profile_id IS NULL + AND f.module = 'blood_pressure' AND f.mapping_name = 'Omron Export (Deutsch)' +); + +INSERT INTO csv_field_mappings ( + profile_id, is_system, module, mapping_name, description, + column_signature, delimiter, encoding, has_header, + field_mappings, type_conversions +) +SELECT + NULL, true, 'blood_pressure', 'Omron Export (English)', + 'Omron blood pressure monitor CSV export (English)', + ARRAY['Date', 'Diastolic (mmHg)', 'Pulse (bpm)', 'Systolic (mmHg)', 'Time']::TEXT[], + ',', 'utf-8', true, + '{ + "Date": "measured_date", + "Time": "measured_time", + "Systolic (mmHg)": "systolic", + "Diastolic (mmHg)": "diastolic", + "Pulse (bpm)": "pulse" + }'::JSONB, + '{ + "measured_date": {"type": "date", "format": "mm/dd/yyyy"}, + "measured_time": {"type": "time", "format": "HH:MM"}, + "systolic": {"type": "int"}, + "diastolic": 
{"type": "int"}, + "pulse": {"type": "int"} + }'::JSONB +WHERE NOT EXISTS ( + SELECT 1 FROM csv_field_mappings f + WHERE f.is_system AND f.profile_id IS NULL + AND f.module = 'blood_pressure' AND f.mapping_name = 'Omron Export (English)' +); + +INSERT INTO csv_field_mappings ( + profile_id, is_system, module, mapping_name, description, + column_signature, delimiter, encoding, has_header, + field_mappings, type_conversions +) +SELECT -- the converter honours "extract" only for type "date", so Start is declared as date, not datetime + NULL, true, 'weight', 'Apple Health Weight Export', + 'Apple Health body mass CSV export', + ARRAY['Body Mass (kg)', 'Start']::TEXT[], + ',', 'utf-8', true, + '{ + "Start": "date", + "Body Mass (kg)": "weight" + }'::JSONB, + '{ + "date": {"type": "date", "format": "yyyy-mm-dd HH:MM:SS", "extract": "date_only"}, + "weight": {"type": "float", "decimal_separator": "."} + }'::JSONB +WHERE NOT EXISTS ( + SELECT 1 FROM csv_field_mappings f + WHERE f.is_system AND f.profile_id IS NULL + AND f.module = 'weight' AND f.mapping_name = 'Apple Health Weight Export' +); + +INSERT INTO csv_field_mappings ( + profile_id, is_system, module, mapping_name, description, + column_signature, delimiter, encoding, has_header, + field_mappings, type_conversions +) +SELECT + NULL, true, 'weight', 'Withings Export', + 'Withings smart scale CSV export (weight, body fat, muscle mass)', + ARRAY['Body Fat (%)', 'Date', 'Muscle Mass (kg)', 'Weight (kg)']::TEXT[], + ',', 'utf-8', true, + '{ + "Date": "date", + "Weight (kg)": "weight" + }'::JSONB, + '{ + "date": {"type": "date", "format": "yyyy-mm-dd"}, + "weight": {"type": "float", "decimal_separator": "."} + }'::JSONB +WHERE NOT EXISTS ( + SELECT 1 FROM csv_field_mappings f + WHERE f.is_system AND f.profile_id IS NULL + AND f.module = 'weight' AND f.mapping_name = 'Withings Export' +); diff --git a/backend/routers/admin_csv_templates.py b/backend/routers/admin_csv_templates.py new file mode 100644 index 0000000..5fe1ca4 --- /dev/null +++ b/backend/routers/admin_csv_templates.py @@ -0,0 +1,245 @@ +""" +Admin: 
System-CSV-Templates (csv_field_mappings, is_system=true) pflegen (Issue #21). +""" +from __future__ import annotations + +from typing import Any, List, Optional + +from fastapi import APIRouter, Depends, HTTPException +from pydantic import BaseModel, Field +from psycopg2.extras import Json + +from auth import require_admin +from db import get_db, get_cursor, r2d +from csv_parser.core import get_csv_import_limits +from csv_parser.module_registry import get_module_definition, validate_field_mappings + +router = APIRouter(prefix="/api/admin/csv-templates", tags=["admin", "csv-import"]) + + +class CsvSystemTemplateCreate(BaseModel): + module: str + mapping_name: str = Field(..., min_length=1, max_length=100) + description: Optional[str] = None + column_signature: List[str] = Field(default_factory=list) + delimiter: str = "," + encoding: str = "utf-8" + has_header: bool = True + field_mappings: dict = Field(default_factory=dict) + type_conversions: Optional[dict] = None + + +class CsvSystemTemplateUpdate(BaseModel): + mapping_name: Optional[str] = Field(default=None, min_length=1, max_length=100) + description: Optional[str] = None + column_signature: Optional[List[str]] = None + delimiter: Optional[str] = None + encoding: Optional[str] = None + has_header: Optional[bool] = None + field_mappings: Optional[dict] = None + type_conversions: Optional[dict] = None + + +class CsvImportLimitsBody(BaseModel): + max_rows_per_file: int = Field(..., ge=100, le=2_000_000) + max_file_bytes: int = Field(..., ge=10_000, le=2_147_483_648) + + +def _row_full(m: dict) -> dict: + return { + "id": m["id"], + "module": m["module"], + "mapping_name": m["mapping_name"], + "description": m.get("description"), + "column_signature": list(m["column_signature"]) if m.get("column_signature") else [], + "delimiter": m["delimiter"], + "encoding": m["encoding"], + "has_header": m["has_header"], + "field_mappings": m["field_mappings"], + "type_conversions": m.get("type_conversions"), + "usage_count": 
m.get("usage_count"), + "success_rate": m.get("success_rate"), + "last_used_at": m.get("last_used_at"), + "created_at": m.get("created_at"), + "updated_at": m.get("updated_at"), + "is_system": m["is_system"], + } + + +@router.get("/import-limits") +def admin_get_csv_import_limits(session: dict = Depends(require_admin)): + with get_db() as conn: + cur = get_cursor(conn) + cur.execute("SELECT value FROM system_config WHERE key = %s", ("csv_import",)) + row = cur.fetchone() + return get_csv_import_limits(r2d(row) if row else None) + + +@router.put("/import-limits") +def admin_put_csv_import_limits(body: CsvImportLimitsBody, session: dict = Depends(require_admin)): + payload = {"max_rows_per_file": body.max_rows_per_file, "max_file_bytes": body.max_file_bytes} + with get_db() as conn: + cur = get_cursor(conn) + cur.execute( + """ + INSERT INTO system_config (key, value, updated_at) + VALUES ('csv_import', %s, CURRENT_TIMESTAMP) + ON CONFLICT (key) DO UPDATE + SET value = EXCLUDED.value, updated_at = CURRENT_TIMESTAMP + """, + (Json(payload),), + ) + return payload + + +@router.get("") +def list_system_templates( + module: Optional[str] = None, + session: dict = Depends(require_admin), +): + with get_db() as conn: + cur = get_cursor(conn) + cur.execute( + """ + SELECT * FROM csv_field_mappings + WHERE is_system = true AND profile_id IS NULL + AND (%s::text IS NULL OR module = %s) + ORDER BY module, mapping_name + """, + (module, module), + ) + rows = [r2d(r) for r in cur.fetchall()] + return {"templates": [_row_full(m) for m in rows]} + + +@router.get("/{template_id}") +def get_system_template(template_id: int, session: dict = Depends(require_admin)): + with get_db() as conn: + cur = get_cursor(conn) + cur.execute( + "SELECT * FROM csv_field_mappings WHERE id = %s AND is_system = true AND profile_id IS NULL", + (template_id,), + ) + m = r2d(cur.fetchone()) + if not m: + raise HTTPException(404, "System-Template nicht gefunden") + return _row_full(m) + + 
+@router.post("") +def create_system_template(body: CsvSystemTemplateCreate, session: dict = Depends(require_admin)): + if not get_module_definition(body.module): + raise HTTPException(400, f"Unbekanntes Modul: {body.module}") + try: + validate_field_mappings(body.module, body.field_mappings) + except ValueError as e: + raise HTTPException(400, str(e)) + + with get_db() as conn: + cur = get_cursor(conn) + cur.execute( + """ + INSERT INTO csv_field_mappings ( + profile_id, is_system, module, mapping_name, description, + column_signature, delimiter, encoding, has_header, + field_mappings, type_conversions + ) VALUES ( + NULL, true, %s, %s, %s, %s, %s, %s, %s, %s, %s + ) RETURNING id + """, + ( + body.module, + body.mapping_name, + body.description, + body.column_signature, + body.delimiter, + body.encoding, + body.has_header, + Json(body.field_mappings), + Json(body.type_conversions) if body.type_conversions is not None else None, + ), + ) + new_id = cur.fetchone()["id"] + return {"id": new_id} + + +@router.put("/{template_id}") +def update_system_template( + template_id: int, + body: CsvSystemTemplateUpdate, + session: dict = Depends(require_admin), +): + with get_db() as conn: + cur = get_cursor(conn) + cur.execute( + "SELECT * FROM csv_field_mappings WHERE id = %s AND is_system = true AND profile_id IS NULL", + (template_id,), + ) + existing = r2d(cur.fetchone()) + if not existing: + raise HTTPException(404, "System-Template nicht gefunden") + + patch: dict[str, Any] = body.model_dump(exclude_unset=True) + if not patch: + return _row_full(existing) + + fm = patch.get("field_mappings", existing["field_mappings"]) + if "field_mappings" in patch: + try: + validate_field_mappings(existing["module"], fm) + except ValueError as e: + raise HTTPException(400, str(e)) + + fields_sql = [] + vals: list = [] + if "mapping_name" in patch: + fields_sql.append("mapping_name = %s") + vals.append(patch["mapping_name"]) + if "description" in patch: + fields_sql.append("description 
= %s") + vals.append(patch["description"]) + if "column_signature" in patch: + fields_sql.append("column_signature = %s") + vals.append(patch["column_signature"]) + if "delimiter" in patch: + fields_sql.append("delimiter = %s") + vals.append(patch["delimiter"]) + if "encoding" in patch: + fields_sql.append("encoding = %s") + vals.append(patch["encoding"]) + if "has_header" in patch: + fields_sql.append("has_header = %s") + vals.append(patch["has_header"]) + if "field_mappings" in patch: + fields_sql.append("field_mappings = %s") + vals.append(Json(patch["field_mappings"])) + if "type_conversions" in patch: + fields_sql.append("type_conversions = %s") + tc = patch["type_conversions"] + vals.append(Json(tc) if tc is not None else None) + + fields_sql.append("updated_at = CURRENT_TIMESTAMP") + vals.append(template_id) + + cur.execute( + f"UPDATE csv_field_mappings SET {', '.join(fields_sql)} WHERE id = %s", + tuple(vals), + ) + + cur.execute("SELECT * FROM csv_field_mappings WHERE id = %s", (template_id,)) + m = r2d(cur.fetchone()) + return _row_full(m) + + +@router.delete("/{template_id}") +def delete_system_template(template_id: int, session: dict = Depends(require_admin)): + with get_db() as conn: + cur = get_cursor(conn) + cur.execute( + "DELETE FROM csv_field_mappings WHERE id = %s AND is_system = true AND profile_id IS NULL RETURNING id", + (template_id,), + ) + row = cur.fetchone() + if not row: + raise HTTPException(404, "System-Template nicht gefunden") + return {"deleted": template_id} diff --git a/backend/routers/csv_import.py b/backend/routers/csv_import.py new file mode 100644 index 0000000..93b498f --- /dev/null +++ b/backend/routers/csv_import.py @@ -0,0 +1,254 @@ +""" +CSV-Import: Nutzer-Endpunkte für Analyse, Mappings, Limits (Issue #21). 
+""" +from __future__ import annotations + +import logging +from typing import Any, Optional + +from fastapi import APIRouter, Depends, File, Form, HTTPException, UploadFile +from pydantic import BaseModel + +from psycopg2.extras import Json + +from auth import require_auth +from db import get_db, get_cursor, r2d +from csv_parser.core import ( + decode_raw_bytes, + column_signature, + get_csv_import_limits, + headers_signature_match_score, + normalize_header_for_signature, + parse_csv_sample, +) +from csv_parser.module_registry import get_module_definition, list_modules, validate_field_mappings + +router = APIRouter(prefix="/api/csv", tags=["csv-import"]) +logger = logging.getLogger(__name__) + + +def _load_import_limits() -> dict[str, int]: + with get_db() as conn: + cur = get_cursor(conn) + cur.execute("SELECT value FROM system_config WHERE key = %s", ("csv_import",)) + row = cur.fetchone() + return get_csv_import_limits(r2d(row) if row else None) + + +def _mapping_to_summary(m: dict) -> dict: + return { + "id": m["id"], + "module": m["module"], + "name": m["mapping_name"], + "description": m.get("description"), + "is_system": m["is_system"], + "usage_count": m.get("usage_count"), + "success_rate": m.get("success_rate"), + "last_used_at": m.get("last_used_at"), + "created_at": m.get("created_at"), + } + + +@router.get("/modules") +def csv_modules(session: dict = Depends(require_auth)): + """Unterstützte Import-Module und Felddefinitionen.""" + out = [] + for mid in list_modules(): + d = get_module_definition(mid) + if d: + out.append({"id": mid, "table": d["table"], "fields": d["fields"]}) + return {"modules": out} + + +@router.get("/limits") +def csv_limits(session: dict = Depends(require_auth)): + """Admin-konfigurierbare Import-Limits (system_config.csv_import).""" + return _load_import_limits() + + +@router.get("/mappings") +def list_csv_mappings( + module: Optional[str] = None, + session: dict = Depends(require_auth), +): + """System-Templates + eigene 
User-Mappings.""" + pid = str(session["profile_id"]) + with get_db() as conn: + cur = get_cursor(conn) + cur.execute( + """ + SELECT id, module, mapping_name, description, is_system, profile_id, + usage_count, success_rate, last_used_at, created_at + FROM csv_field_mappings + WHERE is_system = true + AND (%s::text IS NULL OR module = %s) + ORDER BY usage_count DESC NULLS LAST, mapping_name + """, + (module, module), + ) + system_rows = [r2d(r) for r in cur.fetchall()] + + cur.execute( + """ + SELECT id, module, mapping_name, description, is_system, profile_id, + usage_count, success_rate, last_used_at, created_at + FROM csv_field_mappings + WHERE is_system = false AND profile_id = %s::uuid + AND (%s::text IS NULL OR module = %s) + ORDER BY last_used_at DESC NULLS LAST, mapping_name + """, + (pid, module, module), + ) + user_rows = [r2d(r) for r in cur.fetchall()] + + return { + "system_templates": [_mapping_to_summary(m) for m in system_rows], + "user_mappings": [_mapping_to_summary(m) for m in user_rows], + } + + +class CopyMappingBody(BaseModel): + name: Optional[str] = None + + +@router.post("/mappings/{mapping_id}/copy") +def copy_csv_mapping( + mapping_id: int, + body: CopyMappingBody | None = None, + session: dict = Depends(require_auth), +): + """System- oder eigenes Mapping als neues User-Mapping kopieren.""" + pid = str(session["profile_id"]) + with get_db() as conn: + cur = get_cursor(conn) + cur.execute( + """ + SELECT * FROM csv_field_mappings WHERE id = %s + """, + (mapping_id,), + ) + src = r2d(cur.fetchone()) + if not src: + raise HTTPException(404, "Mapping nicht gefunden") + if not src["is_system"] and str(src.get("profile_id")) != pid: + raise HTTPException(403, "Kein Zugriff auf dieses Mapping") + + base_name = (body.name if body and body.name else None) or f"{src['mapping_name']} (Kopie)" + name = base_name + n = 1 + while True: + cur.execute( + """ + SELECT 1 FROM csv_field_mappings + WHERE profile_id = %s::uuid AND module = %s AND mapping_name 
= %s + """, + (pid, src["module"], name), + ) + if not cur.fetchone(): + break + n += 1 + name = f"{base_name} {n}" + + cur.execute( + """ + INSERT INTO csv_field_mappings ( + profile_id, is_system, module, mapping_name, description, + column_signature, delimiter, encoding, has_header, + field_mappings, type_conversions, usage_count, success_rate + ) VALUES ( + %s::uuid, false, %s, %s, %s, + %s, %s, %s, %s, %s, %s, 0, 1.0 + ) RETURNING id + """, + ( + pid, + src["module"], + name, + src.get("description"), + src["column_signature"], + src["delimiter"], + src["encoding"], + src["has_header"], + Json(src["field_mappings"]), + Json(src["type_conversions"]) if src.get("type_conversions") is not None else None, + ), + ) + new_id = cur.fetchone()["id"] + return {"new_mapping_id": new_id, "mapping_name": name} + + +@router.post("/analyze") +async def analyze_csv( + file: UploadFile = File(...), + module: str = Form(...), + delimiter: Optional[str] = Form(default=None), + session: dict = Depends(require_auth), +): + """ + Erste Zeilen parsen, Signatur bilden, System-Templates nach Ähnlichkeit ranken. + """ + if not get_module_definition(module): + raise HTTPException(400, f"Unbekanntes Modul: {module}") + + raw = await file.read() + limits = _load_import_limits() + max_bytes = limits.get("max_file_bytes", 52_428_800) + if len(raw) > max_bytes: + raise HTTPException( + 413, + f"Datei zu groß (max. 
{max_bytes} Bytes laut Systemkonfiguration)", + ) + + text = decode_raw_bytes(raw) + max_rows = limits.get("max_rows_per_file", 50_000) + if text.count("\n") > max_rows + 5: + raise HTTPException( + 413, + f"Zu viele Zeilen (>{max_rows}) laut Systemkonfiguration csv_import.max_rows_per_file", + ) + delim = delimiter if delimiter in (",", ";", "\t") else None + headers, sample_rows, used_delim = parse_csv_sample(text, delimiter=delim, max_data_rows=5) + sig = column_signature(headers) + + mod_def = get_module_definition(module) + available_fields = mod_def["fields"] if mod_def else {} + + with get_db() as conn: + cur = get_cursor(conn) + cur.execute( + """ + SELECT id, module, mapping_name, description, column_signature, + delimiter, encoding, has_header, field_mappings, type_conversions, is_system + FROM csv_field_mappings + WHERE is_system = true AND module = %s + """, + (module,), + ) + templates = [r2d(r) for r in cur.fetchall()] + + ranked = [] + for t in templates: + t_sig = list(t["column_signature"]) if t["column_signature"] else [] + t_norm = sorted({normalize_header_for_signature(str(s)) for s in t_sig}) + score = headers_signature_match_score(sig, t_norm) + ranked.append( + { + "mapping_id": t["id"], + "mapping_name": t["mapping_name"], + "confidence": round(score, 4), + "match_type": "signature_jaccard", + } + ) + ranked.sort(key=lambda x: -x["confidence"]) + + return { + "module": module, + "filename": file.filename, + "encoding_note": "utf-8/latin-1 mit BOM-Strip", + "delimiter": used_delim, + "columns": headers, + "column_signature_normalized": sig, + "sample_rows": sample_rows, + "detected_mappings": ranked[:5], + "available_fields": available_fields, + } diff --git a/backend/tests/test_csv_parser_core.py b/backend/tests/test_csv_parser_core.py new file mode 100644 index 0000000..f721909 --- /dev/null +++ b/backend/tests/test_csv_parser_core.py @@ -0,0 +1,69 @@ +"""Tests für CSV-Parser Foundation (Issue #21).""" + +import pytest + +from 
csv_parser.core import ( + decode_raw_bytes, + sniff_delimiter, + parse_csv_sample, + column_signature, + headers_signature_match_score, + get_csv_import_limits, +) +from csv_parser.type_converter import convert_value, build_row_after_mapping + + +def test_decode_bom_utf8(): + raw = "\ufeffa;b;c\n1;2;3".encode("utf-8-sig") + t = decode_raw_bytes(raw) + assert not t.startswith("\ufeff") + assert "a;b;c" in t + + +def test_sniff_delimiter(): + assert sniff_delimiter("a;b;c;d") == ";" + assert sniff_delimiter("a,b,c") == "," + + +def test_parse_csv_sample_header(): + text = "Date;kcal\n2024-01-01;2000\n" + headers, rows, delim = parse_csv_sample(text, delimiter=";", max_data_rows=3) + assert headers == ["Date", "kcal"] + assert delim == ";" + assert rows[0]["Date"] == "2024-01-01" + assert rows[0]["kcal"] == "2000" + + +def test_column_signature_sorted_unique(): + sig = column_signature(["B", "a", "a"]) + assert sig == ["a", "b"] + + +def test_jaccard(): + s1 = column_signature(["Date", "Calories"]) + s2 = column_signature(["Date", "Calories", "Fat"]) + assert headers_signature_match_score(s1, s2) == pytest.approx(2 / 3) + + +def test_get_csv_import_limits_default(): + assert get_csv_import_limits(None)["max_rows_per_file"] == 50_000 + + +def test_convert_date_and_kcal_factor(): + d = convert_value("15.01.2024", "date", {"type": "date", "format": "dd.mm.yyyy"}) + assert d.year == 2024 and d.month == 1 and d.day == 15 + + k = convert_value("8000", "kcal", {"type": "float", "conversion_factor": 0.239, "decimal_separator": "."}) + assert abs(k - 8000 * 0.239) < 0.01 + + +def test_build_row_after_mapping(): + csv_row = {"Datum": "01.01.2024", "kj": "4200"} + fm = {"Datum": "date", "kj": "kcal"} + tc = { + "date": {"type": "date", "format": "dd.mm.yyyy"}, + "kcal": {"type": "float", "conversion_factor": 0.239, "decimal_separator": "."}, + } + out = build_row_after_mapping(csv_row, fm, tc) + assert out["date"].month == 1 + assert out["kcal"] is not None diff --git 
a/backend/version.py b/backend/version.py index d29eab6..d4095f0 100644 --- a/backend/version.py +++ b/backend/version.py @@ -9,7 +9,7 @@ Semantic Versioning: MAJOR.MINOR.PATCH APP_VERSION = "0.9p" BUILD_DATE = "2026-04-09" -DB_SCHEMA_VERSION = "20260406e" # Migration 041 +DB_SCHEMA_VERSION = "20260409a" # Migration 043 (042–043 CSV Parser) MODULE_VERSIONS = { "auth": "1.2.0", @@ -31,9 +31,21 @@ MODULE_VERSIONS = { "membership": "2.1.0", "workflow": "0.6.0", # Phase 4: End Node Template Engine "app_dashboard": "1.11.0", # Entitlements: DB-Override widget→features (AND), sonst Katalog + "csv_import": "0.1.0", # Issue #21: Analyse, Mappings, Limits + "admin_csv_templates": "0.1.0", # Issue #21: System-Templates + Import-Limits (Admin) } CHANGELOG = [ + { + "version": "0.9p", + "date": "2026-04-09", + "changes": [ + "Issue #21 Phase 1: Migration 042/043 (csv_field_mappings, csv_import_log, Seeds)", + "csv_parser: core (Decode/Delimiter/Sample), module_registry, type_converter, permissions", + "API /api/csv: modules, limits, mappings, analyze, copy", + "API /api/admin/csv-templates: CRUD System-Templates, import-limits (system_config)", + ], + }, { "version": "0.9n", "date": "2026-04-06", diff --git a/frontend/src/utils/api.js b/frontend/src/utils/api.js index 8d974eb..e12ef87 100644 --- a/frontend/src/utils/api.js +++ b/frontend/src/utils/api.js @@ -481,4 +481,36 @@ export const api = { // Placeholder Metadata Export (v1.0) exportPlaceholdersExtendedJson: () => req('/prompts/placeholders/export-values-extended'), + + // Universal CSV Import (Issue #21) + getCsvModules: () => req('/csv/modules'), + getCsvLimits: () => req('/csv/limits'), + getCsvMappings: (module = null) => + req(module ? `/csv/mappings?module=${encodeURIComponent(module)}` : '/csv/mappings'), + copyCsvMapping: (mappingId, body = null) => + req(`/csv/mappings/${mappingId}/copy`, body ? 
json(body) : { method: 'POST' }), + analyzeCsv: async (file, module, delimiter = null) => { + const fd = new FormData() + fd.append('file', file) + fd.append('module', module) + if (delimiter) fd.append('delimiter', delimiter) + const res = await fetch(BASE + '/csv/analyze', { method: 'POST', headers: hdrs(), body: fd }) + if (!res.ok) { + const errText = await res.text() + let parsed = null + try { + parsed = JSON.parse(errText) + } catch { /* ignore */ } + throw new Error(formatFastApiDetail(parsed?.detail, errText.trim() || `HTTP ${res.status}`)) + } + return res.json() + }, + adminListCsvTemplates: (module = null) => + req(module ? `/admin/csv-templates?module=${encodeURIComponent(module)}` : '/admin/csv-templates'), + adminGetCsvTemplate: (id) => req(`/admin/csv-templates/${id}`), + adminCreateCsvTemplate: (d) => req('/admin/csv-templates', json(d)), + adminUpdateCsvTemplate: (id, d) => req(`/admin/csv-templates/${id}`, jput(d)), + adminDeleteCsvTemplate: (id) => req(`/admin/csv-templates/${id}`, { method: 'DELETE' }), + adminGetCsvImportLimits: () => req('/admin/csv-templates/import-limits'), + adminPutCsvImportLimits: (d) => req('/admin/csv-templates/import-limits', jput(d)), }