""" CSV → Zieltabellen: Upsert, Fehlerliste, affected_ids für csv_import_log (Issue #21). """ from __future__ import annotations import datetime as dt import uuid from collections import defaultdict from typing import Any from csv_parser.core import iter_csv_dict_rows from csv_parser.module_registry import get_module_definition from csv_parser.type_converter import build_row_after_mapping def coerce_date(val: Any) -> dt.date | None: if val is None: return None if isinstance(val, dt.datetime): return val.date() if isinstance(val, dt.date): return val return None def _derive_bp_context(hour: int) -> str: if 5 <= hour < 10: return "morning_fasted" if 18 <= hour < 23: return "evening" return "other" def run_universal_csv_import( cur, profile_id: str, module: str, text: str, filename: str, mapping: dict[str, Any], ) -> dict[str, Any]: """ Nutzt cur innerhalb einer bestehenden Transaktion. Gibt Statistik + affected_ids (+ error_details) zurück. """ mod = get_module_definition(module) if not mod: raise ValueError(f"Unbekanntes Modul: {module}") fm = mapping.get("field_mappings") or {} if isinstance(fm, str): raise ValueError("field_mappings muss ein Objekt sein") tc = mapping.get("type_conversions") if tc is not None and not isinstance(tc, dict): tc = None delim = mapping.get("delimiter") or "," has_header = mapping.get("has_header", True) rows_total = 0 error_details: list[dict[str, Any]] = [] affected_ids: dict[str, list[str]] = defaultdict(list) if module == "nutrition": stats = _import_nutrition( cur, profile_id, text, delim, bool(has_header), fm, tc, error_details, affected_ids, ) rows_total = stats.pop("rows_total") elif module == "weight": stats = _import_weight( cur, profile_id, text, delim, bool(has_header), fm, tc, error_details, affected_ids, ) rows_total = stats.pop("rows_total") elif module == "blood_pressure": stats = _import_blood_pressure( cur, profile_id, text, delim, bool(has_header), fm, tc, error_details, affected_ids, ) rows_total = stats.pop("rows_total") else: raise ValueError(f"Modul '{module}' wird für Universal-Import noch nicht unterstützt") out = { "rows_total": rows_total, "rows_imported": stats.get("inserted", 0), "rows_updated": stats.get("updated", 0), "rows_skipped": stats.get("skipped", 0), "rows_errors": len(error_details), "error_details": error_details[:50], "new_entries": stats.get("new_entries", stats.get("inserted", 0)), "affected_ids": dict(affected_ids), } return out def _import_nutrition( cur, profile_id: str, text: str, delim: str, has_header: bool, fm: dict, tc: dict | None, error_details: list, affected_ids: dict, ) -> dict[str, int]: agg: dict[str, dict[str, float]] = defaultdict( lambda: {"kcal": 0.0, "protein_g": 0.0, "fat_g": 0.0, "carbs_g": 0.0} ) rows_total = 0 for csv_row in iter_csv_dict_rows(text, delim, has_header=has_header): rows_total += 1 mapped = build_row_after_mapping(csv_row, fm, tc) d = coerce_date(mapped.get("date")) if d is None: error_details.append({"row": rows_total, "error": "Datum fehlt oder ungültig"}) continue iso = d.isoformat() for key in ("kcal", "protein_g", "fat_g", "carbs_g"): v = mapped.get(key) if v is not None: try: agg[iso][key] += float(v) except (TypeError, ValueError): pass inserted = 0 updated = 0 new_entries = 0 for iso, vals in agg.items(): kcal = round(vals["kcal"], 1) fat = round(vals["fat_g"], 1) carbs = round(vals["carbs_g"], 1) prot = round(vals["protein_g"], 1) if kcal == 0 and fat == 0 and carbs == 0 and prot == 0: continue cur.execute( "SELECT id FROM nutrition_log WHERE profile_id=%s AND date=%s", (profile_id, iso), ) existing = cur.fetchone() if existing: cur.execute( """ UPDATE nutrition_log SET kcal=%s, protein_g=%s, fat_g=%s, carbs_g=%s, source='csv' WHERE profile_id=%s AND date=%s RETURNING id """, (kcal, prot, fat, carbs, profile_id, iso), ) row = cur.fetchone() updated += 1 if row and row.get("id"): affected_ids["nutrition_log"].append(str(row["id"])) else: eid = str(uuid.uuid4()) cur.execute( """ INSERT INTO nutrition_log (id, profile_id, date, kcal, protein_g, fat_g, carbs_g, source, created) VALUES (%s,%s,%s,%s,%s,%s,%s,'csv',CURRENT_TIMESTAMP) """, (eid, profile_id, iso, kcal, prot, fat, carbs), ) inserted += 1 new_entries += 1 affected_ids["nutrition_log"].append(eid) return { "rows_total": rows_total, "inserted": inserted, "updated": updated, "skipped": 0, "new_entries": new_entries, } def _import_weight( cur, profile_id: str, text: str, delim: str, has_header: bool, fm: dict, tc: dict | None, error_details: list, affected_ids: dict, ) -> dict[str, int]: rows_total = 0 inserted = 0 updated = 0 new_entries = 0 for csv_row in iter_csv_dict_rows(text, delim, has_header=has_header): rows_total += 1 mapped = build_row_after_mapping(csv_row, fm, tc) d = coerce_date(mapped.get("date")) w = mapped.get("weight") note = mapped.get("note") if d is None: error_details.append({"row": rows_total, "error": "Datum fehlt"}) continue if w is None: error_details.append({"row": rows_total, "error": "Gewicht fehlt"}) continue try: w = float(w) except (TypeError, ValueError): error_details.append({"row": rows_total, "error": "Gewicht ungültig"}) continue iso = d.isoformat() cur.execute( "SELECT id FROM weight_log WHERE profile_id=%s AND date=%s", (profile_id, iso), ) existing = cur.fetchone() if existing: cur.execute( """ UPDATE weight_log SET weight=%s, note=COALESCE(%s, note), source='csv' WHERE profile_id=%s AND date=%s RETURNING id """, (w, note, profile_id, iso), ) row = cur.fetchone() updated += 1 if row and row.get("id"): affected_ids["weight_log"].append(str(row["id"])) else: eid = str(uuid.uuid4()) cur.execute( """ INSERT INTO weight_log (id, profile_id, date, weight, note, source, created) VALUES (%s,%s,%s,%s,%s,'csv',CURRENT_TIMESTAMP) """, (eid, profile_id, iso, w, note), ) inserted += 1 new_entries += 1 affected_ids["weight_log"].append(eid) return { "rows_total": rows_total, "inserted": inserted, "updated": updated, "skipped": 0, "new_entries": new_entries, } def _import_blood_pressure( cur, profile_id: str, text: str, delim: str, has_header: bool, fm: dict, tc: dict | None, error_details: list, affected_ids: dict, ) -> dict[str, int]: rows_total = 0 inserted = 0 updated = 0 skipped = 0 for csv_row in iter_csv_dict_rows(text, delim, has_header=has_header): rows_total += 1 mapped = build_row_after_mapping(csv_row, fm, tc) md = coerce_date(mapped.get("measured_date")) mt = mapped.get("measured_time") if md is None: error_details.append({"row": rows_total, "error": "Datum fehlt"}) continue if mt is None: error_details.append({"row": rows_total, "error": "Zeit fehlt"}) continue if isinstance(mt, str): try: parts = mt.replace(".", ":").split(":") if len(parts) >= 2: mt = dt.time(int(parts[0]), int(parts[1]), int(parts[2]) if len(parts) > 2 else 0) else: raise ValueError() except Exception: error_details.append({"row": rows_total, "error": "Zeit ungültig"}) continue if not isinstance(mt, dt.time): error_details.append({"row": rows_total, "error": "Zeitformat wird nicht unterstützt"}) continue systolic = mapped.get("systolic") diastolic = mapped.get("diastolic") pulse = mapped.get("pulse") try: sys_i = int(systolic) dia_i = int(diastolic) except (TypeError, ValueError): error_details.append({"row": rows_total, "error": "Blutdruckwerte fehlen oder ungültig"}) continue pulse_i = int(pulse) if pulse is not None else None measured_at = dt.datetime.combine(md, mt) hour = mt.hour context = _derive_bp_context(hour) cur.execute( """ SELECT id FROM blood_pressure_log WHERE profile_id = %s AND measured_at = %s """, (profile_id, measured_at), ) existing_bp = cur.fetchone() if existing_bp: cur.execute( """ UPDATE blood_pressure_log SET systolic = %s, diastolic = %s, pulse = %s, context = %s, source = 'csv' WHERE profile_id = %s AND measured_at = %s RETURNING id """, (sys_i, dia_i, pulse_i, context, profile_id, measured_at), ) row = cur.fetchone() updated += 1 if row and row.get("id"): affected_ids["blood_pressure_log"].append(str(row["id"])) else: cur.execute( """ INSERT INTO blood_pressure_log ( profile_id, measured_at, systolic, diastolic, pulse, context, source ) VALUES (%s, %s, %s, %s, %s, %s, 'csv') RETURNING id """, (profile_id, measured_at, sys_i, dia_i, pulse_i, context), ) row = cur.fetchone() inserted += 1 if row and row.get("id"): affected_ids["blood_pressure_log"].append(str(row["id"])) return { "rows_total": rows_total, "inserted": inserted, "updated": updated, "skipped": skipped, "new_entries": inserted, }