From 3106ebedae37a8545c35fe93bb1067ae934d64e3 Mon Sep 17 00:00:00 2001
From: Lars <Lars@stommer.de>
Date: Tue, 21 Apr 2026 08:03:43 +0200
Subject: [PATCH 1/3] feat: enhance lag correlation calculations and chart
 metadata

- Updated `calculate_lag_correlation` to include detailed interpretations and lag details for energy balance vs. weight change, protein vs. lean mass, and load vs. vital metrics.
- Improved handling of insufficient data scenarios in correlation charts, providing clearer messages and metadata for user insights.
- Refactored chart functions to utilize best lag values and correlation data more effectively, enhancing the visualization of relationships between metrics.
---
 backend/data_layer/correlations.py | 401 ++++++++++++++++++++++++-----
 backend/routers/charts.py          | 100 ++++---
 2 files changed, 400 insertions(+), 101 deletions(-)

diff --git a/backend/data_layer/correlations.py b/backend/data_layer/correlations.py
index 5ab2ac2..a0ed2fc 100644
--- a/backend/data_layer/correlations.py
+++ b/backend/data_layer/correlations.py
@@ -17,28 +17,29 @@ Phase 0c: Multi-Layer Architecture
 Version: 1.0
 """
 
-from typing import Dict, List, Optional, Tuple
+from typing import Any, Dict, List, Optional, Tuple
+
 from datetime import datetime, timedelta, date
 from db import get_db, get_cursor, r2d
 import statistics
 
+from data_layer.nutrition_body_merge import build_merged_daily_nutrition_body_rows
+from data_layer.nutrition_metrics import estimate_tdee_kcal_from_latest_weight
+
+# Lag-Korrelation (Issue #53): gleiche TDEE-Logik wie nutrition_metrics / nutrition_viz
+MIN_PAIRS_LAG_CORR = 15
+LAG_CORR_LOOKBACK_DAYS = 120
+
 def calculate_lag_correlation(profile_id: str, var1: str, var2: str, max_lag_days: int = 14) -> Optional[Dict]:
     """
-    Calculate lagged correlation between two variables
+    Pearson-Korrelation mit Lag-Sweep (Issue 53, Data-Layer).
 
-    Args:
-        var1: 'energy', 'protein', 'training_load'
-        var2: 'weight', 'lbm', 'hrv', 'rhr'
-        max_lag_days: Maximum lag to test
+    C1: Tagesbilanz (kcal − TDEE wie ``estimate_tdee_kcal_from_latest_weight``) vs. ΔGewicht [t→t+L], L≥1.
+    C2: Protein (g) vs. ΔMager [t→t+L] aus ``build_merged_daily_nutrition_body_rows``, L≥1.
+    C3: Summe ``duration_min`` pro Tag vs. HRV oder Ruhepuls am Tag t+L (L≥0).
 
-    Returns:
-        {
-            'best_lag': X,  # days
-            'correlation': 0.XX,  # -1 to 1
-            'direction': 'positive'/'negative'/'none',
-            'confidence': 'high'/'medium'/'low',
-            'data_points': N
-        }
+    Rückgabe enthält u. a. ``best_lag`` / ``best_lag_days``, ``correlation``, ``interpretation``,
+    optional ``lag_details`` (r, n je Lag), mindestens ``MIN_PAIRS_LAG_CORR`` Paare am besten Lag.
     """
     v1 = (var1 or "").strip().lower()
     if v1 in ("energy", "energy_balance"):
@@ -70,85 +71,349 @@ def _normalize_lag_payload(raw: Optional[Dict]) -> Optional[Dict]:
     return out
 
 
+def _iso_date_key(d: Any) -> str:  # first 10 chars of the ISO/str form, used as a per-day dict key; "" for None
+    if d is None:
+        return ""
+    if hasattr(d, "isoformat"):
+        return str(d.isoformat())[:10]  # truncation drops anything after the 10th character
+    s = str(d)
+    return s[:10] if len(s) >= 10 else s  # strings shorter than 10 chars are returned unchanged
+
+
+def _parse_iso_to_date(ds: str) -> Optional[date]:  # parse ds[:10] as an ISO date; None if empty, too short or invalid
+    if not ds or len(ds) < 10:
+        return None
+    try:
+        return date.fromisoformat(ds[:10])
+    except ValueError:  # malformed date text is treated as "no date" rather than an error
+        return None
+
+
+def _pearson_r(xs: List[float], ys: List[float]) -> Optional[float]:
+    """Return Pearson's r, or None for fewer than ``MIN_PAIRS_LAG_CORR`` pairs, unequal lengths or ~zero variance."""
+    n = len(xs)
+    if n < MIN_PAIRS_LAG_CORR or n != len(ys):
+        return None
+    mx = sum(xs) / n
+    my = sum(ys) / n
+    num = sum((xs[i] - mx) * (ys[i] - my) for i in range(n))
+    dx = sum((xs[i] - mx) ** 2 for i in range(n))
+    dy = sum((ys[i] - my) ** 2 for i in range(n))
+    if dx <= 1e-12 or dy <= 1e-12:  # (near-)constant series: avoid dividing by ~0
+        return None
+    r = num / ((dx**0.5) * (dy**0.5))
+    return float(max(-1.0, min(1.0, r)))  # clamp to [-1, 1]
+
+
+def _direction_from_r(r: float) -> str:  # label the sign of r; values within ±0.05 count as "none"
+    if r > 0.05:
+        return "positive"
+    if r < -0.05:
+        return "negative"
+    return "none"
+
+
+def _lag_confidence(n_pairs: int, r: float) -> str:  # thin wrapper so callers can pass a signed r
+    return calculate_correlation_confidence(n_pairs, abs(r))  # helper is not shown in this patch; it receives |r|
+
+
 def _correlate_energy_weight(profile_id: str, max_lag: int) -> Optional[Dict]:
     """
-    Correlate energy balance with weight change
-    Test lags: 0, 3, 7, 10, 14 days
+    Pearson: daily energy balance (kcal − TDEE as in nutrition_metrics) vs. the weight
+    difference from day t to day t+L (L = 1 … max_lag); best lag has the largest |r|.
     """
+    tdee = estimate_tdee_kcal_from_latest_weight(profile_id)
+    if tdee is None or float(tdee) <= 0:
+        return {
+            "best_lag": None,
+            "correlation": None,
+            "direction": "none",
+            "confidence": "insufficient",
+            "data_points": 0,
+            "interpretation": "Keine TDEE-Schätzung möglich (Gewicht/Demografie).",
+            "reason": "no_tdee",
+        }
+
+    tdee_f = float(tdee)
+    cutoff = (datetime.now() - timedelta(days=LAG_CORR_LOOKBACK_DAYS)).strftime("%Y-%m-%d")
+
     with get_db() as conn:
         cur = get_cursor(conn)
+        cur.execute(
+            """
+            SELECT date::date AS d, SUM(kcal)::float AS kcal
+            FROM nutrition_log
+            WHERE profile_id = %s AND date >= %s::date AND kcal IS NOT NULL
+            GROUP BY date
+            ORDER BY date
+            """,
+            (profile_id, cutoff),
+        )
+        kcal_rows = cur.fetchall()
+        cur.execute(
+            """
+            SELECT date::date AS d, weight::float AS weight
+            FROM weight_log
+            WHERE profile_id = %s AND date >= %s::date AND weight IS NOT NULL
+            ORDER BY date
+            """,
+            (profile_id, cutoff),
+        )
+        w_rows = cur.fetchall()
 
-        # Get energy balance data (daily calories - estimated TDEE)
-        cur.execute("""
-            SELECT n.date, n.kcal, w.weight
-            FROM nutrition_log n
-            LEFT JOIN weight_log w ON w.profile_id = n.profile_id
-                AND w.date = n.date
-            WHERE n.profile_id = %s
-              AND n.date >= CURRENT_DATE - INTERVAL '90 days'
-            ORDER BY n.date
-        """, (profile_id,))
+    kcal_by: Dict[str, float] = {}
+    for r in kcal_rows:
+        kcal_by[_iso_date_key(r["d"])] = float(r["kcal"] or 0)
+    weight_by: Dict[str, float] = {}
+    for r in w_rows:
+        weight_by[_iso_date_key(r["d"])] = float(r["weight"])
 
-        data = cur.fetchall()
+    balance_by = {d: kcal_by[d] - tdee_f for d in kcal_by}
 
-        if len(data) < 30:
-            return {
-                'best_lag': None,
-                'correlation': None,
-                'direction': 'none',
-                'confidence': 'low',
-                'data_points': len(data),
-                'reason': 'Insufficient data (<30 days)'
-            }
+    best: Optional[Tuple[int, float, int]] = None  # (lag, r, n_pairs) of the strongest lag so far
+    lag_details: List[Dict[str, Any]] = []
 
-    # Calculate 7d rolling energy balance
-    # (Simplified - actual implementation would need TDEE estimation)
+    max_l = max(0, min(int(max_lag), 28))
+    # Lag 0 is skipped: the same-day weight delta is always 0, so the sweep starts at 1
+    for lag in range(1, max_l + 1):
+        xs: List[float] = []
+        ys: List[float] = []
+        for ds in sorted(balance_by.keys()):
+            d0 = _parse_iso_to_date(ds)
+            if d0 is None:
+                continue
+            d1 = d0 + timedelta(days=lag)
+            ds1 = d1.isoformat()
+            w0 = weight_by.get(ds)
+            w1 = weight_by.get(ds1)
+            if w0 is None or w1 is None:
+                continue
+            xs.append(balance_by[ds])
+            ys.append(w1 - w0)
+        r = _pearson_r(xs, ys)
+        n_p = len(xs)
+        lag_details.append({"lag": lag, "n_pairs": n_p, "r": None if r is None else round(r, 4)})
+        if r is None:
+            continue
+        if best is None or abs(r) > abs(best[1]):
+            best = (lag, r, n_p)
+
+    if best is None:
+        return {
+            "best_lag": None,
+            "correlation": None,
+            "direction": "none",
+            "confidence": "insufficient",
+            "data_points": 0,
+            "interpretation": "Zu wenige gepaarte Tage mit Ernährung, Gewicht und gewähltem Lag.",
+            "reason": "insufficient_pairs",
+            "lag_details": lag_details,
+            "tdee_kcal_used": round(tdee_f, 0),
+        }
+
+    lag_b, r_b, n_b = best
+    direction = _direction_from_r(r_b)
+    conf = _lag_confidence(n_b, r_b)
+    interp = (
+        f"Tagesbilanz (kcal − TDEE ~{tdee_f:.0f}) vs. Gewichtsänderung nach {lag_b} Tagen: "
+        f"r ≈ {r_b:.2f} ({direction}). "
+        f"Basierend auf {n_b} Kalendertagen mit vollständigen Paaren."
+    )
 
-    # For now, return placeholder
     return {
-        'best_lag': 7,
-        'correlation': -0.45,  # Placeholder
-        'direction': 'negative',  # Higher deficit = lower weight (expected)
-        'confidence': 'medium',
-        'data_points': len(data)
+        "best_lag": lag_b,
+        "correlation": round(r_b, 4),
+        "direction": direction,
+        "confidence": conf,
+        "data_points": n_b,
+        "interpretation": interp,
+        "lag_details": lag_details,
+        "tdee_kcal_used": round(tdee_f, 0),
     }
 
 
 def _correlate_protein_lbm(profile_id: str, max_lag: int) -> Optional[Dict]:
-    """Correlate protein intake with LBM trend"""
-    # TODO: Implement full correlation calculation
+    """
+    Pearson: protein (g/day) vs. lean-mass difference (kg) from day t to day t+L.
+    Data source: nutrition_body_merge (caliper LBM forward-filled as in the nutrition history).
+    """
+    merged = build_merged_daily_nutrition_body_rows(profile_id)  # NOTE(review): no LAG_CORR_LOOKBACK_DAYS cutoff is applied here — confirm intended
+    if not merged:
+        return {
+            "best_lag": None,
+            "correlation": None,
+            "direction": "none",
+            "confidence": "insufficient",
+            "data_points": 0,
+            "interpretation": "Keine zusammengeführten Ernährungs-/Körperdaten.",
+            "reason": "no_merged_rows",
+        }
+
+    protein_by: Dict[str, float] = {}
+    lbm_by: Dict[str, float] = {}
+    for row in merged:
+        ds = _iso_date_key(row.get("date"))
+        if not ds:
+            continue
+        pg = row.get("protein_g")
+        lm = row.get("lean_mass")
+        if pg is not None:
+            protein_by[ds] = float(pg)
+        if lm is not None:
+            lbm_by[ds] = float(lm)
+
+    best: Optional[Tuple[int, float, int]] = None  # (lag, r, n_pairs) of the strongest lag so far
+    lag_details: List[Dict[str, Any]] = []
+    max_l = max(0, min(int(max_lag), 28))  # clamp the sweep to at most 28 days
+
+    for lag in range(1, max_l + 1):  # starts at 1: a same-day difference would always be 0
+        xs: List[float] = []
+        ys: List[float] = []
+        for ds in sorted(protein_by.keys()):
+            if ds not in lbm_by:
+                continue
+            d0 = _parse_iso_to_date(ds)
+            if d0 is None:
+                continue
+            d1 = d0 + timedelta(days=lag)
+            ds1 = d1.isoformat()
+            if ds1 not in lbm_by:
+                continue
+            xs.append(protein_by[ds])
+            ys.append(lbm_by[ds1] - lbm_by[ds])
+        r = _pearson_r(xs, ys)
+        n_p = len(xs)
+        lag_details.append({"lag": lag, "n_pairs": n_p, "r": None if r is None else round(r, 4)})
+        if r is None:
+            continue
+        if best is None or abs(r) > abs(best[1]):  # keep the lag with the largest |r|
+            best = (lag, r, n_p)
+
+    if best is None:
+        return {
+            "best_lag": None,
+            "correlation": None,
+            "direction": "none",
+            "confidence": "insufficient",
+            "data_points": 0,
+            "interpretation": "Zu wenige Tage mit Protein und Magermasse (Caliper) für die gewählten Lags.",
+            "reason": "insufficient_pairs",
+            "lag_details": lag_details,
+        }
+
+    lag_b, r_b, n_b = best
+    direction = _direction_from_r(r_b)
+    conf = _lag_confidence(n_b, r_b)
+    interp = (
+        f"Protein (g/Tag) vs. Magermasse-Änderung nach {lag_b} Tagen: r ≈ {r_b:.2f} ({direction}). "
+        f"{n_b} gepaarte Tage."
+    )
+
     return {
-        'best_lag': 0,
-        'correlation': 0.32,  # Placeholder
-        'direction': 'positive',
-        'confidence': 'medium',
-        'data_points': 28
+        "best_lag": lag_b,
+        "correlation": round(r_b, 4),
+        "direction": direction,
+        "confidence": conf,
+        "data_points": n_b,
+        "interpretation": interp,
+        "lag_details": lag_details,
     }
 
 
 def _correlate_load_vitals(profile_id: str, vital: str, max_lag: int) -> Optional[Dict]:
     """
-    Correlate training load with HRV or RHR
-    Test lags: 1, 2, 3 days
+    Pearson: daily training load (sum of duration_min) vs. a vital (HRV in ms or resting HR)
+    on calendar day t+lag (typical: load the day before, vital the day after, for lag ≥ 1).
     """
-    # TODO: Implement full correlation calculation
-    if vital == 'hrv':
+    col = "hrv" if vital == "hrv" else "resting_hr"  # one of two literals, so safe to interpolate into the SQL below
+    cutoff = (datetime.now() - timedelta(days=LAG_CORR_LOOKBACK_DAYS)).strftime("%Y-%m-%d")
+
+    with get_db() as conn:
+        cur = get_cursor(conn)
+        cur.execute(
+            """
+            SELECT date::text AS d, COALESCE(SUM(duration_min), 0)::float AS minutes
+            FROM activity_log
+            WHERE profile_id = %s AND date >= %s::date
+              AND duration_min IS NOT NULL AND duration_min > 0
+            GROUP BY date
+            ORDER BY date
+            """,
+            (profile_id, cutoff),
+        )
+        load_rows = cur.fetchall()
+        cur.execute(
+            f"""
+            SELECT date::text AS d, {col}::float AS v
+            FROM vitals_baseline
+            WHERE profile_id = %s AND date >= %s::date AND {col} IS NOT NULL
+            ORDER BY date
+            """,
+            (profile_id, cutoff),
+        )
+        vit_rows = cur.fetchall()
+
+    load_by = {str(r["d"])[:10]: float(r["minutes"] or 0) for r in load_rows}  # only days with logged activity; other days are absent, not zero
+    vital_by = {str(r["d"])[:10]: float(r["v"]) for r in vit_rows}
+
+    best: Optional[Tuple[int, float, int]] = None  # (lag, r, n_pairs) of the strongest lag so far
+    lag_details: List[Dict[str, Any]] = []
+    max_l = max(0, min(int(max_lag), 28))  # clamp the sweep to at most 28 days
+    vlabel = "HRV (ms)" if vital == "hrv" else "Ruhepuls (bpm)"
+
+    for lag in range(0, max_l + 1):  # lag 0 is included: same-day load vs. same-day vital
+        xs: List[float] = []
+        ys: List[float] = []
+        for ds in sorted(load_by.keys()):
+            d0 = _parse_iso_to_date(ds)
+            if d0 is None:
+                continue
+            d1 = d0 + timedelta(days=lag)
+            ds1 = d1.isoformat()
+            if ds1 not in vital_by:
+                continue
+            xs.append(load_by[ds])
+            ys.append(vital_by[ds1])
+        r = _pearson_r(xs, ys)
+        n_p = len(xs)
+        lag_details.append({"lag": lag, "n_pairs": n_p, "r": None if r is None else round(r, 4)})
+        if r is None:
+            continue
+        if best is None or abs(r) > abs(best[1]):  # keep the lag with the largest |r|
+            best = (lag, r, n_p)
+
+    if best is None:
         return {
-            'best_lag': 1,
-            'correlation': -0.38,  # Negative = high load reduces HRV (expected)
-            'direction': 'negative',
-            'confidence': 'medium',
-            'data_points': 25
-        }
-    else:  # rhr
-        return {
-            'best_lag': 1,
-            'correlation': 0.42,  # Positive = high load increases RHR (expected)
-            'direction': 'positive',
-            'confidence': 'medium',
-            'data_points': 25
+            "best_lag": None,
+            "correlation": None,
+            "direction": "none",
+            "confidence": "insufficient",
+            "data_points": 0,
+            "interpretation": f"Zu wenige gepaarte Tage mit Training und {vlabel}.",
+            "reason": "insufficient_pairs",
+            "lag_details": lag_details,
+            "vital": vital,
         }
 
+    lag_b, r_b, n_b = best
+    direction = _direction_from_r(r_b)
+    conf = _lag_confidence(n_b, r_b)
+    interp = (
+        f"Trainingsminuten/Tag vs. {vlabel} nach {lag_b} Tagen Lag: r ≈ {r_b:.2f} ({direction}). "
+        f"{n_b} Paare."
+    )
+
+    return {
+        "best_lag": lag_b,
+        "correlation": round(r_b, 4),
+        "direction": direction,
+        "confidence": conf,
+        "data_points": n_b,
+        "interpretation": interp,
+        "lag_details": lag_details,
+        "vital": vital,
+    }
+
 
 # ============================================================================
 # C4: Sleep vs. Recovery Correlation
diff --git a/backend/routers/charts.py b/backend/routers/charts.py
index 8578beb..220f8c0 100644
--- a/backend/routers/charts.py
+++ b/backend/routers/charts.py
@@ -1115,6 +1115,9 @@ def get_weight_energy_correlation_chart(
     corr_data = calculate_lag_correlation(profile_id, "energy_balance", "weight", max_lag)
 
     if not corr_data or corr_data.get('correlation') is None:
+        msg = "Nicht genug Daten für Korrelationsanalyse"
+        if isinstance(corr_data, dict):
+            msg = str(corr_data.get("interpretation") or corr_data.get("reason") or msg)
         return {
             "chart_type": "scatter",
             "data": {
@@ -1123,14 +1126,15 @@ def get_weight_energy_correlation_chart(
             },
             "metadata": {
                 "confidence": "insufficient",
-                "data_points": 0,
-                "message": "Nicht genug Daten für Korrelationsanalyse"
+                "data_points": corr_data.get("data_points", 0) if isinstance(corr_data, dict) else 0,
+                "message": msg,
+                "lag_details": corr_data.get("lag_details") if isinstance(corr_data, dict) else None,
+                "tdee_kcal_used": corr_data.get("tdee_kcal_used") if isinstance(corr_data, dict) else None,
             }
         }
 
-    # Create lag vs correlation data for chart
-    # For simplicity, show best lag point as single data point
-    best_lag = corr_data.get('best_lag_days', 0)
+    # Ein Punkt: bestes Lag (max. |r|) — Berechnung in data_layer.correlations (Issue 53)
+    best_lag = corr_data.get('best_lag_days', corr_data.get('best_lag', 0))
     correlation = corr_data.get('correlation', 0)
 
     return {
@@ -1150,10 +1154,13 @@ def get_weight_energy_correlation_chart(
         },
         "metadata": {
             "confidence": corr_data.get('confidence', 'low'),
-            "correlation": round(correlation, 3),
+            "correlation": round(float(correlation), 3),
             "best_lag_days": best_lag,
             "interpretation": corr_data.get('interpretation', ''),
-            "data_points": corr_data.get('data_points', 0)
+            "data_points": corr_data.get('data_points', 0),
+            "lag_details": corr_data.get("lag_details"),
+            "tdee_kcal_used": corr_data.get("tdee_kcal_used"),
+            "layer_1": "correlations._correlate_energy_weight",
         }
     }
 
@@ -1180,6 +1187,9 @@ def get_lbm_protein_correlation_chart(
     corr_data = calculate_lag_correlation(profile_id, "protein", "lbm", max_lag)
 
     if not corr_data or corr_data.get('correlation') is None:
+        msg = "Nicht genug Daten für LBM-Protein Korrelation"
+        if isinstance(corr_data, dict):
+            msg = str(corr_data.get("interpretation") or corr_data.get("reason") or msg)
         return {
             "chart_type": "scatter",
             "data": {
@@ -1188,12 +1198,13 @@ def get_lbm_protein_correlation_chart(
             },
             "metadata": {
                 "confidence": "insufficient",
-                "data_points": 0,
-                "message": "Nicht genug Daten für LBM-Protein Korrelation"
+                "data_points": corr_data.get("data_points", 0) if isinstance(corr_data, dict) else 0,
+                "message": msg,
+                "lag_details": corr_data.get("lag_details") if isinstance(corr_data, dict) else None,
             }
         }
 
-    best_lag = corr_data.get('best_lag_days', 0)
+    best_lag = corr_data.get('best_lag_days', corr_data.get('best_lag', 0))
     correlation = corr_data.get('correlation', 0)
 
     return {
@@ -1213,10 +1224,12 @@ def get_lbm_protein_correlation_chart(
         },
         "metadata": {
             "confidence": corr_data.get('confidence', 'low'),
-            "correlation": round(correlation, 3),
+            "correlation": round(float(correlation), 3),
             "best_lag_days": best_lag,
             "interpretation": corr_data.get('interpretation', ''),
-            "data_points": corr_data.get('data_points', 0)
+            "data_points": corr_data.get('data_points', 0),
+            "lag_details": corr_data.get("lag_details"),
+            "layer_1": "correlations._correlate_protein_lbm",
         }
     }
 
@@ -1240,35 +1253,54 @@ def get_load_vitals_correlation_chart(
     """
     profile_id = session['profile_id']
 
-    # Try HRV first
     corr_hrv = calculate_lag_correlation(profile_id, "load", "hrv", max_lag)
     corr_rhr = calculate_lag_correlation(profile_id, "load", "rhr", max_lag)
 
-    # Use whichever has stronger correlation
-    if corr_hrv and corr_rhr:
-        corr_data = corr_hrv if abs(corr_hrv.get('correlation', 0)) > abs(corr_rhr.get('correlation', 0)) else corr_rhr
-        metric_name = "HRV" if corr_data == corr_hrv else "RHR"
-    elif corr_hrv:
-        corr_data = corr_hrv
-        metric_name = "HRV"
-    elif corr_rhr:
-        corr_data = corr_rhr
-        metric_name = "RHR"
-    else:
+    def _abs_corr(c):
+        if not c or c.get("correlation") is None:
+            return -1.0
+        try:
+            return abs(float(c["correlation"]))
+        except (TypeError, ValueError):
+            return -1.0
+
+    if _abs_corr(corr_hrv) < 0 and _abs_corr(corr_rhr) < 0:
+        msg = "Nicht genug Daten für Load-Vitals Korrelation"
+        h_msg = corr_hrv.get("interpretation") if isinstance(corr_hrv, dict) else None
+        r_msg = corr_rhr.get("interpretation") if isinstance(corr_rhr, dict) else None
+        if h_msg or r_msg:
+            msg = f"HRV: {h_msg or '—'} · RHR: {r_msg or '—'}"
         return {
             "chart_type": "scatter",
-            "data": {
-                "labels": [],
-                "datasets": []
-            },
+            "data": {"labels": [], "datasets": []},
             "metadata": {
                 "confidence": "insufficient",
                 "data_points": 0,
-                "message": "Nicht genug Daten für Load-Vitals Korrelation"
-            }
+                "message": msg,
+                "lag_details_hrv": corr_hrv.get("lag_details") if isinstance(corr_hrv, dict) else None,
+                "lag_details_rhr": corr_rhr.get("lag_details") if isinstance(corr_rhr, dict) else None,
+            },
         }
 
-    best_lag = corr_data.get('best_lag_days', 0)
+    if _abs_corr(corr_hrv) >= _abs_corr(corr_rhr):
+        corr_data = corr_hrv
+        metric_name = "HRV"
+    else:
+        corr_data = corr_rhr
+        metric_name = "RHR"
+
+    if not corr_data or corr_data.get("correlation") is None:
+        return {
+            "chart_type": "scatter",
+            "data": {"labels": [], "datasets": []},
+            "metadata": {
+                "confidence": "insufficient",
+                "data_points": 0,
+                "message": str(corr_data.get("interpretation") or "Nicht genug Daten für Load-Vitals Korrelation"),
+            },
+        }
+
+    best_lag = corr_data.get('best_lag_days', corr_data.get('best_lag', 0))
     correlation = corr_data.get('correlation', 0)
 
     return {
@@ -1288,11 +1320,13 @@ def get_load_vitals_correlation_chart(
         },
         "metadata": {
             "confidence": corr_data.get('confidence', 'low'),
-            "correlation": round(correlation, 3),
+            "correlation": round(float(correlation), 3),
             "best_lag_days": best_lag,
             "metric": metric_name,
             "interpretation": corr_data.get('interpretation', ''),
-            "data_points": corr_data.get('data_points', 0)
+            "data_points": corr_data.get('data_points', 0),
+            "lag_details": corr_data.get("lag_details"),
+            "layer_1": "correlations._correlate_load_vitals",
         }
     }
 
-- 
2.43.0


From 0365d9eb52a830c702b846d8edd7f767ffd58024 Mon Sep 17 00:00:00 2001
From: Lars <Lars@stommer.de>
Date: Tue, 21 Apr 2026 08:08:17 +0200
Subject: [PATCH 2/3] feat: improve history overview visualization and data
 handling

- Used the `safe_float` utility (imported from `data_layer.utils`) for float handling in correlation comparisons, preventing potential errors.
- Refactored lag correlation logic in `get_history_overview_viz_bundle` to utilize absolute values safely, improving accuracy in metric comparisons.
- Enhanced nutrition body merge logic to ensure proper date handling and data integrity, optimizing the retrieval of nutrition and weight logs.
- Introduced new functions in the frontend for processing lag details, improving the visualization of correlation data in the History page.
---
 backend/data_layer/history_overview_viz.py |  9 +-
 backend/data_layer/nutrition_body_merge.py | 33 ++++++--
 frontend/src/pages/History.jsx             | 99 +++++++++++++++++++++-
 3 files changed, 128 insertions(+), 13 deletions(-)

diff --git a/backend/data_layer/history_overview_viz.py b/backend/data_layer/history_overview_viz.py
index 40627f9..4d278c2 100644
--- a/backend/data_layer/history_overview_viz.py
+++ b/backend/data_layer/history_overview_viz.py
@@ -13,6 +13,7 @@ from data_layer.correlations import calculate_lag_correlation, calculate_top_dri
 from data_layer.fitness_viz import get_fitness_dashboard_viz_bundle
 from data_layer.nutrition_viz import get_nutrition_history_viz_bundle
 from data_layer.recovery_viz import get_recovery_dashboard_viz_bundle
+from data_layer.utils import safe_float
 
 
 def _take_kpis(tiles: Any, max_n: int = 4) -> List[Dict[str, Any]]:
@@ -90,11 +91,9 @@ def get_history_overview_viz_bundle(profile_id: str, days: int) -> Dict[str, Any
     c3_rhr = calculate_lag_correlation(profile_id, "load", "rhr", 14)
     c3 = None
     if c3_hrv and c3_rhr:
-        c3 = (
-            c3_hrv
-            if abs(float(c3_hrv.get("correlation") or 0)) >= abs(float(c3_rhr.get("correlation") or 0))
-            else c3_rhr
-        )
+        a1 = abs(safe_float(c3_hrv.get("correlation"), 0.0))
+        a2 = abs(safe_float(c3_rhr.get("correlation"), 0.0))
+        c3 = c3_hrv if a1 >= a2 else c3_rhr
         if c3 is c3_hrv:
             c3 = dict(c3)
             c3["metric"] = "HRV"
diff --git a/backend/data_layer/nutrition_body_merge.py b/backend/data_layer/nutrition_body_merge.py
index 3263c45..ac8768f 100644
--- a/backend/data_layer/nutrition_body_merge.py
+++ b/backend/data_layer/nutrition_body_merge.py
@@ -9,7 +9,7 @@ from __future__ import annotations
 from typing import Any, Dict, List, Optional
 
 from db import get_db, get_cursor, r2d
-from caliper_composition import compute_lean_fat_kg, nearest_weight_kg_from_map
+from caliper_composition import as_date, compute_lean_fat_kg, nearest_weight_kg_from_map
 
 
 def build_merged_daily_nutrition_body_rows(profile_id: str) -> List[Dict[str, Any]]:
@@ -20,21 +20,42 @@ def build_merged_daily_nutrition_body_rows(profile_id: str) -> List[Dict[str, An
     with get_db() as conn:
         cur = get_cursor(conn)
         cur.execute("SELECT * FROM nutrition_log WHERE profile_id=%s ORDER BY date", (profile_id,))
-        nutr = {r["date"]: r2d(r) for r in cur.fetchall()}
+        nutr: Dict[Any, Dict[str, Any]] = {}
+        for r in cur.fetchall():
+            rd = r2d(r)
+            dk = as_date(rd.get("date"))
+            if dk is not None:
+                nutr[dk] = rd
         cur.execute("SELECT date, weight FROM weight_log WHERE profile_id=%s ORDER BY date", (profile_id,))
-        wlog = {r["date"]: r["weight"] for r in cur.fetchall()}
+        wlog: Dict[Any, Any] = {}
+        for r in cur.fetchall():
+            rd = r2d(r)
+            dk = as_date(rd.get("date"))
+            if dk is not None:
+                wlog[dk] = rd["weight"]
         cur.execute(
             "SELECT date, lean_mass, body_fat_pct FROM caliper_log WHERE profile_id=%s ORDER BY date",
             (profile_id,),
         )
-        cals = sorted([r2d(r) for r in cur.fetchall()], key=lambda x: x["date"])
+        cals = [r2d(r) for r in cur.fetchall()]
+        cals = sorted(
+            [c for c in cals if as_date(c.get("date")) is not None],
+            key=lambda x: as_date(x["date"]),
+        )
 
-    all_dates = sorted(set(list(nutr.keys()) + list(wlog.keys())))
+    # Alle Keys sind datetime.date — vermeidet TypeError bei Vergleichen (str vs date)
+    all_dates = sorted(set(nutr.keys()) | set(wlog.keys()))
     mi = 0
     last_cal: Dict[str, Any] = {}
     cal_by_date: Dict[Any, Dict[str, Any]] = {}
     for d in all_dates:
-        while mi < len(cals) and cals[mi]["date"] <= d:
+        while mi < len(cals):
+            cd = as_date(cals[mi].get("date"))
+            if cd is None:
+                mi += 1
+                continue
+            if cd > d:
+                break
             last_cal = cals[mi]
             mi += 1
         if last_cal:
diff --git a/frontend/src/pages/History.jsx b/frontend/src/pages/History.jsx
index 95b76db..d33d09c 100644
--- a/frontend/src/pages/History.jsx
+++ b/frontend/src/pages/History.jsx
@@ -1205,6 +1205,34 @@ function chartJsScatterPoints(payload) {
   return raw.map((p) => ({ x: Number(p.x), y: Number(p.y) }))
 }
 
+/** Backend metadata.lag_details: [{ lag, n_pairs, r }] — input for the lag curve L → r (C3: falls back to lag_details_hrv / lag_details_rhr) */
+function lagDetailsToCurve(meta) {
+  let ld = meta?.lag_details
+  if (!Array.isArray(ld) || ld.length === 0) {
+    const m = String(meta?.metric || '').toUpperCase()
+    if (m === 'HRV' && Array.isArray(meta?.lag_details_hrv)) ld = meta.lag_details_hrv
+    else if (m === 'RHR' && Array.isArray(meta?.lag_details_rhr)) ld = meta.lag_details_rhr
+    else {
+      const h = meta?.lag_details_hrv
+      const r = meta?.lag_details_rhr
+      const hl = Array.isArray(h) ? h.length : 0
+      const rl = Array.isArray(r) ? r.length : 0
+      if (hl >= rl && hl > 0) ld = h // no metric given: prefer the longer list, HRV on a tie
+      else if (rl > 0) ld = r
+      else ld = []
+    }
+  }
+  if (!Array.isArray(ld) || ld.length === 0) return []
+  return ld
+    .map((d) => ({
+      lag: Number(d?.lag),
+      r: d?.r == null || d?.r === '' ? null : Number(d.r),
+      n_pairs: d?.n_pairs != null ? Number(d.n_pairs) : null,
+    }))
+    .filter((d) => Number.isFinite(d.lag) && d.r != null && Number.isFinite(d.r)) // drop lags without a numeric r
+    .sort((a, b) => a.lag - b.lag)
+}
+
 function driverBarFromStatus(st) {
   const s = String(st || '').toLowerCase()
   if (s.includes('hinder')) return { v: -1, fill: 'var(--danger)' }
@@ -1240,10 +1268,13 @@ function chartJsBarRows(payload, fallbackDrivers) {
 function CorrelationScatterTile({ title, accent, payload }) {
   const meta = payload?.metadata || {}
   const pts = chartJsScatterPoints(payload)
+  const curve = lagDetailsToCurve(meta)
   const hasChart = pts.length > 0 && meta.correlation != null
   const r = Number(meta.correlation)
   const strength =
     !Number.isFinite(r) ? 'bad' : Math.abs(r) >= 0.35 ? 'good' : Math.abs(r) >= 0.15 ? 'warn' : 'bad'
+  const bestLag = meta.best_lag_days != null ? Number(meta.best_lag_days) : null
+  const maxLagAxis = curve.length ? Math.max(14, ...curve.map((d) => d.lag), bestLag || 0) : 28
 
   return (
     <div
@@ -1257,12 +1288,76 @@ function CorrelationScatterTile({ title, accent, payload }) {
       <div style={{ fontSize: 11, fontWeight: 700, color: 'var(--text1)', marginBottom: 4 }}>{title}</div>
       <div style={{ fontSize: 10, color: 'var(--text3)', lineHeight: 1.35, marginBottom: 6 }}>
         r = {meta.correlation != null ? Number(meta.correlation).toFixed(3) : '—'}
-        {meta.best_lag_days != null ? ` · Lag ${meta.best_lag_days} T` : ''}
+        {meta.best_lag_days != null ? ` · bestes Lag ${meta.best_lag_days} T` : ''}
         {meta.metric ? ` · ${meta.metric}` : ''}
         {meta.confidence ? ` · ${meta.confidence}` : ''}
       </div>
       {!hasChart ? (
-        <div style={{ fontSize: 11, color: 'var(--text3)' }}>{meta.message || 'Keine Daten für diese Korrelation.'}</div>
+        <>
+          <div style={{ fontSize: 11, color: 'var(--text3)', marginBottom: curve.length ? 8 : 0 }}>
+            {meta.message || 'Keine Daten für diese Korrelation.'}
+          </div>
+          {curve.length > 0 && (
+            <div style={{ fontSize: 10, color: 'var(--text3)', marginBottom: 6 }}>
+              Lag-Sweep (kein Lag mit ≥15 Paaren): r über Lags — nur zur Einordnung.
+            </div>
+          )}
+          {curve.length > 0 && (
+            <ResponsiveContainer width="100%" height={120}>
+              <ComposedChart data={curve} margin={{ top: 4, right: 6, bottom: 4, left: -14 }}>
+                <CartesianGrid strokeDasharray="3 3" stroke="var(--border)" />
+                <XAxis dataKey="lag" type="number" domain={[0, maxLagAxis]} tick={{ fontSize: 9, fill: 'var(--text3)' }} label={{ value: 'Lag (T)', fontSize: 9, fill: 'var(--text3)', offset: -2 }} />
+                <YAxis dataKey="r" domain={[-1, 1]} tick={{ fontSize: 9, fill: 'var(--text3)' }} width={36} label={{ value: 'r', fontSize: 9, fill: 'var(--text3)', angle: -90 }} />
+                <ReferenceLine y={0} stroke="var(--text3)" strokeDasharray="4 4" />
+                <Tooltip
+                  contentStyle={{ background: 'var(--surface)', border: '1px solid var(--border)', borderRadius: 8, fontSize: 10 }}
+                  formatter={(v, _n, item) => [`r = ${Number(v).toFixed(3)}`, `Lag ${item?.payload?.lag} T · n = ${item?.payload?.n_pairs ?? '—'}`]}
+                />
+                <Line type="monotone" dataKey="r" stroke={accent} strokeWidth={2} dot={{ r: 3, fill: accent }} isAnimationActive={false} />
+              </ComposedChart>
+            </ResponsiveContainer>
+          )}
+        </>
+      ) : curve.length >= 1 ? (
+        <>
+          <div style={{ fontSize: 9, color: 'var(--text3)', marginBottom: 4 }}>
+            Kurve: Pearson-r je Lag (Tage); starker Punkt = gewähltes bestes Lag.
+          </div>
+          <ResponsiveContainer width="100%" height={132}>
+            <ComposedChart data={curve} margin={{ top: 4, right: 6, bottom: 4, left: -14 }}>
+              <CartesianGrid strokeDasharray="3 3" stroke="var(--border)" />
+              <XAxis dataKey="lag" type="number" domain={[0, maxLagAxis]} tick={{ fontSize: 9, fill: 'var(--text3)' }} />
+              <YAxis dataKey="r" domain={[-1, 1]} tick={{ fontSize: 9, fill: 'var(--text3)' }} width={36} />
+              <ReferenceLine y={0} stroke="var(--text3)" strokeDasharray="4 4" />
+              <Tooltip
+                contentStyle={{ background: 'var(--surface)', border: '1px solid var(--border)', borderRadius: 8, fontSize: 10 }}
+                formatter={(v, _n, item) => [`r = ${Number(v).toFixed(3)}`, `Lag ${item?.payload?.lag} T · n = ${item?.payload?.n_pairs ?? '—'}`]}
+              />
+              <Line
+                type="monotone"
+                dataKey="r"
+                stroke={accent}
+                strokeWidth={2}
+                isAnimationActive={false}
+                dot={(props) => {
+                  const { cx, cy, payload: pl } = props
+                  if (cx == null || cy == null || !pl) return null
+                  const isBest = bestLag != null && Number(pl.lag) === bestLag
+                  return (
+                    <circle
+                      cx={cx}
+                      cy={cy}
+                      r={isBest ? 6 : 3.5}
+                      fill={isBest ? 'var(--surface)' : accent}
+                      stroke={isBest ? accent : 'none'}
+                      strokeWidth={isBest ? 2.5 : 0}
+                    />
+                  )
+                }}
+              />
+            </ComposedChart>
+          </ResponsiveContainer>
+        </>
       ) : (
         <ResponsiveContainer width="100%" height={118}>
           <ScatterChart margin={{ top: 2, right: 4, bottom: 2, left: -18 }}>
-- 
2.43.0


From 1c512b0d0a83bfa0bcb98836b94ee72f177309e4 Mon Sep 17 00:00:00 2001
From: Lars <Lars@stommer.de>
Date: Tue, 21 Apr 2026 08:12:21 +0200
Subject: [PATCH 3/3] refactor: simplify best lag value handling in energy
 correlation calculations

- Updated `_correlate_energy_weight` so that `best` is annotated as `Optional[Tuple[int, float, int]]` and unpacked as a 3-tuple (`lag_b, r_b, n_b = best`); the fourth tuple element, which was discarded as `_` when unpacking, is removed from both the annotation and the unpacking for clarity.
---
 backend/data_layer/correlations.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/backend/data_layer/correlations.py b/backend/data_layer/correlations.py
index a0ed2fc..dd0568c 100644
--- a/backend/data_layer/correlations.py
+++ b/backend/data_layer/correlations.py
@@ -170,7 +170,7 @@ def _correlate_energy_weight(profile_id: str, max_lag: int) -> Optional[Dict]:
 
     balance_by = {d: kcal_by[d] - tdee_f for d in kcal_by}
 
-    best: Optional[Tuple[int, float, int, List[Tuple[int, float]]]] = None
+    best: Optional[Tuple[int, float, int]] = None
     lag_details: List[Dict[str, Any]] = []
 
     max_l = max(0, min(int(max_lag), 28))
@@ -211,7 +211,7 @@ def _correlate_energy_weight(profile_id: str, max_lag: int) -> Optional[Dict]:
             "tdee_kcal_used": round(tdee_f, 0),
         }
 
-    lag_b, r_b, n_b, _ = best
+    lag_b, r_b, n_b = best
     direction = _direction_from_r(r_b)
     conf = _lag_confidence(n_b, r_b)
     interp = (
-- 
2.43.0