feat: enhance lag correlation calculations and chart metadata
All checks were successful
Deploy Development / deploy (push) Successful in 1m3s
Build Test / pytest-backend (push) Successful in 5s
Build Test / lint-backend (push) Successful in 0s
Build Test / build-frontend (push) Successful in 19s

- Updated `calculate_lag_correlation` to include detailed interpretations and lag details for energy balance vs. weight change, protein vs. lean mass, and load vs. vital metrics.
- Improved handling of insufficient data scenarios in correlation charts, providing clearer messages and metadata for user insights.
- Refactored chart functions to utilize best lag values and correlation data more effectively, enhancing the visualization of relationships between metrics.
This commit is contained in:
Lars 2026-04-21 08:03:43 +02:00
parent 3f6673b636
commit 3106ebedae
2 changed files with 400 additions and 101 deletions

View File

@ -17,28 +17,29 @@ Phase 0c: Multi-Layer Architecture
Version: 1.0
"""
from typing import Dict, List, Optional, Tuple
from typing import Any, Dict, List, Optional, Tuple
from datetime import datetime, timedelta, date
from db import get_db, get_cursor, r2d
import statistics
from data_layer.nutrition_body_merge import build_merged_daily_nutrition_body_rows
from data_layer.nutrition_metrics import estimate_tdee_kcal_from_latest_weight
# Lag-Korrelation (Issue #53): gleiche TDEE-Logik wie nutrition_metrics / nutrition_viz
MIN_PAIRS_LAG_CORR = 15
LAG_CORR_LOOKBACK_DAYS = 120
def calculate_lag_correlation(profile_id: str, var1: str, var2: str, max_lag_days: int = 14) -> Optional[Dict]:
"""
Calculate lagged correlation between two variables
Pearson-Korrelation mit Lag-Sweep (Issue 53, Data-Layer).
Args:
var1: 'energy', 'protein', 'training_load'
var2: 'weight', 'lbm', 'hrv', 'rhr'
max_lag_days: Maximum lag to test
C1: Tagesbilanz (kcal TDEE wie ``estimate_tdee_kcal_from_latest_weight``) vs. ΔGewicht [tt+L], L1.
C2: Protein (g) vs. ΔMager [tt+L] aus ``build_merged_daily_nutrition_body_rows``, L1.
C3: Summe ``duration_min`` pro Tag vs. HRV oder Ruhepuls am Tag t+L (L0).
Returns:
{
'best_lag': X, # days
'correlation': 0.XX, # -1 to 1
'direction': 'positive'/'negative'/'none',
'confidence': 'high'/'medium'/'low',
'data_points': N
}
Rückgabe enthält u. a. ``best_lag`` / ``best_lag_days``, ``correlation``, ``interpretation``,
optional ``lag_details`` (r, n je Lag), mindestens ``MIN_PAIRS_LAG_CORR`` Paare am besten Lag.
"""
v1 = (var1 or "").strip().lower()
if v1 in ("energy", "energy_balance"):
@ -70,85 +71,349 @@ def _normalize_lag_payload(raw: Optional[Dict]) -> Optional[Dict]:
return out
def _iso_date_key(d: Any) -> str:
    """
    Build the ``YYYY-MM-DD`` dictionary key for a date-like value.

    Args:
        d: ``None``, an object exposing ``isoformat()`` (e.g. ``date`` or
            ``datetime``), or anything else that can be passed to ``str``.

    Returns:
        ``""`` for ``None``; otherwise the first ten characters of the ISO
        text (or of ``str(d)``). Text shorter than ten characters is
        returned unchanged.
    """
    if d is None:
        return ""
    # Prefer the object's own ISO rendering; fall back to its plain text form.
    raw = d.isoformat() if hasattr(d, "isoformat") else d
    # Slicing never pads, so short strings pass through unchanged.
    return str(raw)[:10]
def _parse_iso_to_date(ds: str) -> Optional[date]:
    """
    Parse the leading ``YYYY-MM-DD`` part of a string into a ``date``.

    Args:
        ds: Text whose first ten characters should form an ISO date.

    Returns:
        The parsed ``date``, or ``None`` when ``ds`` is empty, shorter than
        ten characters, or its prefix is rejected by ``date.fromisoformat``.
    """
    if not (ds and len(ds) >= 10):
        return None
    head = ds[:10]
    try:
        parsed = date.fromisoformat(head)
    except ValueError:
        return None
    return parsed
def _pearson_r(
    xs: List[float],
    ys: List[float],
    min_pairs: Optional[int] = None,
) -> Optional[float]:
    """
    Compute the Pearson correlation coefficient of two equally long series.

    Args:
        xs: First series.
        ys: Second series; must have the same length as ``xs``.
        min_pairs: Minimum number of pairs required. ``None`` (the default)
            uses the module constant ``MIN_PAIRS_LAG_CORR``.

    Returns:
        ``r`` clamped to ``[-1.0, 1.0]``, or ``None`` when
        - the series are empty, differ in length, or have fewer than the
          required number of pairs,
        - either series has (near-)zero variance, or
        - the result is not finite (NaN/inf in the input).
    """
    from math import isfinite, sqrt

    required = MIN_PAIRS_LAG_CORR if min_pairs is None else int(min_pairs)
    n = len(xs)
    if n == 0 or n < required or n != len(ys):
        return None

    mean_x = sum(xs) / n
    mean_y = sum(ys) / n
    dev_x = [x - mean_x for x in xs]
    dev_y = [y - mean_y for y in ys]

    num = sum(a * b for a, b in zip(dev_x, dev_y))
    dx = sum(a * a for a in dev_x)
    dy = sum(b * b for b in dev_y)
    # A (near-)constant series has no defined correlation.
    if dx <= 1e-12 or dy <= 1e-12:
        return None

    r = num / (sqrt(dx) * sqrt(dy))
    # NaN must be rejected before clamping: min(1.0, nan) evaluates to 1.0,
    # which would report a perfect correlation for corrupt input.
    if not isfinite(r):
        return None
    return float(max(-1.0, min(1.0, r)))
def _direction_from_r(r: float) -> str:
    """
    Classify the sign of a correlation coefficient.

    Args:
        r: Correlation coefficient.

    Returns:
        ``"positive"`` when ``r > 0.05``, ``"negative"`` when ``r < -0.05``,
        otherwise ``"none"`` (this includes NaN, which fails both comparisons).
    """
    dead_band = 0.05
    label = "none"
    if r > dead_band:
        label = "positive"
    elif r < -dead_band:
        label = "negative"
    return label
def _lag_confidence(n_pairs: int, r: float) -> str:
    """
    Derive a confidence label for a lag correlation.

    Delegates to ``calculate_correlation_confidence`` with the pair count and
    the magnitude of ``r``; the sign of ``r`` is ignored.

    Args:
        n_pairs: Number of value pairs behind the correlation.
        r: Correlation coefficient.

    Returns:
        Whatever ``calculate_correlation_confidence`` returns
        (NOTE(review): presumably a label string — confirm against that helper).
    """
    strength = abs(r)
    return calculate_correlation_confidence(n_pairs, strength)
def _correlate_energy_weight(profile_id: str, max_lag: int) -> Optional[Dict]:
    """
    Lag-sweep Pearson correlation: daily energy balance vs. later weight change.

    The energy balance of day ``t`` is the summed ``kcal`` from
    ``nutrition_log`` minus the TDEE estimate from
    ``estimate_tdee_kcal_from_latest_weight``. For every lag ``L`` in
    ``1..max_lag`` a day contributes the pair
    ``(balance[t], weight[t+L] - weight[t])`` when ``weight_log`` has an entry
    on both days. Lag 0 is skipped because the same-day weight difference is
    always zero. The lag with the largest ``|r|`` is reported.

    Args:
        profile_id: Profile whose nutrition and weight rows are read.
        max_lag: Largest lag in days to test; clamped to ``0..28``.

    Returns:
        A dict with ``best_lag``, ``correlation``, ``direction``,
        ``confidence``, ``data_points`` and ``interpretation``.
        - Without a usable TDEE estimate: ``reason == "no_tdee"``, and
          ``lag_details`` / ``tdee_kcal_used`` are absent.
        - When no lag reaches the minimum pair count:
          ``reason == "insufficient_pairs"``, with ``lag_details`` (``lag``,
          ``n_pairs``, ``r`` per tested lag) and ``tdee_kcal_used``.
        - Otherwise: the best-lag result plus ``lag_details`` and
          ``tdee_kcal_used``.
    """
    tdee = estimate_tdee_kcal_from_latest_weight(profile_id)
    if tdee is None or float(tdee) <= 0:
        return {
            "best_lag": None,
            "correlation": None,
            "direction": "none",
            "confidence": "insufficient",
            "data_points": 0,
            "interpretation": "Keine TDEE-Schätzung möglich (Gewicht/Demografie).",
            "reason": "no_tdee",
        }
    tdee_f = float(tdee)
    cutoff = (datetime.now() - timedelta(days=LAG_CORR_LOOKBACK_DAYS)).strftime("%Y-%m-%d")

    with get_db() as conn:
        cur = get_cursor(conn)
        cur.execute(
            """
            SELECT date::date AS d, SUM(kcal)::float AS kcal
            FROM nutrition_log
            WHERE profile_id = %s AND date >= %s::date AND kcal IS NOT NULL
            GROUP BY date
            ORDER BY date
            """,
            (profile_id, cutoff),
        )
        kcal_rows = cur.fetchall()
        cur.execute(
            """
            SELECT date::date AS d, weight::float AS weight
            FROM weight_log
            WHERE profile_id = %s AND date >= %s::date AND weight IS NOT NULL
            ORDER BY date
            """,
            (profile_id, cutoff),
        )
        w_rows = cur.fetchall()

    kcal_by: Dict[str, float] = {}
    for row in kcal_rows:
        kcal_by[_iso_date_key(row["d"])] = float(row["kcal"] or 0)
    weight_by: Dict[str, float] = {}
    for row in w_rows:
        weight_by[_iso_date_key(row["d"])] = float(row["weight"])
    balance_by = {day: kcal - tdee_f for day, kcal in kcal_by.items()}

    # Start days are lag-independent: resolve date and start weight once
    # instead of re-sorting and re-parsing for every lag.
    candidates: List[Tuple[date, float, float]] = []
    for ds in sorted(balance_by):
        w0 = weight_by.get(ds)
        d0 = _parse_iso_to_date(ds)
        if w0 is None or d0 is None:
            continue
        candidates.append((d0, balance_by[ds], w0))

    best: Optional[Tuple[int, float, int]] = None
    lag_details: List[Dict[str, Any]] = []
    max_l = max(0, min(int(max_lag), 28))

    # Lag 0 is skipped: the same-day weight difference is always 0.
    for lag in range(1, max_l + 1):
        step = timedelta(days=lag)
        xs: List[float] = []
        ys: List[float] = []
        for d0, balance, w0 in candidates:
            w1 = weight_by.get((d0 + step).isoformat())
            if w1 is None:
                continue
            xs.append(balance)
            ys.append(w1 - w0)
        r_lag = _pearson_r(xs, ys)
        n_p = len(xs)
        lag_details.append(
            {"lag": lag, "n_pairs": n_p, "r": None if r_lag is None else round(r_lag, 4)}
        )
        if r_lag is None:
            continue
        if best is None or abs(r_lag) > abs(best[1]):
            best = (lag, r_lag, n_p)

    if best is None:
        return {
            "best_lag": None,
            "correlation": None,
            "direction": "none",
            "confidence": "insufficient",
            "data_points": 0,
            "interpretation": "Zu wenige gepaarte Tage mit Ernährung, Gewicht und gewähltem Lag.",
            "reason": "insufficient_pairs",
            "lag_details": lag_details,
            "tdee_kcal_used": round(tdee_f, 0),
        }

    # ``best`` holds exactly three items; unpacking four raised ValueError.
    lag_b, r_b, n_b = best
    direction = _direction_from_r(r_b)
    conf = _lag_confidence(n_b, r_b)
    interp = (
        f"Tagesbilanz (kcal TDEE ~{tdee_f:.0f}) vs. Gewichtsänderung nach {lag_b} Tagen: "
        f"r ≈ {r_b:.2f} ({direction}). "
        f"Basierend auf {n_b} Kalendertagen mit vollständigen Paaren."
    )
    return {
        "best_lag": lag_b,
        "correlation": round(r_b, 4),
        "direction": direction,
        "confidence": conf,
        "data_points": n_b,
        "interpretation": interp,
        "lag_details": lag_details,
        "tdee_kcal_used": round(tdee_f, 0),
    }
def _correlate_protein_lbm(profile_id: str, max_lag: int) -> Optional[Dict]:
    """
    Lag-sweep Pearson correlation: daily protein intake vs. later lean-mass change.

    Rows come from ``build_merged_daily_nutrition_body_rows``; the fields
    ``date``, ``protein_g`` and ``lean_mass`` are read. For every lag ``L`` in
    ``1..max_lag`` a day contributes the pair
    ``(protein[t], lean_mass[t+L] - lean_mass[t])`` when protein is known on
    ``t`` and lean mass on both ``t`` and ``t+L``. The lag with the largest
    ``|r|`` is reported.

    NOTE(review): if the merged lean-mass series is forward-filled, many
    differences will be exactly zero and consecutive pairs are not
    independent — confirm against the merge helper.

    Args:
        profile_id: Profile whose merged nutrition/body rows are read.
        max_lag: Largest lag in days to test; clamped to ``0..28``.

    Returns:
        A dict with ``best_lag``, ``correlation``, ``direction``,
        ``confidence``, ``data_points`` and ``interpretation``.
        - When the merge yields nothing: ``reason == "no_merged_rows"``, and
          ``lag_details`` is absent.
        - When no lag reaches the minimum pair count:
          ``reason == "insufficient_pairs"``, with ``lag_details``.
        - Otherwise: the best-lag result plus ``lag_details``.
    """
    merged = build_merged_daily_nutrition_body_rows(profile_id)
    if not merged:
        return {
            "best_lag": None,
            "correlation": None,
            "direction": "none",
            "confidence": "insufficient",
            "data_points": 0,
            "interpretation": "Keine zusammengeführten Ernährungs-/Körperdaten.",
            "reason": "no_merged_rows",
        }

    protein_by: Dict[str, float] = {}
    lbm_by: Dict[str, float] = {}
    for row in merged:
        ds = _iso_date_key(row.get("date"))
        if not ds:
            continue
        pg = row.get("protein_g")
        lm = row.get("lean_mass")
        if pg is not None:
            protein_by[ds] = float(pg)
        if lm is not None:
            lbm_by[ds] = float(lm)

    # Start days are lag-independent: resolve them once, not once per lag.
    candidates: List[Tuple[date, float, float]] = []
    for ds in sorted(protein_by):
        if ds not in lbm_by:
            continue
        d0 = _parse_iso_to_date(ds)
        if d0 is None:
            continue
        candidates.append((d0, protein_by[ds], lbm_by[ds]))

    best: Optional[Tuple[int, float, int]] = None
    lag_details: List[Dict[str, Any]] = []
    max_l = max(0, min(int(max_lag), 28))

    for lag in range(1, max_l + 1):
        step = timedelta(days=lag)
        xs: List[float] = []
        ys: List[float] = []
        for d0, protein, lbm0 in candidates:
            lbm1 = lbm_by.get((d0 + step).isoformat())
            if lbm1 is None:
                continue
            xs.append(protein)
            ys.append(lbm1 - lbm0)
        r_lag = _pearson_r(xs, ys)
        n_p = len(xs)
        lag_details.append(
            {"lag": lag, "n_pairs": n_p, "r": None if r_lag is None else round(r_lag, 4)}
        )
        if r_lag is None:
            continue
        if best is None or abs(r_lag) > abs(best[1]):
            best = (lag, r_lag, n_p)

    if best is None:
        return {
            "best_lag": None,
            "correlation": None,
            "direction": "none",
            "confidence": "insufficient",
            "data_points": 0,
            "interpretation": "Zu wenige Tage mit Protein und Magermasse (Caliper) für die gewählten Lags.",
            "reason": "insufficient_pairs",
            "lag_details": lag_details,
        }

    lag_b, r_b, n_b = best
    direction = _direction_from_r(r_b)
    conf = _lag_confidence(n_b, r_b)
    interp = (
        f"Protein (g/Tag) vs. Magermasse-Änderung nach {lag_b} Tagen: r ≈ {r_b:.2f} ({direction}). "
        f"{n_b} gepaarte Tage."
    )
    return {
        "best_lag": lag_b,
        "correlation": round(r_b, 4),
        "direction": direction,
        "confidence": conf,
        "data_points": n_b,
        "interpretation": interp,
        "lag_details": lag_details,
    }
def _correlate_load_vitals(profile_id: str, vital: str, max_lag: int) -> Optional[Dict]:
    """
    Lag-sweep Pearson correlation: daily training minutes vs. a later vital value.

    The load of day ``t`` is the sum of ``duration_min`` from ``activity_log``.
    The vital value is the ``hrv`` or ``resting_hr`` column of
    ``vitals_baseline`` on day ``t+L``. Lags ``0..max_lag`` are tested and the
    lag with the largest ``|r|`` is reported. Only days with at least one
    activity of positive duration appear in the load series (the query
    filters ``duration_min > 0``), so rest days are not paired as zero load.

    Args:
        profile_id: Profile whose activity and vitals rows are read.
        vital: ``"hrv"`` selects HRV; any other value selects resting heart
            rate.
        max_lag: Largest lag in days to test; clamped to ``0..28``.

    Returns:
        A dict with ``best_lag``, ``correlation``, ``direction``,
        ``confidence``, ``data_points``, ``interpretation``, ``lag_details``
        and ``vital``. When no lag reaches the minimum pair count,
        ``reason == "insufficient_pairs"`` is added and ``best_lag`` /
        ``correlation`` are ``None``.
    """
    # ``col`` is one of two fixed identifiers, never caller-supplied text, so
    # interpolating it into the SQL below is safe.
    col = "hrv" if vital == "hrv" else "resting_hr"
    cutoff = (datetime.now() - timedelta(days=LAG_CORR_LOOKBACK_DAYS)).strftime("%Y-%m-%d")

    with get_db() as conn:
        cur = get_cursor(conn)
        cur.execute(
            """
            SELECT date::text AS d, COALESCE(SUM(duration_min), 0)::float AS minutes
            FROM activity_log
            WHERE profile_id = %s AND date >= %s::date
            AND duration_min IS NOT NULL AND duration_min > 0
            GROUP BY date
            ORDER BY date
            """,
            (profile_id, cutoff),
        )
        load_rows = cur.fetchall()
        cur.execute(
            f"""
            SELECT date::text AS d, {col}::float AS v
            FROM vitals_baseline
            WHERE profile_id = %s AND date >= %s::date AND {col} IS NOT NULL
            ORDER BY date
            """,
            (profile_id, cutoff),
        )
        vit_rows = cur.fetchall()

    load_by = {str(r["d"])[:10]: float(r["minutes"] or 0) for r in load_rows}
    vital_by = {str(r["d"])[:10]: float(r["v"]) for r in vit_rows}

    # Start days are lag-independent: parse them once, not once per lag.
    candidates: List[Tuple[date, float]] = []
    for ds in sorted(load_by):
        d0 = _parse_iso_to_date(ds)
        if d0 is None:
            continue
        candidates.append((d0, load_by[ds]))

    best: Optional[Tuple[int, float, int]] = None
    lag_details: List[Dict[str, Any]] = []
    max_l = max(0, min(int(max_lag), 28))
    vlabel = "HRV (ms)" if vital == "hrv" else "Ruhepuls (bpm)"

    # Lag 0 is meaningful here: same-day load vs. same-day vital value.
    for lag in range(0, max_l + 1):
        step = timedelta(days=lag)
        xs: List[float] = []
        ys: List[float] = []
        for d0, minutes in candidates:
            value = vital_by.get((d0 + step).isoformat())
            if value is None:
                continue
            xs.append(minutes)
            ys.append(value)
        r_lag = _pearson_r(xs, ys)
        n_p = len(xs)
        lag_details.append(
            {"lag": lag, "n_pairs": n_p, "r": None if r_lag is None else round(r_lag, 4)}
        )
        if r_lag is None:
            continue
        if best is None or abs(r_lag) > abs(best[1]):
            best = (lag, r_lag, n_p)

    if best is None:
        return {
            "best_lag": None,
            "correlation": None,
            "direction": "none",
            "confidence": "insufficient",
            "data_points": 0,
            "interpretation": f"Zu wenige gepaarte Tage mit Training und {vlabel}.",
            "reason": "insufficient_pairs",
            "lag_details": lag_details,
            "vital": vital,
        }

    lag_b, r_b, n_b = best
    direction = _direction_from_r(r_b)
    conf = _lag_confidence(n_b, r_b)
    interp = (
        f"Trainingsminuten/Tag vs. {vlabel} nach {lag_b} Tagen Lag: r ≈ {r_b:.2f} ({direction}). "
        f"{n_b} Paare."
    )
    return {
        "best_lag": lag_b,
        "correlation": round(r_b, 4),
        "direction": direction,
        "confidence": conf,
        "data_points": n_b,
        "interpretation": interp,
        "lag_details": lag_details,
        "vital": vital,
    }
# ============================================================================
# C4: Sleep vs. Recovery Correlation

View File

@ -1115,6 +1115,9 @@ def get_weight_energy_correlation_chart(
corr_data = calculate_lag_correlation(profile_id, "energy_balance", "weight", max_lag)
if not corr_data or corr_data.get('correlation') is None:
msg = "Nicht genug Daten für Korrelationsanalyse"
if isinstance(corr_data, dict):
msg = str(corr_data.get("interpretation") or corr_data.get("reason") or msg)
return {
"chart_type": "scatter",
"data": {
@ -1123,14 +1126,15 @@ def get_weight_energy_correlation_chart(
},
"metadata": {
"confidence": "insufficient",
"data_points": 0,
"message": "Nicht genug Daten für Korrelationsanalyse"
"data_points": corr_data.get("data_points", 0) if isinstance(corr_data, dict) else 0,
"message": msg,
"lag_details": corr_data.get("lag_details") if isinstance(corr_data, dict) else None,
"tdee_kcal_used": corr_data.get("tdee_kcal_used") if isinstance(corr_data, dict) else None,
}
}
# Create lag vs correlation data for chart
# For simplicity, show best lag point as single data point
best_lag = corr_data.get('best_lag_days', 0)
# Ein Punkt: bestes Lag (max. |r|) — Berechnung in data_layer.correlations (Issue 53)
best_lag = corr_data.get('best_lag_days', corr_data.get('best_lag', 0))
correlation = corr_data.get('correlation', 0)
return {
@ -1150,10 +1154,13 @@ def get_weight_energy_correlation_chart(
},
"metadata": {
"confidence": corr_data.get('confidence', 'low'),
"correlation": round(correlation, 3),
"correlation": round(float(correlation), 3),
"best_lag_days": best_lag,
"interpretation": corr_data.get('interpretation', ''),
"data_points": corr_data.get('data_points', 0)
"data_points": corr_data.get('data_points', 0),
"lag_details": corr_data.get("lag_details"),
"tdee_kcal_used": corr_data.get("tdee_kcal_used"),
"layer_1": "correlations._correlate_energy_weight",
}
}
@ -1180,6 +1187,9 @@ def get_lbm_protein_correlation_chart(
corr_data = calculate_lag_correlation(profile_id, "protein", "lbm", max_lag)
if not corr_data or corr_data.get('correlation') is None:
msg = "Nicht genug Daten für LBM-Protein Korrelation"
if isinstance(corr_data, dict):
msg = str(corr_data.get("interpretation") or corr_data.get("reason") or msg)
return {
"chart_type": "scatter",
"data": {
@ -1188,12 +1198,13 @@ def get_lbm_protein_correlation_chart(
},
"metadata": {
"confidence": "insufficient",
"data_points": 0,
"message": "Nicht genug Daten für LBM-Protein Korrelation"
"data_points": corr_data.get("data_points", 0) if isinstance(corr_data, dict) else 0,
"message": msg,
"lag_details": corr_data.get("lag_details") if isinstance(corr_data, dict) else None,
}
}
best_lag = corr_data.get('best_lag_days', 0)
best_lag = corr_data.get('best_lag_days', corr_data.get('best_lag', 0))
correlation = corr_data.get('correlation', 0)
return {
@ -1213,10 +1224,12 @@ def get_lbm_protein_correlation_chart(
},
"metadata": {
"confidence": corr_data.get('confidence', 'low'),
"correlation": round(correlation, 3),
"correlation": round(float(correlation), 3),
"best_lag_days": best_lag,
"interpretation": corr_data.get('interpretation', ''),
"data_points": corr_data.get('data_points', 0)
"data_points": corr_data.get('data_points', 0),
"lag_details": corr_data.get("lag_details"),
"layer_1": "correlations._correlate_protein_lbm",
}
}
@ -1240,35 +1253,54 @@ def get_load_vitals_correlation_chart(
"""
profile_id = session['profile_id']
# Try HRV first
corr_hrv = calculate_lag_correlation(profile_id, "load", "hrv", max_lag)
corr_rhr = calculate_lag_correlation(profile_id, "load", "rhr", max_lag)
# Use whichever has stronger correlation
if corr_hrv and corr_rhr:
corr_data = corr_hrv if abs(corr_hrv.get('correlation', 0)) > abs(corr_rhr.get('correlation', 0)) else corr_rhr
metric_name = "HRV" if corr_data == corr_hrv else "RHR"
elif corr_hrv:
corr_data = corr_hrv
metric_name = "HRV"
elif corr_rhr:
corr_data = corr_rhr
metric_name = "RHR"
else:
def _abs_corr(c):
if not c or c.get("correlation") is None:
return -1.0
try:
return abs(float(c["correlation"]))
except (TypeError, ValueError):
return -1.0
if _abs_corr(corr_hrv) < 0 and _abs_corr(corr_rhr) < 0:
msg = "Nicht genug Daten für Load-Vitals Korrelation"
h_msg = corr_hrv.get("interpretation") if isinstance(corr_hrv, dict) else None
r_msg = corr_rhr.get("interpretation") if isinstance(corr_rhr, dict) else None
if h_msg or r_msg:
msg = f"HRV: {h_msg or ''} · RHR: {r_msg or ''}"
return {
"chart_type": "scatter",
"data": {
"labels": [],
"datasets": []
},
"data": {"labels": [], "datasets": []},
"metadata": {
"confidence": "insufficient",
"data_points": 0,
"message": "Nicht genug Daten für Load-Vitals Korrelation"
}
"message": msg,
"lag_details_hrv": corr_hrv.get("lag_details") if isinstance(corr_hrv, dict) else None,
"lag_details_rhr": corr_rhr.get("lag_details") if isinstance(corr_rhr, dict) else None,
},
}
best_lag = corr_data.get('best_lag_days', 0)
if _abs_corr(corr_hrv) >= _abs_corr(corr_rhr):
corr_data = corr_hrv
metric_name = "HRV"
else:
corr_data = corr_rhr
metric_name = "RHR"
if not corr_data or corr_data.get("correlation") is None:
return {
"chart_type": "scatter",
"data": {"labels": [], "datasets": []},
"metadata": {
"confidence": "insufficient",
"data_points": 0,
"message": str(corr_data.get("interpretation") or "Nicht genug Daten für Load-Vitals Korrelation"),
},
}
best_lag = corr_data.get('best_lag_days', corr_data.get('best_lag', 0))
correlation = corr_data.get('correlation', 0)
return {
@ -1288,11 +1320,13 @@ def get_load_vitals_correlation_chart(
},
"metadata": {
"confidence": corr_data.get('confidence', 'low'),
"correlation": round(correlation, 3),
"correlation": round(float(correlation), 3),
"best_lag_days": best_lag,
"metric": metric_name,
"interpretation": corr_data.get('interpretation', ''),
"data_points": corr_data.get('data_points', 0)
"data_points": corr_data.get('data_points', 0),
"lag_details": corr_data.get("lag_details"),
"layer_1": "correlations._correlate_load_vitals",
}
}