diff --git a/backend/data_layer/__init__.py b/backend/data_layer/__init__.py index 63ec722..2742cde 100644 --- a/backend/data_layer/__init__.py +++ b/backend/data_layer/__init__.py @@ -35,10 +35,10 @@ from .activity_metrics import * from .recovery_metrics import * from .health_metrics import * from .scores import * +from .correlations import * # Future imports (will be added as modules are created): # from .goals import * -# from .correlations import * __all__ = [ # Utils @@ -149,4 +149,11 @@ __all__ = [ 'get_top_focus_area', 'calculate_focus_area_progress', 'calculate_category_progress', + + # Correlation Metrics + 'calculate_lag_correlation', + 'calculate_correlation_sleep_recovery', + 'calculate_plateau_detected', + 'calculate_top_drivers', + 'calculate_correlation_confidence', ] diff --git a/backend/data_layer/correlations.py b/backend/data_layer/correlations.py new file mode 100644 index 0000000..4826533 --- /dev/null +++ b/backend/data_layer/correlations.py @@ -0,0 +1,503 @@ +""" +Correlation Metrics Data Layer + +Provides structured correlation analysis and plateau detection functions. + +Functions: + - calculate_lag_correlation(): Lag correlation between variables + - calculate_correlation_sleep_recovery(): Sleep-recovery correlation + - calculate_plateau_detected(): Plateau detection (weight, strength, endurance) + - calculate_top_drivers(): Top drivers for current goals + - calculate_correlation_confidence(): Confidence level for correlations + +All functions return structured data (dict) or simple values. +Use placeholder_resolver.py for formatted strings for AI. 
+
+Phase 0c: Multi-Layer Architecture
+Version: 1.0
+"""
+
+from typing import Dict, List, Optional, Tuple
+from datetime import datetime, timedelta, date
+from db import get_db, get_cursor, r2d
+import statistics
+
+def calculate_lag_correlation(profile_id: str, var1: str, var2: str, max_lag_days: int = 14) -> Optional[Dict]:
+    """
+    Route a lagged-correlation request to the helper for the given variable pair
+
+    Supported pairs:
+        'energy'        -> 'weight'
+        'protein'       -> 'lbm'
+        'training_load' -> 'hrv' or 'rhr'
+
+    Args:
+        profile_id: Profile the analysis is run for
+        var1: 'energy', 'protein', 'training_load'
+        var2: 'weight', 'lbm', 'hrv', 'rhr'
+        max_lag_days: Maximum lag to test (forwarded to the helper)
+
+    Returns:
+        None for an unsupported pair, otherwise the helper's dict:
+        {
+            'best_lag': X,  # days
+            'correlation': 0.XX,  # -1 to 1
+            'direction': 'positive'/'negative'/'none',
+            'confidence': 'high'/'medium'/'low',
+            'data_points': N
+        }
+    """
+    pair = (var1, var2)
+
+    if pair == ('energy', 'weight'):
+        return _correlate_energy_weight(profile_id, max_lag_days)
+    if pair == ('protein', 'lbm'):
+        return _correlate_protein_lbm(profile_id, max_lag_days)
+    if var1 == 'training_load' and var2 in ('hrv', 'rhr'):
+        return _correlate_load_vitals(profile_id, var2, max_lag_days)
+
+    # Any other combination is not supported
+    return None
+
+
+def _correlate_energy_weight(profile_id: str, max_lag: int) -> Optional[Dict]:
+    """
+    Correlate energy balance with weight change
+
+    Loads the last 90 days of nutrition rows (weight joined on the same date).
+    With fewer than 30 rows an "insufficient data" result is returned.
+    Otherwise a fixed placeholder result is returned: the correlation itself
+    is not computed yet and max_lag is currently not used.
+    """
+    with get_db() as conn:
+        cur = get_cursor(conn)
+
+        # One row per nutrition entry; weight comes from a LEFT JOIN on the date
+        cur.execute("""
+            SELECT n.date, n.kcal, w.weight
+            FROM nutrition_log n
+            LEFT JOIN weight_log w ON w.profile_id = n.profile_id
+                AND w.date = n.date
+            WHERE n.profile_id = %s
+                AND n.date >= CURRENT_DATE - INTERVAL '90 days'
+            ORDER BY n.date
+        """, (profile_id,))
+
+        rows = cur.fetchall()
+
+    row_count = len(rows)
+
+    if row_count >= 30:
+        # Placeholder until the rolling energy balance / TDEE estimation exists
+        return {
+            'best_lag': 7,
+            'correlation': -0.45,  # Placeholder
+            'direction': 'negative',  # Higher deficit = lower weight (expected)
+            'confidence': 'medium',
+            'data_points': row_count
+        }
+
+    return {
+        'best_lag': None,
+        'correlation': None,
+        'direction': 'none',
+        'confidence': 'low',
+        'data_points': row_count,
+        'reason': 'Insufficient data (<30 days)'
+    }
+
+
+def _correlate_protein_lbm(profile_id: str, max_lag: int) -> Optional[Dict]:
+    """
+    Correlate protein intake with LBM trend
+
+    Currently a placeholder: always returns the same fixed result and uses
+    neither profile_id nor max_lag.
+    """
+    # TODO: Implement full correlation calculation
+    placeholder = {
+        'best_lag': 0,
+        'correlation': 0.32,  # Placeholder
+        'direction': 'positive',
+        'confidence': 'medium',
+        'data_points': 28
+    }
+    return placeholder
+
+
+def _correlate_load_vitals(profile_id: str, vital: str, max_lag: int) -> Optional[Dict]:
+    """
+    Correlate training load with HRV or RHR
+
+    Currently a placeholder: 'hrv' yields a fixed negative result, any other
+    value of vital is treated as 'rhr' and yields a fixed positive result.
+    """
+    # TODO: Implement full correlation calculation
+    if vital == 'hrv':
+        # Negative = high load reduces HRV (expected)
+        correlation, direction = -0.38, 'negative'
+    else:  # rhr
+        # Positive = high load increases RHR (expected)
+        correlation, direction = 0.42, 'positive'
+
+    return {
+        'best_lag': 1,
+        'correlation': correlation,
+        'direction': direction,
+        'confidence': 'medium',
+        'data_points': 25
+    }
+
+
+# ============================================================================
+# C4: Sleep vs.
Recovery Correlation
+# ============================================================================
+
+def calculate_correlation_sleep_recovery(profile_id: str) -> Optional[Dict]:
+    """
+    Correlate sleep quality/duration with recovery score
+
+    Currently a placeholder: returns the same fixed result for every profile.
+
+    Returns:
+        Dict with 'correlation', 'direction', 'confidence', 'data_points'
+    """
+    # TODO: Implement full correlation
+    return {
+        'correlation': 0.65,  # Strong positive (expected)
+        'direction': 'positive',
+        'confidence': 'high',
+        'data_points': 28
+    }
+
+
+# ============================================================================
+# C6: Plateau Detector
+# ============================================================================
+
+def calculate_plateau_detected(profile_id: str) -> Optional[Dict]:
+    """
+    Detect if user is in a plateau based on goal mode
+
+    The focus area with the highest weight selects the detector
+    (weight, strength or endurance).
+
+    Returns:
+        None when no focus weights exist or the top focus area has no
+        detector. Otherwise the detector's dict; on a detected plateau:
+        {
+            'plateau_detected': True,
+            'plateau_type': 'weight_loss'/'strength',
+            'confidence': 'high'/'medium',
+            'duration_days': X,
+            'top_factors': [list of potential causes]
+        }
+        Without a plateau only 'plateau_detected': False (plus an optional
+        'reason') is returned.
+    """
+    # NOTE(review): this module lives in data_layer but imports from
+    # `calculations` - confirm that package is still the intended source.
+    from calculations.scores import get_user_focus_weights
+
+    focus_weights = get_user_focus_weights(profile_id)
+
+    if not focus_weights:
+        return None
+
+    # Determine primary focus area
+    top_focus = max(focus_weights, key=focus_weights.get)
+
+    # Check for plateau based on focus area
+    if top_focus in ['körpergewicht', 'körperfett']:
+        return _detect_weight_plateau(profile_id)
+    elif top_focus == 'kraftaufbau':
+        return _detect_strength_plateau(profile_id)
+    elif top_focus == 'cardio':
+        return _detect_endurance_plateau(profile_id)
+    else:
+        return None
+
+
+def _detect_weight_plateau(profile_id: str) -> Dict:
+    """
+    Detect weight loss plateau
+
+    A plateau is reported when the 28-day weight slope is nearly flat
+    (|slope| < 0.02) while the nutrition score is above 70. Possible causes
+    are collected in 'top_factors'; two or more causes raise the confidence
+    to 'high'.
+    """
+    from calculations.body_metrics import calculate_weight_28d_slope
+    from calculations.nutrition_metrics import calculate_nutrition_score
+
+    slope = calculate_weight_28d_slope(profile_id)
+    nutrition_score = calculate_nutrition_score(profile_id)
+
+    if slope is None:
+        return {'plateau_detected': False, 'reason': 'Insufficient data'}
+
+    # Plateau = flat weight for 28 days despite adherence
+    is_plateau = (
+        abs(slope) < 0.02
+        and nutrition_score is not None
+        and nutrition_score > 70
+    )
+
+    if not is_plateau:
+        return {'plateau_detected': False}
+
+    factors = []
+
+    # Check potential factors
+    if nutrition_score > 85:
+        factors.append('Hohe Adhärenz trotz Stagnation → mögliche Anpassung des Stoffwechsels')
+
+    # Check if deficit is too small.
+    # Compare against None explicitly: a balance of exactly 0 is a valid
+    # value and must count as "deficit too small".
+    from calculations.nutrition_metrics import calculate_energy_balance_7d
+    balance = calculate_energy_balance_7d(profile_id)
+    if balance is not None and balance > -200:
+        factors.append('Energiedefizit zu gering (<200 kcal/Tag)')
+
+    # Check water retention (if waist is shrinking but weight stable)
+    from calculations.body_metrics import calculate_waist_28d_delta
+    waist_delta = calculate_waist_28d_delta(profile_id)
+    if waist_delta is not None and waist_delta < -1:
+        factors.append('Taillenumfang sinkt → mögliche Wasserretention maskiert Fettabbau')
+
+    return {
+        'plateau_detected': True,
+        'plateau_type': 'weight_loss',
+        'confidence': 'high' if len(factors) >= 2 else 'medium',
+        'duration_days': 28,
+        'top_factors': factors[:3]
+    }
+
+
+def _detect_strength_plateau(profile_id: str) -> Dict:
+    """
+    Detect strength training plateau
+
+    A plateau is reported when the 28-day LBM change is nearly flat
+    (|change| < 0.3) while the activity score is above 75. Possible causes
+    (low recovery, low protein adequacy, high monotony) are collected in
+    'top_factors'.
+    """
+    from calculations.body_metrics import calculate_lbm_28d_change
+    from calculations.activity_metrics import calculate_activity_score
+    from calculations.recovery_metrics import calculate_recovery_score_v2
+
+    lbm_change = calculate_lbm_28d_change(profile_id)
+    activity_score = calculate_activity_score(profile_id)
+    recovery_score = calculate_recovery_score_v2(profile_id)
+
+    if lbm_change is None:
+        return {'plateau_detected': False, 'reason': 'Insufficient data'}
+
+    # Plateau = flat LBM despite high activity score
+    is_plateau = (
+        abs(lbm_change) < 0.3
+        and activity_score is not None
+        and activity_score > 75
+    )
+
+    if not is_plateau:
+        return {'plateau_detected': False}
+
+    factors = []
+
+    # Explicit None checks: a score of 0 is the worst case and must be
+    # reported, not skipped as "missing".
+    if recovery_score is not None and recovery_score < 60:
+        factors.append('Recovery Score niedrig → möglicherweise Übertraining')
+
+    from calculations.nutrition_metrics import calculate_protein_adequacy_28d
+    protein_score = calculate_protein_adequacy_28d(profile_id)
+    if protein_score is not None and protein_score < 70:
+        factors.append('Proteinzufuhr unter Zielbereich')
+
+    from calculations.activity_metrics import calculate_monotony_score
+    monotony = calculate_monotony_score(profile_id)
+    if monotony is not None and monotony > 2.0:
+        factors.append('Hohe Trainingsmonotonie → Stimulus-Anpassung')
+
+    return {
+        'plateau_detected': True,
+        'plateau_type': 'strength',
+        'confidence': 'medium',
+        'duration_days': 28,
+        'top_factors': factors[:3]
+    }
+
+
+def _detect_endurance_plateau(profile_id: str) -> Dict:
+    """
+    Detect endurance plateau
+
+    Not implemented yet: always reports that no plateau was detected.
+    The metric imports are deliberately left out until they are used, so
+    this stub cannot fail on a missing helper.
+    """
+    # TODO: Implement when vitals_baseline.vo2_max is populated
+    return {'plateau_detected': False, 'reason': 'VO2max tracking not yet implemented'}
+
+
+# ============================================================================
+# C7: Multi-Factor Driver Panel
+# ============================================================================
+
+def calculate_top_drivers(profile_id: str) -> Optional[List[Dict]]:
+    """
+    Calculate top influencing factors for goal progress
+
+    Each metric that returns a value contributes one driver; metrics that
+    return None are skipped. The result is sorted so that hindering drivers
+    come first, then supporting ones, then neutral ones.
+
+    Returns list of drivers (at most 8):
+        [
+            {
+                'factor': 'Energiebilanz',
+                'status': 'förderlich'/'neutral'/'hinderlich',
+                'evidence': 'hoch'/'mittel'/'niedrig',
+                'reason': '1-sentence explanation'
+            },
+            ...
+        ]
+    """
+    drivers = []
+
+    # 1. Energy balance
+    from calculations.nutrition_metrics import calculate_energy_balance_7d
+    balance = calculate_energy_balance_7d(profile_id)
+    if balance is not None:
+        # Ordered thresholds so every value lands in exactly one band.
+        # The [-800, -500) band used to fall through to the surplus branch
+        # and was described as a surplus although it is a deficit.
+        if balance < -800:
+            status = 'hinderlich'
+            reason = f'Sehr großes Defizit ({int(balance)} kcal/Tag) → Risiko für Magermasseverlust'
+        elif balance < -500:
+            status = 'neutral'
+            reason = f'Großes Defizit ({int(balance)} kcal/Tag)'
+        elif balance <= -200:
+            status = 'förderlich'
+            reason = f'Moderates Defizit ({int(balance)} kcal/Tag) unterstützt Fettabbau'
+        elif balance < 200:
+            status = 'neutral'
+            reason = 'Energiebilanz ausgeglichen'
+        else:
+            status = 'neutral'
+            reason = f'Energieüberschuss ({int(balance)} kcal/Tag)'
+
+        drivers.append({
+            'factor': 'Energiebilanz',
+            'status': status,
+            'evidence': 'hoch',
+            'reason': reason
+        })
+
+    # 2. Protein adequacy
+    from calculations.nutrition_metrics import calculate_protein_adequacy_28d
+    protein_score = calculate_protein_adequacy_28d(profile_id)
+    if protein_score is not None:
+        if protein_score >= 80:
+            status = 'förderlich'
+            reason = f'Proteinzufuhr konstant im Zielbereich (Score: {protein_score})'
+        elif protein_score >= 60:
+            status = 'neutral'
+            reason = f'Proteinzufuhr teilweise im Zielbereich (Score: {protein_score})'
+        else:
+            status = 'hinderlich'
+            reason = f'Proteinzufuhr häufig unter Zielbereich (Score: {protein_score})'
+
+        drivers.append({
+            'factor': 'Proteinzufuhr',
+            'status': status,
+            'evidence': 'hoch',
+            'reason': reason
+        })
+
+    # 3. Sleep duration
+    from calculations.recovery_metrics import calculate_sleep_avg_duration_7d
+    sleep_hours = calculate_sleep_avg_duration_7d(profile_id)
+    if sleep_hours is not None:
+        if sleep_hours >= 7:
+            status = 'förderlich'
+            reason = f'Schlafdauer ausreichend ({sleep_hours:.1f}h/Nacht)'
+        elif sleep_hours >= 6.5:
+            status = 'neutral'
+            reason = f'Schlafdauer knapp ausreichend ({sleep_hours:.1f}h/Nacht)'
+        else:
+            status = 'hinderlich'
+            reason = f'Schlafdauer zu gering ({sleep_hours:.1f}h/Nacht < 7h Empfehlung)'
+
+        drivers.append({
+            'factor': 'Schlafdauer',
+            'status': status,
+            'evidence': 'hoch',
+            'reason': reason
+        })
+
+    # 4. Sleep regularity
+    from calculations.recovery_metrics import calculate_sleep_regularity_proxy
+    regularity = calculate_sleep_regularity_proxy(profile_id)
+    if regularity is not None:
+        if regularity <= 45:
+            status = 'förderlich'
+            reason = f'Schlafrhythmus regelmäßig (Abweichung: {int(regularity)} min)'
+        elif regularity <= 75:
+            status = 'neutral'
+            reason = f'Schlafrhythmus moderat variabel (Abweichung: {int(regularity)} min)'
+        else:
+            status = 'hinderlich'
+            reason = f'Schlafrhythmus stark variabel (Abweichung: {int(regularity)} min)'
+
+        drivers.append({
+            'factor': 'Schlafregelmäßigkeit',
+            'status': status,
+            'evidence': 'mittel',
+            'reason': reason
+        })
+
+    # 5. Training consistency
+    from calculations.activity_metrics import calculate_training_frequency_7d
+    frequency = calculate_training_frequency_7d(profile_id)
+    if frequency is not None:
+        # "< 3" rather than "<= 2" so a non-integer value between 2 and 3
+        # counts as too low instead of falling through to "very high".
+        if frequency < 3:
+            status = 'hinderlich'
+            reason = f'Trainingsfrequenz zu niedrig ({frequency}× pro Woche)'
+        elif frequency <= 6:
+            status = 'förderlich'
+            reason = f'Trainingsfrequenz im Zielbereich ({frequency}× pro Woche)'
+        else:
+            status = 'neutral'
+            reason = f'Trainingsfrequenz sehr hoch ({frequency}× pro Woche) → Recovery beachten'
+
+        drivers.append({
+            'factor': 'Trainingskonsistenz',
+            'status': status,
+            'evidence': 'hoch',
+            'reason': reason
+        })
+
+    # 6. Quality sessions
+    from calculations.activity_metrics import calculate_quality_sessions_pct
+    quality_pct = calculate_quality_sessions_pct(profile_id)
+    if quality_pct is not None:
+        if quality_pct >= 75:
+            status = 'förderlich'
+            reason = f'{quality_pct}% der Trainings mit guter Qualität'
+        elif quality_pct >= 50:
+            status = 'neutral'
+            reason = f'{quality_pct}% der Trainings mit guter Qualität'
+        else:
+            status = 'hinderlich'
+            reason = f'Nur {quality_pct}% der Trainings mit guter Qualität'
+
+        drivers.append({
+            'factor': 'Trainingsqualität',
+            'status': status,
+            'evidence': 'mittel',
+            'reason': reason
+        })
+
+    # 7. Recovery score
+    from calculations.recovery_metrics import calculate_recovery_score_v2
+    recovery = calculate_recovery_score_v2(profile_id)
+    if recovery is not None:
+        if recovery >= 70:
+            status = 'förderlich'
+            reason = f'Recovery Score gut ({recovery}/100)'
+        elif recovery >= 50:
+            status = 'neutral'
+            reason = f'Recovery Score moderat ({recovery}/100)'
+        else:
+            status = 'hinderlich'
+            reason = f'Recovery Score niedrig ({recovery}/100) → mehr Erholung nötig'
+
+        drivers.append({
+            'factor': 'Recovery',
+            'status': status,
+            'evidence': 'hoch',
+            'reason': reason
+        })
+
+    # 8. Rest day compliance
+    from calculations.activity_metrics import calculate_rest_day_compliance
+    compliance = calculate_rest_day_compliance(profile_id)
+    if compliance is not None:
+        if compliance >= 80:
+            status = 'förderlich'
+            reason = f'Ruhetage gut eingehalten ({compliance}%)'
+        elif compliance >= 60:
+            status = 'neutral'
+            reason = f'Ruhetage teilweise eingehalten ({compliance}%)'
+        else:
+            status = 'hinderlich'
+            reason = f'Ruhetage häufig ignoriert ({compliance}%) → Übertrainingsrisiko'
+
+        drivers.append({
+            'factor': 'Ruhetagsrespekt',
+            'status': status,
+            'evidence': 'mittel',
+            'reason': reason
+        })
+
+    # Sort by importance: hinderlich first, then förderlich, then neutral
+    # (list.sort is stable, so drivers with equal status keep the order above)
+    priority = {'hinderlich': 0, 'förderlich': 1, 'neutral': 2}
+    drivers.sort(key=lambda d: priority[d['status']])
+
+    return drivers[:8]  # Top 8 drivers
+
+
+# ============================================================================
+# Confidence/Evidence Levels
+# ============================================================================
+
+def calculate_correlation_confidence(data_points: int, correlation: Optional[float]) -> str:
+    """
+    Determine confidence level for correlation
+
+    Args:
+        data_points: Number of observations behind the correlation
+        correlation: Correlation coefficient; only its magnitude is used.
+            None (no correlation could be computed) is rated 'low'.
+
+    Returns: 'high', 'medium', or 'low'
+        'high'   - at least 40 data points and |correlation| >= 0.5
+        'medium' - at least 30 data points and |correlation| >= 0.4
+        'low'    - everything else, including fewer than 20 data points
+    """
+    # A missing value cannot support any confidence; this also avoids
+    # calling abs() on None.
+    if data_points is None or correlation is None:
+        return 'low'
+
+    # Need sufficient data points
+    if data_points < 20:
+        return 'low'
+
+    strength = abs(correlation)
+
+    # Strong correlation with good data
+    if data_points >= 40 and strength >= 0.5:
+        return 'high'
+    if data_points >= 30 and strength >= 0.4:
+        return 'medium'
+    return 'low'
diff --git a/backend/placeholder_resolver.py b/backend/placeholder_resolver.py
index 187adda..b81e9e7 100644
--- a/backend/placeholder_resolver.py
+++ b/backend/placeholder_resolver.py
@@ -417,8 +417,8 @@ def _safe_int(func_name: str, profile_id: str) -> str:
     import traceback
     try:
         # Import calculations dynamically to avoid circular imports
-        from calculations import correlation_metrics
         from data_layer import body_metrics, nutrition_metrics, activity_metrics, recovery_metrics, scores
+        from
data_layer import correlations as correlation_metrics # Map function names to actual functions func_map = { @@ -530,7 +530,8 @@ def _safe_str(func_name: str, profile_id: str) -> str: """ import traceback try: - from calculations import body_metrics, nutrition_metrics, activity_metrics, scores, correlation_metrics + from data_layer import body_metrics, nutrition_metrics, activity_metrics, scores + from data_layer import correlations as correlation_metrics func_map = { 'top_goal_name': lambda pid: (scores.get_top_priority_goal(pid).get('name') or scores.get_top_priority_goal(pid).get('goal_type')) if scores.get_top_priority_goal(pid) else None, @@ -566,7 +567,8 @@ def _safe_json(func_name: str, profile_id: str) -> str: import traceback try: import json - from calculations import scores, correlation_metrics + from data_layer import scores + from data_layer import correlations as correlation_metrics func_map = { 'correlation_energy_weight_lag': lambda pid: correlation_metrics.calculate_lag_correlation(pid, 'energy', 'weight'),