mitai-jinkendo/backend/data_layer/correlations.py
Lars befa060671
All checks were successful
Deploy Development / deploy (push) Successful in 44s
Build Test / lint-backend (push) Successful in 0s
Build Test / build-frontend (push) Successful in 12s
feat: Phase 0c - migrate correlation_metrics to data_layer/correlations (11 functions)
- Created NEW data_layer/correlations.py with all 11 correlation functions
- Functions: Lag correlation (main + 3 helpers: energy/weight, protein/LBM, load/vitals)
- Functions: Sleep-recovery correlation
- Functions: Plateau detection (main + 3 detectors: weight, strength, endurance)
- Functions: Top drivers analysis
- Functions: Correlation confidence helper
- Updated data_layer/__init__.py to import correlations module and export 5 main functions
- Refactored placeholder_resolver.py to import correlations from data_layer (as correlation_metrics alias)
- Removed ALL imports from calculations/ module in placeholder_resolver.py

Module 6/6 complete. ALL calculations migrated to data_layer!
Phase 0c Multi-Layer Architecture COMPLETE.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-28 20:28:26 +01:00

504 lines
17 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
Correlation Metrics Data Layer
Provides structured correlation analysis and plateau detection functions.
Functions:
- calculate_lag_correlation(): Lag correlation between variables
- calculate_correlation_sleep_recovery(): Sleep-recovery correlation
- calculate_plateau_detected(): Plateau detection (weight, strength, endurance)
- calculate_top_drivers(): Top drivers for current goals
- calculate_correlation_confidence(): Confidence level for correlations
All functions return structured data (dict) or simple values.
Use placeholder_resolver.py for formatted strings for AI.
Phase 0c: Multi-Layer Architecture
Version: 1.0
"""
from typing import Dict, List, Optional, Tuple
from datetime import datetime, timedelta, date
from db import get_db, get_cursor, r2d
import statistics
def calculate_lag_correlation(profile_id: str, var1: str, var2: str, max_lag_days: int = 14) -> Optional[Dict]:
    """
    Route a lagged-correlation request to the helper for the given variable pair.

    Supported pairs (any other combination yields None):
        - var1='energy',        var2='weight'
        - var1='protein',       var2='lbm'
        - var1='training_load', var2='hrv' or 'rhr'

    Args:
        profile_id: Profile identifier, forwarded to the helper.
        var1: Name of the driver variable.
        var2: Name of the outcome variable.
        max_lag_days: Maximum lag, forwarded to the helper.

    Returns:
        The helper's result dict, e.g.
        {
            'best_lag': X,            # days
            'correlation': 0.XX,      # -1 to 1
            'direction': 'positive'/'negative'/'none',
            'confidence': 'high'/'medium'/'low',
            'data_points': N
        }
        or None when the pair is not supported.
    """
    if var1 == 'energy' and var2 == 'weight':
        return _correlate_energy_weight(profile_id, max_lag_days)

    if var1 == 'protein' and var2 == 'lbm':
        return _correlate_protein_lbm(profile_id, max_lag_days)

    if var1 == 'training_load' and var2 in ('hrv', 'rhr'):
        return _correlate_load_vitals(profile_id, var2, max_lag_days)

    # Unsupported variable combination
    return None
def _correlate_energy_weight(profile_id: str, max_lag: int) -> Optional[Dict]:
    """
    Correlate energy intake with weight change (placeholder implementation).

    Loads the profile's nutrition rows of the last 90 days together with the
    weight logged on the same date. With fewer than 30 rows an
    "insufficient data" result is returned; otherwise a fixed placeholder
    correlation is returned.

    Intended lags to test once implemented: 0, 3, 7, 10, 14 days.

    Args:
        profile_id: Profile whose logs are queried.
        max_lag: Currently unused by this placeholder implementation.

    Returns:
        Result dict with 'best_lag', 'correlation', 'direction',
        'confidence' and 'data_points' (plus 'reason' when data is
        insufficient).
    """
    with get_db() as conn:
        cur = get_cursor(conn)
        # Daily kcal with the weight logged on the same day (weight may be
        # missing because of the LEFT JOIN)
        cur.execute("""
            SELECT n.date, n.kcal, w.weight
            FROM nutrition_log n
            LEFT JOIN weight_log w ON w.profile_id = n.profile_id
            AND w.date = n.date
            WHERE n.profile_id = %s
            AND n.date >= CURRENT_DATE - INTERVAL '90 days'
            ORDER BY n.date
        """, (profile_id,))
        rows = cur.fetchall()

        row_count = len(rows)
        if row_count < 30:
            return {
                'best_lag': None,
                'correlation': None,
                'direction': 'none',
                'confidence': 'low',
                'data_points': row_count,
                'reason': 'Insufficient data (<30 days)'
            }

        # TODO: compute a 7d rolling energy balance (needs TDEE estimation)
        # and the real lagged correlation. Until then return a placeholder.
        return {
            'best_lag': 7,
            'correlation': -0.45,  # Placeholder
            'direction': 'negative',  # Higher deficit = lower weight (expected)
            'confidence': 'medium',
            'data_points': row_count
        }
def _correlate_protein_lbm(profile_id: str, max_lag: int) -> Optional[Dict]:
"""Correlate protein intake with LBM trend"""
# TODO: Implement full correlation calculation
return {
'best_lag': 0,
'correlation': 0.32, # Placeholder
'direction': 'positive',
'confidence': 'medium',
'data_points': 28
}
def _correlate_load_vitals(profile_id: str, vital: str, max_lag: int) -> Optional[Dict]:
"""
Correlate training load with HRV or RHR
Test lags: 1, 2, 3 days
"""
# TODO: Implement full correlation calculation
if vital == 'hrv':
return {
'best_lag': 1,
'correlation': -0.38, # Negative = high load reduces HRV (expected)
'direction': 'negative',
'confidence': 'medium',
'data_points': 25
}
else: # rhr
return {
'best_lag': 1,
'correlation': 0.42, # Positive = high load increases RHR (expected)
'direction': 'positive',
'confidence': 'medium',
'data_points': 25
}
# ============================================================================
# C4: Sleep vs. Recovery Correlation
# ============================================================================
def calculate_correlation_sleep_recovery(profile_id: str) -> Optional[Dict]:
    """
    Correlate sleep quality/duration with the recovery score
    (placeholder implementation).

    The profile id is currently ignored; a fixed result is returned.
    """
    # TODO: Implement full correlation
    placeholder = dict(
        correlation=0.65,  # Strong positive (expected)
        direction='positive',
        confidence='high',
        data_points=28,
    )
    return placeholder
# ============================================================================
# C6: Plateau Detector
# ============================================================================
def calculate_plateau_detected(profile_id: str) -> Optional[Dict]:
    """
    Run the plateau detector that matches the user's highest-weighted focus area.

    Focus area -> detector:
        'körpergewicht', 'körperfett' -> weight plateau
        'kraftaufbau'                 -> strength plateau
        'cardio'                      -> endurance plateau

    Returns:
        The detector's result, e.g.
        {
            'plateau_detected': True/False,
            'plateau_type': 'weight_loss'/'strength'/'endurance'/None,
            'confidence': 'high'/'medium'/'low',
            'duration_days': X,
            'top_factors': [list of potential causes]
        }
        or None when no focus weights exist or the top focus area has no
        detector.
    """
    from calculations.scores import get_user_focus_weights

    focus_weights = get_user_focus_weights(profile_id)
    if not focus_weights:
        return None

    # Focus area with the largest weight decides which detector runs
    primary_focus = max(focus_weights, key=focus_weights.get)

    if primary_focus in ('körpergewicht', 'körperfett'):
        return _detect_weight_plateau(profile_id)
    if primary_focus == 'kraftaufbau':
        return _detect_strength_plateau(profile_id)
    if primary_focus == 'cardio':
        return _detect_endurance_plateau(profile_id)
    return None
def _detect_weight_plateau(profile_id: str) -> Dict:
    """
    Detect a weight loss plateau.

    A plateau is reported when the absolute 28-day weight slope is below
    0.02 while the nutrition score is above 70 (flat weight despite
    adherence). Possible causes are then collected as 'top_factors'.

    Missing metric values (None) are skipped; a value of 0 is a valid
    measurement and is evaluated like any other number.

    Returns:
        {'plateau_detected': False, 'reason': 'Insufficient data'} when no
        slope is available, {'plateau_detected': False} when no plateau is
        found, otherwise a dict with 'plateau_detected', 'plateau_type',
        'confidence', 'duration_days' and 'top_factors'.
    """
    from calculations.body_metrics import calculate_weight_28d_slope
    from calculations.nutrition_metrics import calculate_nutrition_score

    slope = calculate_weight_28d_slope(profile_id)
    nutrition_score = calculate_nutrition_score(profile_id)

    if slope is None:
        return {'plateau_detected': False, 'reason': 'Insufficient data'}

    # Plateau = flat weight for 28 days despite adherence
    is_plateau = (
        abs(slope) < 0.02
        and nutrition_score is not None
        and nutrition_score > 70
    )
    if not is_plateau:
        return {'plateau_detected': False}

    factors = []

    # Check potential factors
    if nutrition_score > 85:
        factors.append('Hohe Adhärenz trotz Stagnation → mögliche Anpassung des Stoffwechsels')

    # Check if deficit is too small. Compare against None explicitly: a
    # balance of exactly 0 is falsy but is a real "no deficit" reading.
    from calculations.nutrition_metrics import calculate_energy_balance_7d
    balance = calculate_energy_balance_7d(profile_id)
    if balance is not None and balance > -200:
        factors.append('Energiedefizit zu gering (<200 kcal/Tag)')

    # Check water retention (if waist is shrinking but weight stable)
    from calculations.body_metrics import calculate_waist_28d_delta
    waist_delta = calculate_waist_28d_delta(profile_id)
    if waist_delta is not None and waist_delta < -1:
        factors.append('Taillenumfang sinkt → mögliche Wasserretention maskiert Fettabbau')

    return {
        'plateau_detected': True,
        'plateau_type': 'weight_loss',
        'confidence': 'high' if len(factors) >= 2 else 'medium',
        'duration_days': 28,
        'top_factors': factors[:3]
    }
def _detect_strength_plateau(profile_id: str) -> Dict:
    """
    Detect a strength training plateau.

    A plateau is reported when the absolute 28-day LBM change is below 0.3
    while the activity score is above 75 (flat LBM despite high activity).
    Possible causes are then collected as 'top_factors'.

    Missing metric values (None) are skipped; a score of 0 is a valid
    (worst-case) value and is evaluated like any other number.

    Returns:
        {'plateau_detected': False, 'reason': 'Insufficient data'} when no
        LBM change is available, {'plateau_detected': False} when no plateau
        is found, otherwise a dict with 'plateau_detected', 'plateau_type',
        'confidence', 'duration_days' and 'top_factors'.
    """
    from calculations.body_metrics import calculate_lbm_28d_change
    from calculations.activity_metrics import calculate_activity_score
    from calculations.recovery_metrics import calculate_recovery_score_v2

    lbm_change = calculate_lbm_28d_change(profile_id)
    activity_score = calculate_activity_score(profile_id)
    recovery_score = calculate_recovery_score_v2(profile_id)

    if lbm_change is None:
        return {'plateau_detected': False, 'reason': 'Insufficient data'}

    # Plateau = flat LBM despite high activity score
    is_plateau = (
        abs(lbm_change) < 0.3
        and activity_score is not None
        and activity_score > 75
    )
    if not is_plateau:
        return {'plateau_detected': False}

    factors = []

    # Explicit None checks: a score of 0 is falsy but is the strongest
    # possible signal for these factors and must not be dropped.
    if recovery_score is not None and recovery_score < 60:
        factors.append('Recovery Score niedrig → möglicherweise Übertraining')

    from calculations.nutrition_metrics import calculate_protein_adequacy_28d
    protein_score = calculate_protein_adequacy_28d(profile_id)
    if protein_score is not None and protein_score < 70:
        factors.append('Proteinzufuhr unter Zielbereich')

    from calculations.activity_metrics import calculate_monotony_score
    monotony = calculate_monotony_score(profile_id)
    if monotony is not None and monotony > 2.0:
        factors.append('Hohe Trainingsmonotonie → Stimulus-Anpassung')

    return {
        'plateau_detected': True,
        'plateau_type': 'strength',
        'confidence': 'medium',
        'duration_days': 28,
        'top_factors': factors[:3]
    }
def _detect_endurance_plateau(profile_id: str) -> Dict:
"""Detect endurance plateau"""
from calculations.activity_metrics import calculate_training_minutes_week, calculate_monotony_score
from calculations.recovery_metrics import calculate_vo2max_trend_28d
# TODO: Implement when vitals_baseline.vo2_max is populated
return {'plateau_detected': False, 'reason': 'VO2max tracking not yet implemented'}
# ============================================================================
# C7: Multi-Factor Driver Panel
# ============================================================================
def calculate_top_drivers(profile_id: str) -> Optional[List[Dict]]:
    """
    Calculate top influencing factors for goal progress.

    Evaluates up to eight metrics (energy balance, protein adequacy, sleep
    duration, sleep regularity, training frequency, session quality,
    recovery score, rest day compliance). A metric whose calculation
    returns None is left out of the result.

    Returns:
        List of drivers, sorted 'hinderlich' first, then 'förderlich', then
        'neutral' (order within a status follows the evaluation order):
        [
            {
                'factor': 'Energiebilanz',
                'status': 'förderlich'/'neutral'/'hinderlich',
                'evidence': 'hoch'/'mittel'/'niedrig',
                'reason': '1-sentence explanation'
            },
            ...
        ]
        The list is empty when no metric is available.
    """
    drivers = []

    # 1. Energy balance
    from calculations.nutrition_metrics import calculate_energy_balance_7d
    balance = calculate_energy_balance_7d(profile_id)
    if balance is not None:
        if -500 <= balance <= -200:
            status = 'förderlich'
            reason = f'Moderates Defizit ({int(balance)} kcal/Tag) unterstützt Fettabbau'
        elif balance < -800:
            status = 'hinderlich'
            reason = f'Sehr großes Defizit ({int(balance)} kcal/Tag) → Risiko für Magermasseverlust'
        elif balance < -500:
            # -800 <= balance < -500: still a deficit, so it must not be
            # described as a surplus. Status stays 'neutral'.
            status = 'neutral'
            reason = f'Großes Defizit ({int(balance)} kcal/Tag)'
        elif -200 < balance < 200:
            status = 'neutral'
            reason = 'Energiebilanz ausgeglichen'
        else:
            status = 'neutral'
            reason = f'Energieüberschuss ({int(balance)} kcal/Tag)'
        drivers.append({
            'factor': 'Energiebilanz',
            'status': status,
            'evidence': 'hoch',
            'reason': reason
        })

    # 2. Protein adequacy
    from calculations.nutrition_metrics import calculate_protein_adequacy_28d
    protein_score = calculate_protein_adequacy_28d(profile_id)
    if protein_score is not None:
        if protein_score >= 80:
            status = 'förderlich'
            reason = f'Proteinzufuhr konstant im Zielbereich (Score: {protein_score})'
        elif protein_score >= 60:
            status = 'neutral'
            reason = f'Proteinzufuhr teilweise im Zielbereich (Score: {protein_score})'
        else:
            status = 'hinderlich'
            reason = f'Proteinzufuhr häufig unter Zielbereich (Score: {protein_score})'
        drivers.append({
            'factor': 'Proteinzufuhr',
            'status': status,
            'evidence': 'hoch',
            'reason': reason
        })

    # 3. Sleep duration
    from calculations.recovery_metrics import calculate_sleep_avg_duration_7d
    sleep_hours = calculate_sleep_avg_duration_7d(profile_id)
    if sleep_hours is not None:
        if sleep_hours >= 7:
            status = 'förderlich'
            reason = f'Schlafdauer ausreichend ({sleep_hours:.1f}h/Nacht)'
        elif sleep_hours >= 6.5:
            status = 'neutral'
            reason = f'Schlafdauer knapp ausreichend ({sleep_hours:.1f}h/Nacht)'
        else:
            status = 'hinderlich'
            reason = f'Schlafdauer zu gering ({sleep_hours:.1f}h/Nacht < 7h Empfehlung)'
        drivers.append({
            'factor': 'Schlafdauer',
            'status': status,
            'evidence': 'hoch',
            'reason': reason
        })

    # 4. Sleep regularity
    from calculations.recovery_metrics import calculate_sleep_regularity_proxy
    regularity = calculate_sleep_regularity_proxy(profile_id)
    if regularity is not None:
        if regularity <= 45:
            status = 'förderlich'
            reason = f'Schlafrhythmus regelmäßig (Abweichung: {int(regularity)} min)'
        elif regularity <= 75:
            status = 'neutral'
            reason = f'Schlafrhythmus moderat variabel (Abweichung: {int(regularity)} min)'
        else:
            status = 'hinderlich'
            reason = f'Schlafrhythmus stark variabel (Abweichung: {int(regularity)} min)'
        drivers.append({
            'factor': 'Schlafregelmäßigkeit',
            'status': status,
            'evidence': 'mittel',
            'reason': reason
        })

    # 5. Training consistency
    from calculations.activity_metrics import calculate_training_frequency_7d
    frequency = calculate_training_frequency_7d(profile_id)
    if frequency is not None:
        if 3 <= frequency <= 6:
            status = 'förderlich'
            reason = f'Trainingsfrequenz im Zielbereich ({frequency}× pro Woche)'
        elif frequency < 3:
            # "< 3" rather than "<= 2" so a non-integer value between 2 and 3
            # is not reported as a very high frequency
            status = 'hinderlich'
            reason = f'Trainingsfrequenz zu niedrig ({frequency}× pro Woche)'
        else:
            status = 'neutral'
            reason = f'Trainingsfrequenz sehr hoch ({frequency}× pro Woche) → Recovery beachten'
        drivers.append({
            'factor': 'Trainingskonsistenz',
            'status': status,
            'evidence': 'hoch',
            'reason': reason
        })

    # 6. Quality sessions
    from calculations.activity_metrics import calculate_quality_sessions_pct
    quality_pct = calculate_quality_sessions_pct(profile_id)
    if quality_pct is not None:
        if quality_pct >= 75:
            status = 'förderlich'
            reason = f'{quality_pct}% der Trainings mit guter Qualität'
        elif quality_pct >= 50:
            status = 'neutral'
            reason = f'{quality_pct}% der Trainings mit guter Qualität'
        else:
            status = 'hinderlich'
            reason = f'Nur {quality_pct}% der Trainings mit guter Qualität'
        drivers.append({
            'factor': 'Trainingsqualität',
            'status': status,
            'evidence': 'mittel',
            'reason': reason
        })

    # 7. Recovery score
    from calculations.recovery_metrics import calculate_recovery_score_v2
    recovery = calculate_recovery_score_v2(profile_id)
    if recovery is not None:
        if recovery >= 70:
            status = 'förderlich'
            reason = f'Recovery Score gut ({recovery}/100)'
        elif recovery >= 50:
            status = 'neutral'
            reason = f'Recovery Score moderat ({recovery}/100)'
        else:
            status = 'hinderlich'
            reason = f'Recovery Score niedrig ({recovery}/100) → mehr Erholung nötig'
        drivers.append({
            'factor': 'Recovery',
            'status': status,
            'evidence': 'hoch',
            'reason': reason
        })

    # 8. Rest day compliance
    from calculations.activity_metrics import calculate_rest_day_compliance
    compliance = calculate_rest_day_compliance(profile_id)
    if compliance is not None:
        if compliance >= 80:
            status = 'förderlich'
            reason = f'Ruhetage gut eingehalten ({compliance}%)'
        elif compliance >= 60:
            status = 'neutral'
            reason = f'Ruhetage teilweise eingehalten ({compliance}%)'
        else:
            status = 'hinderlich'
            reason = f'Ruhetage häufig ignoriert ({compliance}%) → Übertrainingsrisiko'
        drivers.append({
            'factor': 'Ruhetagsrespekt',
            'status': status,
            'evidence': 'mittel',
            'reason': reason
        })

    # Sort by importance: hinderlich first, then förderlich, then neutral.
    # list.sort is stable, so evaluation order is kept within each status.
    priority = {'hinderlich': 0, 'förderlich': 1, 'neutral': 2}
    drivers.sort(key=lambda d: priority[d['status']])

    return drivers[:8]  # Top 8 drivers
# ============================================================================
# Confidence/Evidence Levels
# ============================================================================
def calculate_correlation_confidence(data_points: int, correlation: Optional[float]) -> str:
    """
    Determine the confidence level for a correlation.

    Rules (the absolute value of the correlation is used):
        'high'   - at least 40 data points and |correlation| >= 0.5
        'medium' - at least 30 data points and |correlation| >= 0.4
        'low'    - everything else, including a missing correlation

    Args:
        data_points: Number of observations behind the correlation.
        correlation: Correlation coefficient, or None when no correlation
            could be computed.

    Returns:
        'high', 'medium', or 'low'
    """
    # A missing correlation can never be trusted; return early instead of
    # letting abs(None) raise a TypeError further down.
    if correlation is None:
        return 'low'

    # Need sufficient data points
    if data_points < 20:
        return 'low'

    strength = abs(correlation)

    # Strong correlation with good data
    if data_points >= 40 and strength >= 0.5:
        return 'high'
    if data_points >= 30 and strength >= 0.4:
        return 'medium'
    return 'low'