""" Activity Metrics Data Layer Provides structured data for training tracking and analysis. Functions: - get_activity_summary_data(): Count, total duration, calories, averages - get_activity_detail_data(): Detailed activity log entries - get_training_type_distribution_data(): Training category percentages All functions return structured data (dict) without formatting. Use placeholder_resolver.py for formatted strings for AI. Phase 0c: Multi-Layer Architecture Version: 1.0 """ from typing import Dict, List, Optional from datetime import datetime, timedelta, date import statistics from db import get_db, get_cursor, r2d from data_layer.utils import calculate_confidence, safe_float, safe_int def get_activity_summary_data( profile_id: str, days: int = 14 ) -> Dict: """ Get activity summary statistics. Args: profile_id: User profile ID days: Analysis window (default 14) Returns: { "activity_count": int, "total_duration_min": int, "total_kcal": int, "avg_duration_min": int, "avg_kcal_per_session": int, "sessions_per_week": float, "confidence": str, "days_analyzed": int } Migration from Phase 0b: OLD: get_activity_summary(pid, days) formatted string NEW: Structured data with all metrics """ with get_db() as conn: cur = get_cursor(conn) cutoff = (datetime.now() - timedelta(days=days)).strftime('%Y-%m-%d') cur.execute( """SELECT COUNT(*) as count, SUM(duration_min) as total_min, SUM(kcal_active) as total_kcal FROM activity_log WHERE profile_id=%s AND date >= %s""", (profile_id, cutoff) ) row = cur.fetchone() if not row or row['count'] == 0: return { "activity_count": 0, "total_duration_min": 0, "total_kcal": 0, "avg_duration_min": 0, "avg_kcal_per_session": 0, "sessions_per_week": 0.0, "confidence": "insufficient", "days_analyzed": days } activity_count = row['count'] total_min = safe_int(row['total_min']) total_kcal = safe_int(row['total_kcal']) avg_duration = int(total_min / activity_count) if activity_count > 0 else 0 avg_kcal = int(total_kcal / activity_count) if 
activity_count > 0 else 0 sessions_per_week = (activity_count / days * 7) if days > 0 else 0.0 confidence = calculate_confidence(activity_count, days, "general") return { "activity_count": activity_count, "total_duration_min": total_min, "total_kcal": total_kcal, "avg_duration_min": avg_duration, "avg_kcal_per_session": avg_kcal, "sessions_per_week": round(sessions_per_week, 1), "confidence": confidence, "days_analyzed": days } def get_activity_detail_data( profile_id: str, days: int = 14, limit: int = 50 ) -> Dict: """ Get detailed activity log entries. Args: profile_id: User profile ID days: Analysis window (default 14) limit: Maximum entries to return (default 50) Returns: { "activities": [ { "date": date, "activity_type": str, "duration_min": int, "kcal_active": int, "hr_avg": int | None, "training_category": str | None }, ... ], "total_count": int, "confidence": str, "days_analyzed": int } Migration from Phase 0b: OLD: get_activity_detail(pid, days) formatted string list NEW: Structured array with all fields """ with get_db() as conn: cur = get_cursor(conn) cutoff = (datetime.now() - timedelta(days=days)).strftime('%Y-%m-%d') cur.execute( """SELECT date, activity_type, duration_min, kcal_active, hr_avg, training_category FROM activity_log WHERE profile_id=%s AND date >= %s ORDER BY date DESC LIMIT %s""", (profile_id, cutoff, limit) ) rows = cur.fetchall() if not rows: return { "activities": [], "total_count": 0, "confidence": "insufficient", "days_analyzed": days } activities = [] for row in rows: activities.append({ "date": row['date'], "activity_type": row['activity_type'], "duration_min": safe_int(row['duration_min']), "kcal_active": safe_int(row['kcal_active']), "hr_avg": safe_int(row['hr_avg']) if row.get('hr_avg') else None, "training_category": row.get('training_category') }) confidence = calculate_confidence(len(activities), days, "general") return { "activities": activities, "total_count": len(activities), "confidence": confidence, "days_analyzed": 
days } def get_training_type_distribution_data( profile_id: str, days: int = 14 ) -> Dict: """ Calculate training category distribution. Args: profile_id: User profile ID days: Analysis window (default 14) Returns: { "distribution": [ { "category": str, "count": int, "percentage": float }, ... ], "total_sessions": int, "categorized_sessions": int, "uncategorized_sessions": int, "confidence": str, "days_analyzed": int } Migration from Phase 0b: OLD: get_trainingstyp_verteilung(pid, days) top 3 formatted NEW: Complete distribution with percentages """ with get_db() as conn: cur = get_cursor(conn) cutoff = (datetime.now() - timedelta(days=days)).strftime('%Y-%m-%d') # Get categorized activities cur.execute( """SELECT training_category, COUNT(*) as count FROM activity_log WHERE profile_id=%s AND date >= %s AND training_category IS NOT NULL GROUP BY training_category ORDER BY count DESC""", (profile_id, cutoff) ) rows = cur.fetchall() # Get total activity count (including uncategorized) cur.execute( """SELECT COUNT(*) as total FROM activity_log WHERE profile_id=%s AND date >= %s""", (profile_id, cutoff) ) total_row = cur.fetchone() total_sessions = total_row['total'] if total_row else 0 if not rows or total_sessions == 0: return { "distribution": [], "total_sessions": total_sessions, "categorized_sessions": 0, "uncategorized_sessions": total_sessions, "confidence": "insufficient", "days_analyzed": days } categorized_count = sum(row['count'] for row in rows) uncategorized_count = total_sessions - categorized_count distribution = [] for row in rows: count = row['count'] percentage = (count / total_sessions * 100) if total_sessions > 0 else 0 distribution.append({ "category": row['training_category'], "count": count, "percentage": round(percentage, 1) }) confidence = calculate_confidence(categorized_count, days, "general") return { "distribution": distribution, "total_sessions": total_sessions, "categorized_sessions": categorized_count, "uncategorized_sessions": 
uncategorized_count, "confidence": confidence, "days_analyzed": days } # ============================================================================ # Calculated Metrics (migrated from calculations/activity_metrics.py) # ============================================================================ # These functions return simple values for placeholders and scoring. # Use get_*_data() functions above for structured chart data. def calculate_training_minutes_week(profile_id: str) -> Optional[int]: """Calculate total training minutes last 7 days""" with get_db() as conn: cur = get_cursor(conn) cur.execute(""" SELECT SUM(duration_min) as total_minutes FROM activity_log WHERE profile_id = %s AND date >= CURRENT_DATE - INTERVAL '7 days' """, (profile_id,)) row = cur.fetchone() return int(row['total_minutes']) if row and row['total_minutes'] else None def calculate_training_frequency_7d(profile_id: str) -> Optional[int]: """Calculate number of training sessions last 7 days""" with get_db() as conn: cur = get_cursor(conn) cur.execute(""" SELECT COUNT(*) as session_count FROM activity_log WHERE profile_id = %s AND date >= CURRENT_DATE - INTERVAL '7 days' """, (profile_id,)) row = cur.fetchone() return int(row['session_count']) if row else None def calculate_quality_sessions_pct(profile_id: str) -> Optional[int]: """Calculate percentage of quality sessions (good or better) last 28 days""" with get_db() as conn: cur = get_cursor(conn) cur.execute(""" SELECT COUNT(*) as total, COUNT(*) FILTER (WHERE quality_label IN ('excellent', 'very_good', 'good')) as quality_count FROM activity_log WHERE profile_id = %s AND date >= CURRENT_DATE - INTERVAL '28 days' """, (profile_id,)) row = cur.fetchone() if not row or row['total'] == 0: return None pct = (row['quality_count'] / row['total']) * 100 return int(pct) # ============================================================================ # A2: Intensity Distribution (Proxy-based) # 
def calculate_intensity_proxy_distribution(profile_id: str) -> Optional[Dict]:
    """
    Calculate intensity distribution (proxy until HR zones available).

    Classifies each activity of the last 28 days by average HR
    (<120 low, <150 moderate, otherwise high; assumes max HR ~190).
    Activities without HR data count as moderate.

    Args:
        profile_id: User profile ID

    Returns:
        {'low': X, 'moderate': Y, 'high': Z} in minutes,
        or None when no activities exist in the window.
    """
    with get_db() as conn:
        cur = get_cursor(conn)
        cur.execute("""
            SELECT duration_min, hr_avg, hr_max
            FROM activity_log
            WHERE profile_id = %s
              AND date >= CURRENT_DATE - INTERVAL '28 days'
        """, (profile_id,))
        activities = cur.fetchall()

        if not activities:
            return None

        low_min = 0
        moderate_min = 0
        high_min = 0

        for activity in activities:
            # Guard against NULL duration_min so one bad row cannot crash the sum.
            duration = activity['duration_min'] or 0
            avg_hr = activity['hr_avg']

            # Simple proxy classification (rough HR thresholds, max HR ~190)
            if avg_hr:
                if avg_hr < 120:
                    low_min += duration
                elif avg_hr < 150:
                    moderate_min += duration
                else:
                    high_min += duration
            else:
                # Fallback: no HR data -> assume moderate
                moderate_min += duration

        return {
            'low': low_min,
            'moderate': moderate_min,
            'high': high_min
        }


# ============================================================================
# A4: Ability Balance Calculations
# ============================================================================


def calculate_ability_balance(profile_id: str) -> Optional[Dict]:
    """
    Calculate ability balance from training_types.abilities.

    Accumulates duration-weighted ability load over the last 28 days and
    normalizes each dimension against the highest-loaded one.

    Returns:
        Dict with scores per ability dimension (0-100), or None when no
        categorized activities with ability data exist (or all loads are 0).
    """
    with get_db() as conn:
        cur = get_cursor(conn)
        cur.execute("""
            SELECT a.duration_min, tt.abilities
            FROM activity_log a
            JOIN training_types tt ON a.training_category = tt.category
            WHERE a.profile_id = %s
              AND a.date >= CURRENT_DATE - INTERVAL '28 days'
              AND tt.abilities IS NOT NULL
        """, (profile_id,))
        activities = cur.fetchall()

        if not activities:
            return None

        # Accumulate ability load (duration × ability weight)
        ability_loads = {
            'strength': 0,
            'endurance': 0,
            'mental': 0,
            'coordination': 0,
            'mobility': 0
        }

        for activity in activities:
            # Guard against NULL duration_min (would otherwise raise TypeError).
            duration = activity['duration_min'] or 0
            abilities = activity['abilities']  # JSONB: {ability: weight}
            if not abilities:
                continue
            for ability, weight in abilities.items():
                # Silently ignore ability keys outside the known five dimensions.
                if ability in ability_loads:
                    ability_loads[ability] += duration * weight

        # Normalize to 0-100 scale (strongest dimension becomes 100).
        max_load = max(ability_loads.values())
        if max_load == 0:
            return None

        return {
            ability: int((load / max_load) * 100)
            for ability, load in ability_loads.items()
        }


def calculate_ability_balance_strength(profile_id: str) -> Optional[int]:
    """Get strength ability score"""
    balance = calculate_ability_balance(profile_id)
    return balance['strength'] if balance else None


def calculate_ability_balance_endurance(profile_id: str) -> Optional[int]:
    """Get endurance ability score"""
    balance = calculate_ability_balance(profile_id)
    return balance['endurance'] if balance else None


def calculate_ability_balance_mental(profile_id: str) -> Optional[int]:
    """Get mental ability score"""
    balance = calculate_ability_balance(profile_id)
    return balance['mental'] if balance else None


def calculate_ability_balance_coordination(profile_id: str) -> Optional[int]:
    """Get coordination ability score"""
    balance = calculate_ability_balance(profile_id)
    return balance['coordination'] if balance else None


def calculate_ability_balance_mobility(profile_id: str) -> Optional[int]:
    """Get mobility ability score"""
    balance = calculate_ability_balance(profile_id)
    return balance['mobility'] if balance else None


# ============================================================================
# A5: Load Monitoring (Proxy-based)
# ============================================================================


def calculate_proxy_internal_load_7d(profile_id: str) -> Optional[int]:
    """
    Calculate proxy internal load (last 7 days).

    Formula: duration × intensity_factor × quality_factor
    Intensity is derived from average HR (proxy thresholds), quality from RPE.

    Returns:
        Integer load, or None when no activities exist in the window.
    """
    intensity_factors = {'low': 1.0, 'moderate': 1.5, 'high': 2.0}
    quality_factors = {
        'excellent': 1.15,
        'very_good': 1.05,
        'good': 1.0,
        'acceptable': 0.9,
        'poor': 0.75,
        'excluded': 0.0
    }
    with get_db() as conn:
        cur = get_cursor(conn)
        cur.execute("""
            SELECT duration_min, hr_avg, rpe
            FROM activity_log
            WHERE profile_id = %s
              AND date >= CURRENT_DATE - INTERVAL '7 days'
        """, (profile_id,))
        activities = cur.fetchall()

        if not activities:
            return None

        total_load = 0
        for activity in activities:
            duration = activity['duration_min'] or 0
            avg_hr = activity['hr_avg']

            # Map RPE to a label that exists in quality_factors.
            # FIX: the previous mapping produced 'moderate' (not a key of
            # quality_factors, silently defaulting to 1.0) and sent RPE < 4
            # to 'good', contradicting the intended bands:
            # rpe 8-10 = excellent, 6-7 = good, 4-5 = acceptable, <4 = poor.
            rpe = activity.get('rpe')
            if not rpe:
                quality = 'good'  # default when RPE is missing
            elif rpe >= 8:
                quality = 'excellent'
            elif rpe >= 6:
                quality = 'good'
            elif rpe >= 4:
                quality = 'acceptable'
            else:
                quality = 'poor'

            # Determine intensity from average HR (same proxy thresholds as A2)
            if avg_hr:
                if avg_hr < 120:
                    intensity = 'low'
                elif avg_hr < 150:
                    intensity = 'moderate'
                else:
                    intensity = 'high'
            else:
                intensity = 'moderate'

            load = float(duration) * intensity_factors[intensity] * quality_factors.get(quality, 1.0)
            total_load += load

        return int(total_load)


def calculate_monotony_score(profile_id: str) -> Optional[float]:
    """
    Calculate training monotony (last 7 days).

    Monotony = mean daily load / std dev daily load.
    Higher = more monotonous.

    Returns:
        Monotony rounded to 2 decimals; None with fewer than 4 training
        days or when the std dev is 0 (ratio undefined).

    NOTE(review): only days WITH logged training enter the calculation —
    classic (Foster) monotony also counts zero-load days; confirm intent.
    """
    with get_db() as conn:
        cur = get_cursor(conn)
        cur.execute("""
            SELECT date, SUM(duration_min) as daily_duration
            FROM activity_log
            WHERE profile_id = %s
              AND date >= CURRENT_DATE - INTERVAL '7 days'
            GROUP BY date
            ORDER BY date
        """, (profile_id,))
        daily_loads = [
            float(row['daily_duration'])
            for row in cur.fetchall()
            if row['daily_duration']
        ]

        if len(daily_loads) < 4:
            return None

        mean_load = sum(daily_loads) / len(daily_loads)
        std_dev = statistics.stdev(daily_loads)
        if std_dev == 0:
            return None

        return round(mean_load / std_dev, 2)


def calculate_strain_score(profile_id: str) -> Optional[int]:
    """
    Calculate training strain (last 7 days).

    Strain = weekly load × monotony.

    Returns:
        Integer strain, or None when either input metric is unavailable.
    """
    weekly_load = calculate_proxy_internal_load_7d(profile_id)
    monotony = calculate_monotony_score(profile_id)
    if weekly_load is None or monotony is None:
        return None
    return int(weekly_load * monotony)


# ============================================================================
# A6: Activity Goal Alignment Score (Dynamic Focus Areas)
# ============================================================================


def calculate_activity_score(profile_id: str, focus_weights: Optional[Dict] = None) -> Optional[int]:
    """
    Activity goal alignment score 0-100.

    Weighted by the user's activity-related focus areas. Volume and quality
    components always apply (20% / 10% of the total activity weight);
    strength, cardio and ability-balance components are added only when the
    matching focus areas carry weight.

    Args:
        profile_id: User profile ID
        focus_weights: Optional pre-fetched focus weights; loaded from
            data_layer.scores when omitted.

    Returns:
        Integer score 0-100, or None when the user has no activity goals
        or no component could be computed.
    """
    if focus_weights is None:
        from data_layer.scores import get_user_focus_weights
        focus_weights = get_user_focus_weights(profile_id)

    # Activity-related focus areas (English keys from DB)
    # Strength training
    strength = focus_weights.get('strength', 0)
    strength_endurance = focus_weights.get('strength_endurance', 0)
    power = focus_weights.get('power', 0)
    total_strength = strength + strength_endurance + power

    # Endurance training
    aerobic = focus_weights.get('aerobic_endurance', 0)
    anaerobic = focus_weights.get('anaerobic_endurance', 0)
    cardiovascular = focus_weights.get('cardiovascular_health', 0)
    total_cardio = aerobic + anaerobic + cardiovascular

    # Mobility/Coordination
    flexibility = focus_weights.get('flexibility', 0)
    mobility = focus_weights.get('mobility', 0)
    balance = focus_weights.get('balance', 0)
    reaction = focus_weights.get('reaction', 0)
    rhythm = focus_weights.get('rhythm', 0)
    coordination = focus_weights.get('coordination', 0)
    total_ability = flexibility + mobility + balance + reaction + rhythm + coordination

    total_activity_weight = total_strength + total_cardio + total_ability
    if total_activity_weight == 0:
        return None  # No activity goals

    components = []  # (name, score 0-100, weight) tuples

    # 1. Weekly minutes (general activity volume)
    minutes = calculate_training_minutes_week(profile_id)
    if minutes is not None:
        # WHO: 150-300 min/week
        if 150 <= minutes <= 300:
            minutes_score = 100
        elif minutes < 150:
            minutes_score = max(40, (minutes / 150) * 100)
        else:
            # Gentle penalty above the recommended range, floored at 80.
            minutes_score = max(80, 100 - ((minutes - 300) / 10))
        # Volume relevant for all activity types (20% base weight)
        components.append(('minutes', minutes_score, total_activity_weight * 0.2))

    # 2. Quality sessions (always relevant, 10% base weight)
    quality_pct = calculate_quality_sessions_pct(profile_id)
    if quality_pct is not None:
        components.append(('quality', quality_pct, total_activity_weight * 0.1))

    # 3. Strength presence (if strength focus active)
    if total_strength > 0:
        strength_score = _score_strength_presence(profile_id)
        if strength_score is not None:
            components.append(('strength', strength_score, total_strength))

    # 4. Cardio presence (if cardio focus active)
    if total_cardio > 0:
        cardio_score = _score_cardio_presence(profile_id)
        if cardio_score is not None:
            components.append(('cardio', cardio_score, total_cardio))

    # 5. Ability balance (if mobility/coordination focus active)
    if total_ability > 0:
        balance_score = _score_ability_balance(profile_id)
        if balance_score is not None:
            components.append(('balance', balance_score, total_ability))

    if not components:
        return None

    # Weighted average over the available components
    total_score = sum(score * weight for _, score, weight in components)
    total_weight = sum(weight for _, _, weight in components)
    return int(total_score / total_weight)


def _score_strength_presence(profile_id: str) -> Optional[int]:
    """Score strength training presence 0-100 (target: 2-4 days/week)."""
    with get_db() as conn:
        cur = get_cursor(conn)
        cur.execute("""
            SELECT COUNT(DISTINCT date) as strength_days
            FROM activity_log
            WHERE profile_id = %s
              AND date >= CURRENT_DATE - INTERVAL '7 days'
              AND training_category = 'strength'
        """, (profile_id,))
        row = cur.fetchone()
        if not row:
            return None

        strength_days = row['strength_days']
        # Target: 2-4 days/week
        if 2 <= strength_days <= 4:
            return 100
        elif strength_days == 1:
            return 60
        elif strength_days == 5:
            return 85
        elif strength_days == 0:
            return 0
        else:
            return 70  # 6-7 days: above target


def _score_cardio_presence(profile_id: str) -> Optional[int]:
    """Score cardio training presence 0-100 (target: 3-5 days, 150+ min)."""
    with get_db() as conn:
        cur = get_cursor(conn)
        cur.execute("""
            SELECT COUNT(DISTINCT date) as cardio_days,
                   SUM(duration_min) as cardio_minutes
            FROM activity_log
            WHERE profile_id = %s
              AND date >= CURRENT_DATE - INTERVAL '7 days'
              AND training_category = 'cardio'
        """, (profile_id,))
        row = cur.fetchone()
        if not row:
            return None

        cardio_days = row['cardio_days']
        cardio_minutes = row['cardio_minutes'] or 0  # SUM() is NULL without rows

        # Target: 3-5 days/week, 150+ minutes; average the two sub-scores.
        day_score = min(100, (cardio_days / 4) * 100)
        minute_score = min(100, (cardio_minutes / 150) * 100)
        return int((day_score + minute_score) / 2)


def _score_ability_balance(profile_id: str) -> Optional[int]:
    """Score ability balance 0-100 (high minimum coverage, low spread)."""
    balance = calculate_ability_balance(profile_id)
    if not balance:
        return None

    # Good balance = all abilities > 40, std_dev < 30
    values = list(balance.values())
    min_value = min(values)
    std_dev = statistics.stdev(values) if len(values) > 1 else 0

    # Score based on minimum coverage and balance
    min_score = min(100, min_value * 2)  # Want all > 50
    balance_score = max(0, 100 - (std_dev * 2))  # Want low std_dev
    return int((min_score + balance_score) / 2)


# ============================================================================
# A7: Rest Day Compliance
# ============================================================================


def calculate_rest_day_compliance(profile_id: str) -> Optional[int]:
    """
    Calculate rest day compliance percentage (last 28 days).

    A planned rest day is compliant when nothing was trained on it, or when
    the logged training does not violate the rest type:
    'strength_rest' forbids strength, 'cardio_rest' forbids cardio,
    'recovery' forbids any training.

    Returns:
        Percentage 0-100 of respected rest days, or None when no rest days
        were planned in the window.
    """
    with get_db() as conn:
        cur = get_cursor(conn)

        # Get planned rest days ({date: rest_type})
        cur.execute("""
            SELECT date, rest_config->>'focus' as rest_type
            FROM rest_days
            WHERE profile_id = %s
              AND date >= CURRENT_DATE - INTERVAL '28 days'
        """, (profile_id,))
        rest_days = {row['date']: row['rest_type'] for row in cur.fetchall()}

        if not rest_days:
            return None

        # Collect training categories per day to check for violations
        cur.execute("""
            SELECT date, training_category
            FROM activity_log
            WHERE profile_id = %s
              AND date >= CURRENT_DATE - INTERVAL '28 days'
        """, (profile_id,))
        training_days = {}
        for row in cur.fetchall():
            training_days.setdefault(row['date'], []).append(row['training_category'])

        # Count compliance
        compliant = 0
        total = len(rest_days)
        for rest_date, rest_type in rest_days.items():
            if rest_date not in training_days:
                # Full rest = compliant
                compliant += 1
            else:
                # Check if the logged training violates this rest type
                categories = training_days[rest_date]
                if rest_type == 'strength_rest' and 'strength' not in categories:
                    compliant += 1
                elif rest_type == 'cardio_rest' and 'cardio' not in categories:
                    compliant += 1
                # If rest_type == 'recovery', any training = non-compliant

        return int((compliant / total) * 100)
# ============================================================================
# A8: VO2max Development
# ============================================================================


def calculate_vo2max_trend_28d(profile_id: str) -> Optional[float]:
    """
    Calculate VO2max trend (change over 28 days).

    Compares the newest against the oldest measurement in the window.

    Returns:
        Change rounded to 1 decimal (positive = improvement),
        or None when fewer than 2 measurements exist.
    """
    with get_db() as conn:
        cur = get_cursor(conn)
        cur.execute("""
            SELECT vo2_max, date
            FROM vitals_baseline
            WHERE profile_id = %s
              AND vo2_max IS NOT NULL
              AND date >= CURRENT_DATE - INTERVAL '28 days'
            ORDER BY date DESC
        """, (profile_id,))
        measurements = cur.fetchall()

        if len(measurements) < 2:
            return None

        # Rows are ordered newest-first.
        recent = measurements[0]['vo2_max']
        oldest = measurements[-1]['vo2_max']
        return round(recent - oldest, 1)


# ============================================================================
# Data Quality Assessment
# ============================================================================


def calculate_activity_data_quality(profile_id: str) -> Dict:
    """
    Assess data quality for activity metrics (last 28 days).

    Combines logging frequency (50%), HR coverage (25%) and RPE coverage
    (25%) into an overall 0-100 score plus a confidence label.

    FIX: return annotation was Dict[str, any] — `any` is the builtin
    function, not a type; now consistent with the rest of the module.

    Returns:
        Dict with overall_score, confidence, measurements and
        component_scores.
    """
    with get_db() as conn:
        cur = get_cursor(conn)

        # Activity entries last 28 days (COUNT(col) counts non-NULL values)
        cur.execute("""
            SELECT COUNT(*) as total,
                   COUNT(hr_avg) as with_hr,
                   COUNT(rpe) as with_quality
            FROM activity_log
            WHERE profile_id = %s
              AND date >= CURRENT_DATE - INTERVAL '28 days'
        """, (profile_id,))
        counts = cur.fetchone()

        total_entries = counts['total']
        hr_coverage = counts['with_hr'] / total_entries if total_entries > 0 else 0
        quality_coverage = counts['with_quality'] / total_entries if total_entries > 0 else 0

        # Score components
        frequency_score = min(100, (total_entries / 15) * 100)  # 15 = ~4 sessions/week
        hr_score = hr_coverage * 100
        quality_score = quality_coverage * 100

        # Overall score (weighted mix)
        overall_score = int(
            frequency_score * 0.5 +
            hr_score * 0.25 +
            quality_score * 0.25
        )

        if overall_score >= 80:
            confidence = "high"
        elif overall_score >= 60:
            confidence = "medium"
        else:
            confidence = "low"

        return {
            "overall_score": overall_score,
            "confidence": confidence,
            "measurements": {
                "activities_28d": total_entries,
                "hr_coverage_pct": int(hr_coverage * 100),
                "quality_coverage_pct": int(quality_coverage * 100)
            },
            "component_scores": {
                "frequency": int(frequency_score),
                "hr": int(hr_score),
                "quality": int(quality_score)
            }
        }