# backend/data_layer/activity_metrics.py
#
# Introduced by commit "feat: Phase 0c - activity_metrics.py module complete".
# The same commit also changes backend/data_layer/__init__.py: it activates
# `from .activity_metrics import *` and adds 'get_activity_summary_data',
# 'get_activity_detail_data' and 'get_training_type_distribution_data' to
# `__all__` under an "# Activity Metrics" heading.
"""
Activity Metrics Data Layer

Provides structured data for training tracking and analysis.

Functions:
    - get_activity_summary_data(): Count, total duration, calories, averages
    - get_activity_detail_data(): Detailed activity log entries
    - get_training_type_distribution_data(): Training category percentages

All functions return structured data (dict) without formatting.
Use placeholder_resolver.py for formatted strings for AI.

Phase 0c: Multi-Layer Architecture
Version: 1.0
"""

from typing import Dict, List, Optional
from datetime import datetime, timedelta, date
from db import get_db, get_cursor, r2d
from data_layer.utils import calculate_confidence, safe_float, safe_int


def _cutoff_date(days: int) -> str:
    """Return the lower date bound ('YYYY-MM-DD') for a window of *days* days.

    The bound is "now minus *days* days"; the queries below compare with
    ``date >= cutoff``, so the cutoff day itself is included.
    """
    return (datetime.now() - timedelta(days=days)).strftime('%Y-%m-%d')


def get_activity_summary_data(
    profile_id: str,
    days: int = 14
) -> Dict:
    """
    Get activity summary statistics.

    Args:
        profile_id: User profile ID
        days: Analysis window (default 14)

    Returns:
        {
            "activity_count": int,
            "total_duration_min": int,
            "total_kcal": int,
            "avg_duration_min": int,        # truncated, not rounded
            "avg_kcal_per_session": int,    # truncated, not rounded
            "sessions_per_week": float,     # rounded to 1 decimal
            "confidence": str,
            "days_analyzed": int
        }

        When no activity is found, all numeric fields are 0 and
        "confidence" is "insufficient". Otherwise "confidence" is whatever
        calculate_confidence(activity_count, days, "general") returns.

    Migration from Phase 0b:
        OLD: get_activity_summary(pid, days) formatted string
        NEW: Structured data with all metrics
    """
    with get_db() as conn:
        cur = get_cursor(conn)
        cur.execute(
            """SELECT
                COUNT(*) as count,
                SUM(duration_min) as total_min,
                SUM(kcal_active) as total_kcal
               FROM activity_log
               WHERE profile_id=%s AND date >= %s""",
            (profile_id, _cutoff_date(days))
        )
        row = cur.fetchone()

    if not row or row['count'] == 0:
        return {
            "activity_count": 0,
            "total_duration_min": 0,
            "total_kcal": 0,
            "avg_duration_min": 0,
            "avg_kcal_per_session": 0,
            "sessions_per_week": 0.0,
            "confidence": "insufficient",
            "days_analyzed": days
        }

    # From here on activity_count is at least 1, so the per-session
    # averages need no division-by-zero guard.
    activity_count = row['count']
    # SUM() yields NULL when every summed value is NULL; safe_int is
    # applied to the raw aggregate before any arithmetic.
    total_min = safe_int(row['total_min'])
    total_kcal = safe_int(row['total_kcal'])

    # `days` is caller-supplied and may be 0 or negative; keep this guard.
    sessions_per_week = (activity_count / days * 7) if days > 0 else 0.0

    return {
        "activity_count": activity_count,
        "total_duration_min": total_min,
        "total_kcal": total_kcal,
        "avg_duration_min": int(total_min / activity_count),
        "avg_kcal_per_session": int(total_kcal / activity_count),
        "sessions_per_week": round(sessions_per_week, 1),
        "confidence": calculate_confidence(activity_count, days, "general"),
        "days_analyzed": days
    }


def get_activity_detail_data(
    profile_id: str,
    days: int = 14,
    limit: int = 50
) -> Dict:
    """
    Get detailed activity log entries, newest first.

    Args:
        profile_id: User profile ID
        days: Analysis window (default 14)
        limit: Maximum entries to return (default 50)

    Returns:
        {
            "activities": [
                {
                    "date": date,               # as returned by the DB driver
                    "activity_type": str,
                    "duration_min": int,
                    "kcal_active": int,
                    "hr_avg": int | None,
                    "training_category": str | None
                },
                ...
            ],
            "total_count": int,
            "confidence": str,
            "days_analyzed": int
        }

        NOTE: "total_count" is the number of entries actually returned
        (len(activities)), so it never exceeds `limit`; it is not the
        number of matching rows in the table. "confidence" is computed
        from that same capped count.

    Migration from Phase 0b:
        OLD: get_activity_detail(pid, days) formatted string list
        NEW: Structured array with all fields
    """
    with get_db() as conn:
        cur = get_cursor(conn)
        cur.execute(
            """SELECT
                date,
                activity_type,
                duration_min,
                kcal_active,
                hr_avg,
                training_category
               FROM activity_log
               WHERE profile_id=%s AND date >= %s
               ORDER BY date DESC
               LIMIT %s""",
            (profile_id, _cutoff_date(days), limit)
        )
        rows = cur.fetchall()

    if not rows:
        return {
            "activities": [],
            "total_count": 0,
            "confidence": "insufficient",
            "days_analyzed": days
        }

    activities = [
        {
            "date": row['date'],
            "activity_type": row['activity_type'],
            "duration_min": safe_int(row['duration_min']),
            "kcal_active": safe_int(row['kcal_active']),
            # A missing or zero heart rate is reported as None.
            "hr_avg": safe_int(row['hr_avg']) if row.get('hr_avg') else None,
            "training_category": row.get('training_category')
        }
        for row in rows
    ]

    return {
        "activities": activities,
        "total_count": len(activities),
        "confidence": calculate_confidence(len(activities), days, "general"),
        "days_analyzed": days
    }


def get_training_type_distribution_data(
    profile_id: str,
    days: int = 14
) -> Dict:
    """
    Calculate training category distribution.

    Args:
        profile_id: User profile ID
        days: Analysis window (default 14)

    Returns:
        {
            "distribution": [
                {
                    "category": str,
                    "count": int,
                    "percentage": float
                },
                ...
            ],
            "total_sessions": int,
            "categorized_sessions": int,
            "uncategorized_sessions": int,
            "confidence": str,
            "days_analyzed": int
        }

        "distribution" is ordered by count, highest first, and only
        contains sessions that have a training_category. "percentage" is
        the share of ALL sessions in the window (categorized and
        uncategorized), rounded to 1 decimal, so the percentages sum to
        less than 100 when uncategorized sessions exist. When there is no
        categorized session, "distribution" is empty and "confidence" is
        "insufficient".

    Migration from Phase 0b:
        OLD: get_trainingstyp_verteilung(pid, days) top 3 formatted
        NEW: Complete distribution with percentages
    """
    with get_db() as conn:
        cur = get_cursor(conn)
        # One grouped statement yields both the per-category counts and the
        # uncategorized count (the NULL group). Reading them from a single
        # statement keeps the totals consistent with each other.
        cur.execute(
            """SELECT
                training_category,
                COUNT(*) as count
               FROM activity_log
               WHERE profile_id=%s AND date >= %s
               GROUP BY training_category
               ORDER BY count DESC""",
            (profile_id, _cutoff_date(days))
        )
        rows = cur.fetchall()

    total_sessions = sum(row['count'] for row in rows)
    categorized_rows = [
        row for row in rows if row['training_category'] is not None
    ]

    if not categorized_rows:
        return {
            "distribution": [],
            "total_sessions": total_sessions,
            "categorized_sessions": 0,
            "uncategorized_sessions": total_sessions,
            "confidence": "insufficient",
            "days_analyzed": days
        }

    categorized_count = sum(row['count'] for row in categorized_rows)

    # total_sessions >= categorized_count >= 1 here, so the division is safe.
    distribution = [
        {
            "category": row['training_category'],
            "count": row['count'],
            "percentage": round(row['count'] / total_sessions * 100, 1)
        }
        for row in categorized_rows
    ]

    return {
        "distribution": distribution,
        "total_sessions": total_sessions,
        "categorized_sessions": categorized_count,
        "uncategorized_sessions": total_sessions - categorized_count,
        "confidence": calculate_confidence(categorized_count, days, "general"),
        "days_analyzed": days
    }
# backend/placeholder_resolver.py -- activity placeholders (Phase 0c)
#
# These three resolvers only FORMAT data obtained from the data layer.
# calculate_age() and get_sleep_avg_duration() appear in the patch solely as
# unchanged hunk context and are therefore not reproduced here.
from data_layer.activity_metrics import (
    get_activity_summary_data,
    get_activity_detail_data,
    get_training_type_distribution_data
)

# Upper bound on listed activities, to avoid token bloat in the AI prompt.
_MAX_DETAIL_ENTRIES = 20


def get_activity_summary(profile_id: str, days: int = 14) -> str:
    """
    Get activity summary for recent period.

    Phase 0c: uses data_layer.activity_metrics.get_activity_summary_data()
    and only formats the result for AI consumption.

    Args:
        profile_id: User profile ID
        days: Analysis window (default 14)

    Returns:
        A one-line German summary (session count, average minutes per
        session, total kcal), or a "no activities" message when the window
        contains no sessions.
    """
    data = get_activity_summary_data(profile_id, days)

    # Decide on the actual session count, not on the confidence label: the
    # label comes from calculate_confidence(), whose thresholds are not
    # visible here and which may rate a small but non-zero number of
    # sessions as "insufficient". Reporting "no activities" in that case
    # would be wrong.
    if data['activity_count'] == 0:
        return f"Keine Aktivitäten in den letzten {days} Tagen"

    return f"{data['activity_count']} Einheiten in {days} Tagen (Ø {data['avg_duration_min']} min/Einheit, {data['total_kcal']} kcal gesamt)"


def get_activity_detail(profile_id: str, days: int = 14) -> str:
    """
    Get detailed activity log for analysis.

    Phase 0c: uses data_layer.activity_metrics.get_activity_detail_data()
    and only formats the result for AI consumption.

    Args:
        profile_id: User profile ID
        days: Analysis window (default 14)

    Returns:
        One line per activity (newest first, at most 20 lines) joined with
        newlines, or a "no activities" message when there are none.
    """
    # Only the entries that will be printed are requested.
    data = get_activity_detail_data(
        profile_id, days, limit=_MAX_DETAIL_ENTRIES
    )
    activities = data['activities']

    # Test for real entries rather than the confidence label (see
    # get_activity_summary for the reasoning).
    if not activities:
        return f"Keine Aktivitäten in den letzten {days} Tagen"

    lines = []
    for activity in activities:
        # Heart rate is optional; omit the field when it is None or 0.
        hr_str = f" HF={activity['hr_avg']}" if activity['hr_avg'] else ""
        lines.append(
            f"{activity['date']}: {activity['activity_type']} "
            f"({activity['duration_min']}min, {activity['kcal_active']}kcal{hr_str})"
        )

    return '\n'.join(lines)


def get_trainingstyp_verteilung(profile_id: str, days: int = 14) -> str:
    """
    Get training type distribution.

    Phase 0c: uses
    data_layer.activity_metrics.get_training_type_distribution_data()
    and only formats the result for AI consumption.

    Args:
        profile_id: User profile ID
        days: Analysis window (default 14)

    Returns:
        The top 3 categories as "category: NN%" joined by ", ", where NN is
        the truncated share of CATEGORIZED sessions (as before the
        refactor), or a "no categorized trainings" message.
    """
    data = get_training_type_distribution_data(profile_id, days)
    distribution = data['distribution']
    categorized = data['categorized_sessions']

    if not distribution or not categorized:
        return "Keine kategorisierten Trainings"

    # The data layer's "percentage" is relative to all sessions (including
    # uncategorized ones) and already rounded to one decimal. Recomputing
    # from the raw counts keeps this placeholder's output identical to the
    # pre-refactor implementation: int(count / categorized_total * 100).
    parts = [
        f"{dist['category']}: {int(dist['count'] / categorized * 100)}%"
        for dist in distribution[:3]
    ]
    return ", ".join(parts)