From 6b2ad9fa1cdeebc2fb84e06ae23a08924eca5cf0 Mon Sep 17 00:00:00 2001
From: Lars <Lars@stommer.de>
Date: Sat, 28 Mar 2026 19:11:45 +0100
Subject: [PATCH] feat: Phase 0c - activity_metrics.py module complete

Data Layer:
- get_activity_summary_data() - count, duration, calories, frequency
- get_activity_detail_data() - detailed activity log with all fields
- get_training_type_distribution_data() - category distribution with percentages

Placeholder Layer:
- get_activity_summary() - refactored to use data layer
- get_activity_detail() - refactored to use data layer
- get_trainingstyp_verteilung() - refactored to use data layer

All 3 activity data functions + 3 placeholder refactors complete.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 backend/data_layer/__init__.py         |   7 +-
 backend/data_layer/activity_metrics.py | 277 +++++++++++++++++++++++++
 backend/placeholder_resolver.py        | 108 +++++-----
 3 files changed, 332 insertions(+), 60 deletions(-)
 create mode 100644 backend/data_layer/activity_metrics.py

diff --git a/backend/data_layer/__init__.py b/backend/data_layer/__init__.py
index b75b517..3ae0dfa 100644
--- a/backend/data_layer/__init__.py
+++ b/backend/data_layer/__init__.py
@@ -31,9 +31,9 @@ from .utils import *
 # Metric modules
 from .body_metrics import *
 from .nutrition_metrics import *
+from .activity_metrics import *
 
 # Future imports (will be added as modules are created):
-# from .activity_metrics import *
 # from .recovery_metrics import *
 # from .health_metrics import *
 # from .goals import *
@@ -56,4 +56,9 @@ __all__ = [
     'get_energy_balance_data',
     'get_protein_adequacy_data',
     'get_macro_consistency_data',
+
+    # Activity Metrics
+    'get_activity_summary_data',
+    'get_activity_detail_data',
+    'get_training_type_distribution_data',
 ]
diff --git a/backend/data_layer/activity_metrics.py b/backend/data_layer/activity_metrics.py
new file mode 100644
index 0000000..fc728a6
--- /dev/null
+++ b/backend/data_layer/activity_metrics.py
@@ -0,0 +1,277 @@
+"""
+Activity Metrics Data Layer
+
+Provides structured data for training tracking and analysis.
+
+Functions:
+    - get_activity_summary_data(): Count, total duration, calories, averages
+    - get_activity_detail_data(): Detailed activity log entries
+    - get_training_type_distribution_data(): Training category percentages
+
+All functions return structured data (dict) without formatting.
+Use placeholder_resolver.py for formatted strings for AI.
+
+Phase 0c: Multi-Layer Architecture
+Version: 1.0
+"""
+
+from typing import Dict, List, Optional
+from datetime import datetime, timedelta, date
+from db import get_db, get_cursor, r2d
+from data_layer.utils import calculate_confidence, safe_float, safe_int
+
+
+def get_activity_summary_data(
+    profile_id: str,
+    days: int = 14
+) -> Dict:
+    """
+    Aggregate a profile's logged activities over a trailing window.
+
+    Args:
+        profile_id: User profile ID
+        days: Length of the trailing window in days (default 14)
+
+    Returns:
+        Dict with these keys, in this order:
+            "activity_count": int - rows found in the window
+            "total_duration_min": int - summed duration_min
+            "total_kcal": int - summed kcal_active
+            "avg_duration_min": int - total duration / count, truncated
+            "avg_kcal_per_session": int - total kcal / count, truncated
+            "sessions_per_week": float - count / days * 7, one decimal
+            "confidence": str - "insufficient" if empty, else calculate_confidence()
+            "days_analyzed": int - echoes the days argument
+
+    Migration from Phase 0b:
+        OLD: get_activity_summary(pid, days) formatted string
+        NEW: Structured data with all metrics
+    """
+    with get_db() as conn:
+        cur = get_cursor(conn)
+        window_start = datetime.now() - timedelta(days=days)
+
+        cur.execute(
+            """SELECT
+                COUNT(*) as count,
+                SUM(duration_min) as total_min,
+                SUM(kcal_active) as total_kcal
+               FROM activity_log
+               WHERE profile_id=%s AND date >= %s""",
+            (profile_id, window_start.strftime('%Y-%m-%d'))
+        )
+        totals = cur.fetchone()
+
+        if not totals or totals['count'] == 0:
+            empty = dict.fromkeys((
+                "activity_count", "total_duration_min", "total_kcal",
+                "avg_duration_min", "avg_kcal_per_session",
+            ), 0)
+            empty.update(
+                sessions_per_week=0.0, confidence="insufficient", days_analyzed=days
+            )
+            return empty
+
+        sessions = totals['count']
+        minutes = safe_int(totals['total_min'])
+        kcal = safe_int(totals['total_kcal'])
+
+        if sessions > 0:
+            mean_minutes = int(minutes / sessions)
+            mean_kcal = int(kcal / sessions)
+        else:
+            mean_minutes = mean_kcal = 0
+        weekly_rate = (sessions / days * 7) if days > 0 else 0.0
+
+        confidence = calculate_confidence(sessions, days, "general")
+
+        return {
+            "activity_count": sessions,
+            "total_duration_min": minutes,
+            "total_kcal": kcal,
+            "avg_duration_min": mean_minutes,
+            "avg_kcal_per_session": mean_kcal,
+            "sessions_per_week": round(weekly_rate, 1),
+            "confidence": confidence,
+            "days_analyzed": days
+        }
+
+
+def get_activity_detail_data(
+    profile_id: str,
+    days: int = 14,
+    limit: int = 50
+) -> Dict:
+    """
+    Fetch the most recent activity log entries for a profile.
+
+    Args:
+        profile_id: User profile ID
+        days: Length of the trailing window in days (default 14)
+        limit: Maximum number of entries fetched (default 50)
+
+    Returns:
+        {
+            "activities": [      # newest first, at most `limit` items
+                {
+                    "date": date,
+                    "activity_type": str,
+                    "duration_min": int,
+                    "kcal_active": int,
+                    "hr_avg": int | None,
+                    "training_category": str | None
+                },
+                ...
+            ],
+            "total_count": int,  # len(activities), so capped by `limit`
+            "confidence": str,   # "insufficient" when nothing was found
+            "days_analyzed": int
+        }
+
+    Migration from Phase 0b:
+        OLD: get_activity_detail(pid, days) formatted string list
+        NEW: Structured array with all fields
+    """
+    with get_db() as conn:
+        cur = get_cursor(conn)
+        window_start = datetime.now() - timedelta(days=days)
+
+        cur.execute(
+            """SELECT
+                date,
+                activity_type,
+                duration_min,
+                kcal_active,
+                hr_avg,
+                training_category
+               FROM activity_log
+               WHERE profile_id=%s AND date >= %s
+               ORDER BY date DESC
+               LIMIT %s""",
+            (profile_id, window_start.strftime('%Y-%m-%d'), limit)
+        )
+        rows = cur.fetchall()
+
+        if not rows:
+            return {
+                "activities": [],
+                "total_count": 0,
+                "confidence": "insufficient",
+                "days_analyzed": days
+            }
+
+        entries = [
+            {
+                "date": entry['date'],
+                "activity_type": entry['activity_type'],
+                "duration_min": safe_int(entry['duration_min']),
+                "kcal_active": safe_int(entry['kcal_active']),
+                "hr_avg": safe_int(entry['hr_avg']) if entry.get('hr_avg') else None,
+                "training_category": entry.get('training_category')
+            }
+            for entry in rows
+        ]
+
+        found = len(entries)
+        return {
+            "activities": entries,
+            "total_count": found,
+            "confidence": calculate_confidence(found, days, "general"),
+            "days_analyzed": days
+        }
+
+
+def get_training_type_distribution_data(
+    profile_id: str,
+    days: int = 14
+) -> Dict:
+    """
+    Break a profile's sessions down by training category.
+
+    Args:
+        profile_id: User profile ID
+        days: Length of the trailing window in days (default 14)
+
+    Returns:
+        {
+            "distribution": [          # most frequent category first
+                {
+                    "category": str,
+                    "count": int,
+                    "percentage": float  # share of total_sessions
+                },
+                ...
+            ],
+            "total_sessions": int,          # all rows in the window
+            "categorized_sessions": int,    # training_category set
+            "uncategorized_sessions": int,  # total minus categorized
+            "confidence": str,
+            "days_analyzed": int
+        }
+
+    Migration from Phase 0b:
+        OLD: get_trainingstyp_verteilung(pid, days) top 3 formatted
+        NEW: Complete distribution with percentages
+    """
+    with get_db() as conn:
+        cur = get_cursor(conn)
+        window_start = datetime.now() - timedelta(days=days)
+        params = (profile_id, window_start.strftime('%Y-%m-%d'))
+
+        # Per-category counts; rows without a category are excluded
+        cur.execute(
+            """SELECT
+                training_category,
+                COUNT(*) as count
+               FROM activity_log
+               WHERE profile_id=%s
+                 AND date >= %s
+                 AND training_category IS NOT NULL
+               GROUP BY training_category
+               ORDER BY count DESC""",
+            params
+        )
+        per_category = cur.fetchall()
+
+        # Overall session count, uncategorized rows included
+        cur.execute(
+            """SELECT COUNT(*) as total
+               FROM activity_log
+               WHERE profile_id=%s AND date >= %s""",
+            params
+        )
+        total_row = cur.fetchone()
+        total_sessions = total_row['total'] if total_row else 0
+
+        if not per_category or total_sessions == 0:
+            return {
+                "distribution": [],
+                "total_sessions": total_sessions,
+                "categorized_sessions": 0,
+                "uncategorized_sessions": total_sessions,
+                "confidence": "insufficient",
+                "days_analyzed": days
+            }
+
+        categorized = sum(item['count'] for item in per_category)
+
+        def share_of_total(count):
+            return round((count / total_sessions * 100) if total_sessions > 0 else 0, 1)
+
+        shares = [
+            {
+                "category": item['training_category'],
+                "count": item['count'],
+                "percentage": share_of_total(item['count'])
+            }
+            for item in per_category
+        ]
+
+        return {
+            "distribution": shares,
+            "total_sessions": total_sessions,
+            "categorized_sessions": categorized,
+            "uncategorized_sessions": total_sessions - categorized,
+            "confidence": calculate_confidence(categorized, days, "general"),
+            "days_analyzed": days
+        }
diff --git a/backend/placeholder_resolver.py b/backend/placeholder_resolver.py
index 3264c3e..ebfc717 100644
--- a/backend/placeholder_resolver.py
+++ b/backend/placeholder_resolver.py
@@ -23,6 +23,11 @@ from data_layer.nutrition_metrics import (
     get_nutrition_days_data,
     get_protein_targets_data
 )
+from data_layer.activity_metrics import (
+    get_activity_summary_data,
+    get_activity_detail_data,
+    get_training_type_distribution_data
+)
 
 
 # ── Helper Functions ──────────────────────────────────────────────────────────
@@ -223,25 +228,18 @@ def get_protein_ziel_high(profile_id: str) -> str:
 
 
 def get_activity_summary(profile_id: str, days: int = 14) -> str:
-    """Get activity summary for recent period."""
-    with get_db() as conn:
-        cur = get_cursor(conn)
-        cutoff = (datetime.now() - timedelta(days=days)).strftime('%Y-%m-%d')
-        cur.execute(
-            """SELECT COUNT(*) as count,
-                      SUM(duration_min) as total_min,
-                      SUM(kcal_active) as total_kcal
-               FROM activity_log
-               WHERE profile_id=%s AND date >= %s""",
-            (profile_id, cutoff)
-        )
-        row = r2d(cur.fetchone())
+    """
+    Format the activity summary for the last `days` days as a German sentence.
 
-        if row['count'] == 0:
-            return f"Keine Aktivitäten in den letzten {days} Tagen"
+    Phase 0c: numbers come from data_layer get_activity_summary_data().
+    The "no activities" text is keyed on activity_count, not on confidence.
+    """
+    data = get_activity_summary_data(profile_id, days)
 
-        avg_min = int(row['total_min'] / row['count']) if row['total_min'] else 0
-        return f"{row['count']} Einheiten in {days} Tagen (Ø {avg_min} min/Einheit, {int(row['total_kcal'] or 0)} kcal gesamt)"
+    if data['activity_count'] == 0:
+        return f"Keine Aktivitäten in den letzten {days} Tagen"
+
+    return f"{data['activity_count']} Einheiten in {days} Tagen (Ø {data['avg_duration_min']} min/Einheit, {data['total_kcal']} kcal gesamt)"
 
 
 def calculate_age(dob) -> str:
@@ -263,55 +261,47 @@ def calculate_age(dob) -> str:
 
 
 def get_activity_detail(profile_id: str, days: int = 14) -> str:
-    """Get detailed activity log for analysis."""
-    with get_db() as conn:
-        cur = get_cursor(conn)
-        cutoff = (datetime.now() - timedelta(days=days)).strftime('%Y-%m-%d')
-        cur.execute(
-            """SELECT date, activity_type, duration_min, kcal_active, hr_avg
-               FROM activity_log
-               WHERE profile_id=%s AND date >= %s
-               ORDER BY date DESC
-               LIMIT 50""",
-            (profile_id, cutoff)
+    """
+    Format the newest activity log entries as one line per activity.
+
+    Phase 0c: entries come from data_layer get_activity_detail_data().
+    An empty entry list, not the confidence rating, triggers the fallback text.
+    """
+    data = get_activity_detail_data(profile_id, days, limit=20)
+
+    if not data['activities']:
+        return f"Keine Aktivitäten in den letzten {days} Tagen"
+
+    # Format as readable list (max 20 entries to avoid token bloat)
+    lines = []
+    for activity in data['activities'][:20]:
+        hr_str = f" HF={activity['hr_avg']}" if activity['hr_avg'] else ""
+        lines.append(
+            f"{activity['date']}: {activity['activity_type']} "
+            f"({activity['duration_min']}min, {activity['kcal_active']}kcal{hr_str})"
         )
-        rows = [r2d(r) for r in cur.fetchall()]
 
-        if not rows:
-            return f"Keine Aktivitäten in den letzten {days} Tagen"
-
-        # Format as readable list
-        lines = []
-        for r in rows:
-            hr_str = f" HF={r['hr_avg']}" if r.get('hr_avg') else ""
-            lines.append(
-                f"{r['date']}: {r['activity_type']} ({r['duration_min']}min, {r.get('kcal_active', 0)}kcal{hr_str})"
-            )
-
-        return '\n'.join(lines[:20])  # Max 20 entries to avoid token bloat
+    return '\n'.join(lines)
 
 
 def get_trainingstyp_verteilung(profile_id: str, days: int = 14) -> str:
-    """Get training type distribution."""
-    with get_db() as conn:
-        cur = get_cursor(conn)
-        cutoff = (datetime.now() - timedelta(days=days)).strftime('%Y-%m-%d')
-        cur.execute(
-            """SELECT training_category, COUNT(*) as count
-               FROM activity_log
-               WHERE profile_id=%s AND date >= %s AND training_category IS NOT NULL
-               GROUP BY training_category
-               ORDER BY count DESC""",
-            (profile_id, cutoff)
-        )
-        rows = [r2d(r) for r in cur.fetchall()]
+    """
+    Format the top training categories as "category: NN%" pairs.
 
-        if not rows:
-            return "Keine kategorisierten Trainings"
+    Phase 0c: counts come from data_layer get_training_type_distribution_data().
+    Percentages are shares of categorized sessions, as before the refactor.
+    """
+    data = get_training_type_distribution_data(profile_id, days)
 
-        total = sum(r['count'] for r in rows)
-        parts = [f"{r['training_category']}: {int(r['count']/total*100)}%" for r in rows[:3]]
-        return ", ".join(parts)
+    if not data['distribution']:
+        return "Keine kategorisierten Trainings"
+
+    # Top 3 categories; denominator excludes uncategorized sessions
+    parts = [
+        f"{dist['category']}: {int(dist['count'] / data['categorized_sessions'] * 100)}%"
+        for dist in data['distribution'][:3]
+    ]
+    return ", ".join(parts)
 
 
 def get_sleep_avg_duration(profile_id: str, days: int = 7) -> str: