- Add data_layer/ module structure with utils.py + body_metrics.py
- Migrate 3 functions: weight_trend, body_composition, circumference_summary
- Refactor placeholders to use data layer
- Add charts router with 3 Chart.js endpoints
- Tests: Syntax ✅, Confidence logic ✅

Phase 0c PoC (3 functions): Foundation for 40+ remaining functions

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

242 lines · 6.2 KiB · Python
"""
|
|
Data Layer Utilities
|
|
|
|
Shared helper functions for all data layer modules.
|
|
|
|
Functions:
|
|
- calculate_confidence(): Determine data quality confidence level
|
|
- serialize_dates(): Convert Python date objects to ISO strings for JSON
|
|
- safe_float(): Safe conversion from Decimal/None to float
|
|
- safe_int(): Safe conversion to int
|
|
|
|
Phase 0c: Multi-Layer Architecture
|
|
Version: 1.0
|
|
"""
|
|
|
|
import statistics
from datetime import date
from decimal import Decimal
from typing import Any, Dict, List, Optional
|
def calculate_confidence(
    data_points: int,
    days_requested: int,
    metric_type: str = "general"
) -> str:
    """
    Calculate confidence level based on data availability.

    Args:
        data_points: Number of actual data points available
        days_requested: Number of days in analysis window
        metric_type: Type of metric ("general", "correlation", "trend")

    Returns:
        Confidence level: "high" | "medium" | "low" | "insufficient"

    Confidence Rules:
        General (default):
        - 7d: high >= 4, medium >= 3, low >= 2
        - 28d: high >= 18, medium >= 12, low >= 8
        - 90d: high >= 60, medium >= 40, low >= 30

        Correlation:
        - high >= 28, medium >= 21, low >= 14

        Trend:
        - high >= 70% of days, medium >= 50%, low >= 30%

    Example:
        >>> calculate_confidence(20, 28, "general")
        'high'
        >>> calculate_confidence(10, 28, "general")
        'low'
    """
    if data_points == 0:
        return "insufficient"

    # Each metric type maps a measured quantity onto a (threshold, label)
    # ladder; the first rung the measurement reaches wins, and falling off
    # the bottom means the data is insufficient.
    if metric_type == "correlation":
        # Correlation needs more paired data points.
        measured: float = data_points
        ladder = [(28, "high"), (21, "medium"), (14, "low")]
    elif metric_type == "trend":
        # Trend analysis is judged by the fraction of requested days covered.
        measured = data_points / days_requested if days_requested > 0 else 0
        ladder = [(0.70, "high"), (0.50, "medium"), (0.30, "low")]
    else:  # "general": absolute counts, scaled to the time window
        measured = data_points
        if days_requested <= 7:
            ladder = [(4, "high"), (3, "medium"), (2, "low")]
        elif days_requested <= 28:
            ladder = [(18, "high"), (12, "medium"), (8, "low")]
        else:  # 90+ days
            ladder = [(60, "high"), (40, "medium"), (30, "low")]

    for threshold, label in ladder:
        if measured >= threshold:
            return label
    return "insufficient"
|
def serialize_dates(data: Any) -> Any:
    """
    Convert Python date objects to ISO strings for JSON serialization.

    Recursively walks through dicts, lists, and tuples converting date objects.

    Args:
        data: Any data structure (dict, list, tuple, or primitive)

    Returns:
        Same structure with dates converted to ISO strings

    Example:
        >>> serialize_dates({"date": date(2026, 3, 28), "value": 85.0})
        {"date": "2026-03-28", "value": 85.0}
    """
    # Containers are rebuilt rather than mutated, so the caller's structure
    # is left untouched. Container types are preserved (tuple stays tuple).
    if isinstance(data, dict):
        converted = {}
        for key, value in data.items():
            converted[key] = serialize_dates(value)
        return converted

    if isinstance(data, (list, tuple)):
        items = [serialize_dates(item) for item in data]
        return items if isinstance(data, list) else tuple(items)

    # NOTE: datetime is a subclass of date, so datetime values are also
    # serialized here — via their own isoformat(), which includes the time.
    if isinstance(data, date):
        return data.isoformat()

    return data
|
def safe_float(value: Any, default: float = 0.0) -> float:
    """
    Safely convert value to float.

    Handles Decimal, None, and invalid values.

    Args:
        value: Value to convert (can be Decimal, int, float, str, None)
        default: Default value if conversion fails

    Returns:
        Float value or default

    Example:
        >>> safe_float(Decimal('85.5'))
        85.5
        >>> safe_float(None)
        0.0
        >>> safe_float(None, -1.0)
        -1.0
    """
    if value is None:
        return default

    try:
        # float() accepts Decimal directly, so no special-casing is needed.
        # OverflowError is caught too: float(very_large_int) raises it, and
        # previously that escaped the handler and crashed the caller.
        return float(value)
    except (ValueError, TypeError, OverflowError):
        return default
|
def safe_int(value: Any, default: int = 0) -> int:
    """
    Safely convert value to int.

    Handles Decimal, None, and invalid values.

    Args:
        value: Value to convert
        default: Default value if conversion fails

    Returns:
        Int value or default (fractional values truncate toward zero)

    Example:
        >>> safe_int(Decimal('42'))
        42
        >>> safe_int(None)
        0
    """
    if value is None:
        return default

    try:
        # int() accepts Decimal directly, so no special-casing is needed.
        # OverflowError is caught too: int(float('inf')) and
        # int(Decimal('Infinity')) raise it, and previously that escaped
        # the handler and crashed the caller.
        return int(value)
    except (ValueError, TypeError, OverflowError):
        return default
|
def calculate_baseline(
    values: List[float],
    method: str = "median"
) -> float:
    """
    Calculate baseline value from a list of measurements.

    Args:
        values: List of numeric values
        method: "median" (default) | "mean" | "trimmed_mean"

    Returns:
        Baseline value. An empty list yields 0.0; an unrecognized method
        falls back to the median.

    Example:
        >>> calculate_baseline([85.0, 84.5, 86.0, 84.8, 85.2])
        85.0
    """
    # `statistics` is imported at module level (previously a function-scope
    # import repeated on every call).
    if not values:
        return 0.0

    if method == "mean":
        return statistics.mean(values)

    if method == "trimmed_mean":
        # Trim the top and bottom 10% to damp outliers. With fewer than 10
        # samples there is no whole value to trim, so use the plain mean.
        if len(values) < 10:
            return statistics.mean(values)
        trim_count = len(values) // 10
        # trim_count >= 1 here, and trimming 2 * (n // 10) of n >= 10 values
        # always leaves a non-empty slice, so no emptiness guard is needed.
        trimmed = sorted(values)[trim_count:-trim_count]
        return statistics.mean(trimmed)

    # "median", and the documented fallback for any unknown method.
    return statistics.median(values)