mitai-jinkendo/backend/data_layer/utils.py
Lars ffa99f10fb
All checks were successful
Deploy Development / deploy (push) Successful in 54s
Build Test / lint-backend (push) Successful in 0s
Build Test / build-frontend (push) Successful in 14s
fix: correct confidence thresholds for 30-89 day range
Bug: 30 days with 29 data points returned 'insufficient' because
it fell into the 90+ day branch which requires >= 30 data points.

Fix: Changed condition from 'days_requested <= 28' to 'days_requested < 90'
so that 8-89 day ranges use the medium-term thresholds:
- high >= 18 data points
- medium >= 12
- low >= 8

This means 30 days with 29 entries now returns 'high' confidence.

Affects: nutrition_avg, and all other medium-term metrics.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-28 21:03:22 +01:00

243 lines
6.3 KiB
Python

"""
Data Layer Utilities
Shared helper functions for all data layer modules.
Functions:
- calculate_confidence(): Determine data quality confidence level
- serialize_dates(): Convert Python date objects to ISO strings for JSON
- safe_float(): Safe conversion from Decimal/None to float
- safe_int(): Safe conversion to int
Phase 0c: Multi-Layer Architecture
Version: 1.0
"""
from typing import Any, Dict, List, Optional
from datetime import date
from decimal import Decimal
def calculate_confidence(
    data_points: int,
    days_requested: int,
    metric_type: str = "general"
) -> str:
    """
    Calculate confidence level based on data availability.
    Args:
        data_points: Number of actual data points available
        days_requested: Number of days in analysis window
        metric_type: Type of metric ("general", "correlation", "trend")
    Returns:
        Confidence level: "high" | "medium" | "low" | "insufficient"
    Confidence Rules:
        General (default):
            - 7d: high >= 4, medium >= 3, low >= 2
            - 8-89d: high >= 18, medium >= 12, low >= 8
            - 90d+: high >= 60, medium >= 40, low >= 30
        Correlation:
            - high >= 28, medium >= 21, low >= 14
        Trend:
            - high >= 70% of days, medium >= 50%, low >= 30%
    Example:
        >>> calculate_confidence(20, 28, "general")
        'high'
        >>> calculate_confidence(10, 28, "general")
        'low'
    """
    # No data at all is always insufficient, regardless of metric type.
    if data_points == 0:
        return "insufficient"

    if metric_type == "correlation":
        # Correlation needs more paired data points than other metrics.
        correlation_bands = ((28, "high"), (21, "medium"), (14, "low"))
        for minimum, level in correlation_bands:
            if data_points >= minimum:
                return level
        return "insufficient"

    if metric_type == "trend":
        # Trend quality is judged by the fraction of days actually covered.
        coverage = data_points / days_requested if days_requested > 0 else 0
        trend_bands = ((0.70, "high"), (0.50, "medium"), (0.30, "low"))
        for minimum, level in trend_bands:
            if coverage >= minimum:
                return level
        return "insufficient"

    # "general": absolute thresholds scale with the requested window size.
    if days_requested <= 7:
        bands = ((4, "high"), (3, "medium"), (2, "low"))
    elif days_requested < 90:
        # 8-89 days: medium-term analysis
        bands = ((18, "high"), (12, "medium"), (8, "low"))
    else:
        # 90+ days: long-term analysis
        bands = ((60, "high"), (40, "medium"), (30, "low"))

    for minimum, level in bands:
        if data_points >= minimum:
            return level
    return "insufficient"
def serialize_dates(data: Any) -> Any:
    """
    Convert Python date objects to ISO strings for JSON serialization.
    Recursively walks through dicts, lists, and tuples converting date objects.
    Args:
        data: Any data structure (dict, list, tuple, or primitive)
    Returns:
        Same structure with dates converted to ISO strings
    Example:
        >>> serialize_dates({"date": date(2026, 3, 28), "value": 85.0})
        {"date": "2026-03-28", "value": 85.0}
    """
    # Dates are leaves: convert directly to "YYYY-MM-DD".
    if isinstance(data, date):
        return data.isoformat()
    # Containers recurse, preserving their concrete type.
    if isinstance(data, dict):
        return {key: serialize_dates(val) for key, val in data.items()}
    if isinstance(data, (list, tuple)):
        converted = [serialize_dates(item) for item in data]
        return converted if isinstance(data, list) else tuple(converted)
    # Any other value passes through unchanged.
    return data
def safe_float(value: Any, default: float = 0.0) -> float:
    """
    Safely convert value to float.
    Handles Decimal, None, and invalid values.
    Args:
        value: Value to convert (can be Decimal, int, float, str, None)
        default: Default value if conversion fails
    Returns:
        Float value or default
    Example:
        >>> safe_float(Decimal('85.5'))
        85.5
        >>> safe_float(None)
        0.0
        >>> safe_float(None, -1.0)
        -1.0
    """
    if value is None:
        return default
    try:
        # float() accepts Decimal (and int/str) directly, so no special
        # isinstance branch is needed.
        return float(value)
    except (ValueError, TypeError):
        return default
def safe_int(value: Any, default: int = 0) -> int:
    """
    Safely convert value to int.
    Handles Decimal, None, and invalid values.
    Args:
        value: Value to convert
        default: Default value if conversion fails
    Returns:
        Int value or default
    Example:
        >>> safe_int(Decimal('42'))
        42
        >>> safe_int(None)
        0
    """
    if value is None:
        return default
    try:
        # int() accepts Decimal directly (truncating toward zero), so the
        # former isinstance(value, Decimal) branch was redundant.
        return int(value)
    except (ValueError, TypeError):
        return default
def calculate_baseline(
    values: List[float],
    method: str = "median"
) -> float:
    """
    Calculate baseline value from a list of measurements.
    Args:
        values: List of numeric values
        method: "median" (default) | "mean" | "trimmed_mean"
    Returns:
        Baseline value
    Example:
        >>> calculate_baseline([85.0, 84.5, 86.0, 84.8, 85.2])
        85.0
    """
    import statistics

    # An empty series has no meaningful baseline.
    if not values:
        return 0.0

    if method == "mean":
        return statistics.mean(values)

    if method == "trimmed_mean":
        # Trim the top and bottom 10%; too few points to trim -> plain mean.
        if len(values) < 10:
            return statistics.mean(values)
        cut = len(values) // 10
        ordered = sorted(values)
        core = ordered[cut:-cut] if cut > 0 else ordered
        return statistics.mean(core) if core else 0.0

    # "median" — also the fallback for any unrecognized method name.
    return statistics.median(values)