""" Data Layer Utilities Shared helper functions for all data layer modules. Functions: - calculate_confidence(): Determine data quality confidence level - serialize_dates(): Convert Python date objects to ISO strings for JSON - safe_float(): Safe conversion from Decimal/None to float - safe_int(): Safe conversion to int Phase 0c: Multi-Layer Architecture Version: 1.0 """ from typing import Any, Dict, List, Optional from datetime import date from decimal import Decimal def calculate_confidence( data_points: int, days_requested: int, metric_type: str = "general" ) -> str: """ Calculate confidence level based on data availability. Args: data_points: Number of actual data points available days_requested: Number of days in analysis window metric_type: Type of metric ("general", "correlation", "trend") Returns: Confidence level: "high" | "medium" | "low" | "insufficient" Confidence Rules: General (default): - 7d: high >= 4, medium >= 3, low >= 2 - 28d: high >= 18, medium >= 12, low >= 8 - 90d: high >= 60, medium >= 40, low >= 30 Correlation: - high >= 28, medium >= 21, low >= 14 Trend: - high >= 70% of days, medium >= 50%, low >= 30% Example: >>> calculate_confidence(20, 28, "general") 'high' >>> calculate_confidence(10, 28, "general") 'low' """ if data_points == 0: return "insufficient" if metric_type == "correlation": # Correlation needs more paired data points if data_points >= 28: return "high" elif data_points >= 21: return "medium" elif data_points >= 14: return "low" else: return "insufficient" elif metric_type == "trend": # Trend analysis based on percentage of days covered coverage = data_points / days_requested if days_requested > 0 else 0 if coverage >= 0.70: return "high" elif coverage >= 0.50: return "medium" elif coverage >= 0.30: return "low" else: return "insufficient" else: # "general" # Different thresholds based on time window if days_requested <= 7: if data_points >= 4: return "high" elif data_points >= 3: return "medium" elif data_points >= 2: return "low" else: return "insufficient" elif days_requested < 90: # 8-89 days: Medium-term analysis if data_points >= 18: return "high" elif data_points >= 12: return "medium" elif data_points >= 8: return "low" else: return "insufficient" else: # 90+ days: Long-term analysis if data_points >= 60: return "high" elif data_points >= 40: return "medium" elif data_points >= 30: return "low" else: return "insufficient" def serialize_dates(data: Any) -> Any: """ Convert Python date objects to ISO strings for JSON serialization. Recursively walks through dicts, lists, and tuples converting date objects. Args: data: Any data structure (dict, list, tuple, or primitive) Returns: Same structure with dates converted to ISO strings Example: >>> serialize_dates({"date": date(2026, 3, 28), "value": 85.0}) {"date": "2026-03-28", "value": 85.0} """ if isinstance(data, dict): return {k: serialize_dates(v) for k, v in data.items()} elif isinstance(data, list): return [serialize_dates(item) for item in data] elif isinstance(data, tuple): return tuple(serialize_dates(item) for item in data) elif isinstance(data, date): return data.isoformat() else: return data def safe_float(value: Any, default: float = 0.0) -> float: """ Safely convert value to float. Handles Decimal, None, and invalid values. Args: value: Value to convert (can be Decimal, int, float, str, None) default: Default value if conversion fails Returns: Float value or default Example: >>> safe_float(Decimal('85.5')) 85.5 >>> safe_float(None) 0.0 >>> safe_float(None, -1.0) -1.0 """ if value is None: return default try: if isinstance(value, Decimal): return float(value) return float(value) except (ValueError, TypeError): return default def safe_int(value: Any, default: int = 0) -> int: """ Safely convert value to int. Handles Decimal, None, and invalid values. Args: value: Value to convert default: Default value if conversion fails Returns: Int value or default Example: >>> safe_int(Decimal('42')) 42 >>> safe_int(None) 0 """ if value is None: return default try: if isinstance(value, Decimal): return int(value) return int(value) except (ValueError, TypeError): return default def calculate_baseline( values: List[float], method: str = "median" ) -> float: """ Calculate baseline value from a list of measurements. Args: values: List of numeric values method: "median" (default) | "mean" | "trimmed_mean" Returns: Baseline value Example: >>> calculate_baseline([85.0, 84.5, 86.0, 84.8, 85.2]) 85.0 """ import statistics if not values: return 0.0 if method == "median": return statistics.median(values) elif method == "mean": return statistics.mean(values) elif method == "trimmed_mean": # Remove top/bottom 10% if len(values) < 10: return statistics.mean(values) sorted_vals = sorted(values) trim_count = len(values) // 10 trimmed = sorted_vals[trim_count:-trim_count] if trim_count > 0 else sorted_vals return statistics.mean(trimmed) if trimmed else 0.0 else: return statistics.median(values) # Default to median