mitai-jinkendo/backend/data_layer/vitals_fitness_insights.py
Lars ce84f330f0
All checks were successful
Deploy Development / deploy (push) Successful in 51s
Build Test / pytest-backend (push) Successful in 5s
Build Test / lint-backend (push) Successful in 0s
Build Test / build-frontend (push) Successful in 17s
feat: add German number formatting functions and enhance narrative context in vital signs insights
- Introduced `_de_num` and `_de_num_signed` functions for formatting decimal numbers with a comma, improving text presentation in German.
- Updated `_build_consolidated_paragraphs` to utilize new formatting functions for HRV and resting heart rate comparisons, enhancing clarity in insights.
- Refined narrative descriptions for better contextual understanding of vital signs trends and their implications.
2026-04-20 10:55:49 +02:00

358 lines
13 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
Vitalwerte: Zeitreihen + einfache Fitness-/Recovery-Einordnung (Layer 1, Issue 53).
Keine Diagnose — deskriptive Trends, Korrelationen und Varianz-Hinweise.
"""
from __future__ import annotations
import statistics
from datetime import datetime, timedelta
from typing import Any, Dict, List, Optional, Sequence
from db import get_db, get_cursor
from data_layer.utils import safe_float, serialize_dates
# Vital-sign series exposed by this module. Each entry is unpacked as
# (key, label_de, unit, color): `key` is the row field read from the vitals
# query result and also the key of the resulting series dict, `label_de` is the
# German display label, `unit` the unit string and `color` a hex color code.
SERIES_CONFIG = (
    ("resting_hr", "Ruhepuls", "bpm", "#3B82F6"),
    ("hrv", "HRV", "ms", "#1D9E75"),
    ("vo2_max", "VO2max", "ml/kg/min", "#8B5CF6"),
    ("spo2", "SpO2", "%", "#0EA5E9"),
    ("respiratory_rate", "Atemfrequenz", "/min", "#F59E0B"),
)
def _date_to_ord(d: Any) -> float:
if hasattr(d, "toordinal"):
return float(d.toordinal())
if isinstance(d, str):
return float(datetime.fromisoformat(d[:10]).date().toordinal())
return 0.0
def _linear_slope(dates: Sequence[Any], values: Sequence[float]) -> float:
if len(values) < 3 or len(dates) != len(values):
return 0.0
xs = [_date_to_ord(d) for d in dates]
ys = list(values)
n = len(xs)
mx = sum(xs) / n
my = sum(ys) / n
den = sum((x - mx) ** 2 for x in xs)
if den < 1e-9:
return 0.0
return sum((x - mx) * (y - my) for x, y in zip(xs, ys)) / den
def _pearson(xs: Sequence[float], ys: Sequence[float]) -> Optional[float]:
n = len(xs)
if n < 5 or len(ys) != n:
return None
mx = statistics.mean(xs)
my = statistics.mean(ys)
sx = statistics.pstdev(xs) if n > 1 else 0.0
sy = statistics.pstdev(ys) if n > 1 else 0.0
if sx < 1e-9 or sy < 1e-9:
return None
cov = sum((x - mx) * (y - my) for x, y in zip(xs, ys)) / n
return cov / (sx * sy)
def _daily_training_load(cur: Any, profile_id: str, cutoff: str) -> Dict[str, float]:
"""Summe Trainingsminuten pro Kalendertag als Belastungs-Proxy."""
cur.execute(
"""
SELECT date::text AS d, COALESCE(SUM(duration_min), 0)::float AS minutes
FROM activity_log
WHERE profile_id = %s AND date >= %s::date AND duration_min IS NOT NULL AND duration_min > 0
GROUP BY date
ORDER BY date
""",
(profile_id, cutoff),
)
rows = cur.fetchall()
return {r["d"]: float(r["minutes"]) for r in rows}
def _trailing_window_means(vals: List[float], window: int = 7) -> List[float]:
"""Gleitender Mittelwert über die letzten bis zu `window` aufeinanderfolgenden Messungen (nicht Kalendertage)."""
out: List[float] = []
for i in range(len(vals)):
chunk = vals[max(0, i - window + 1) : i + 1]
out.append(round(statistics.mean(chunk), 2))
return out
def _de_num(x: float) -> str:
"""Dezimalzahl mit Komma für Fließtext."""
return f"{x:.1f}".replace(".", ",")
def _de_num_signed(x: float) -> str:
"""Wie _de_num, mit explizitem Vorzeichen (für %-Abweichungen)."""
return f"{x:+.1f}".replace(".", ",")
def _build_consolidated_paragraphs(
series: Dict[str, Any],
hrv_vs_baseline_pct: Optional[float],
rhr_vs_baseline_pct: Optional[float],
r_pearson: Optional[float],
pairs_n: int,
) -> List[str]:
"""
Thematisch zusammengeführte Absätze — inhaltlich alle früheren Einzel-Karten (Bullets),
ohne die Aussagen zu streichen (Redundanz nur bei wörtlicher Doppelung vermeiden).
"""
paras: List[str] = []
# ── Referenzlage (HRV/Ruhepuls vs. ältere Basis), wie zuvor in KPI/Narrativ genutzt
basis_bits: List[str] = []
if hrv_vs_baseline_pct is not None:
basis_bits.append(
f"HRV liegt gegenüber der älteren Referenz bei {_de_num_signed(float(hrv_vs_baseline_pct))} %"
)
if rhr_vs_baseline_pct is not None:
basis_bits.append(
f"Ruhepuls relativ zur Referenz bei {_de_num_signed(float(rhr_vs_baseline_pct))} %"
)
if basis_bits:
paras.append(
" ".join(basis_bits)
+ " — Vergleich kurzfristiges Mittel gegenüber älterer Basis; individuell interpretieren."
)
rhr = series.get("resting_hr")
hrv_s = series.get("hrv")
# ── Ruhepuls: letzte 7 Messungen vs. vorangehendes Fenster (wie frühere Karten)
rhr_short_compare = ""
if rhr and rhr.get("points") and len(rhr["points"]) >= 10:
pts = rhr["points"]
last7 = [p["value"] for p in pts[-7:]]
before = [p["value"] for p in pts[:-7][-14:]] if len(pts) > 7 else []
if before:
m7 = statistics.mean(last7)
mb = statistics.mean(before)
diff = m7 - mb
if diff > 3:
rhr_short_compare = (
f"Die letzten 7 Messungen liegen im Mittel ca. {_de_num(diff)} bpm über dem vorangehenden Fenster — "
"kann mit Belastung, Stress, Schlaf oder Infekt zusammenhängen."
)
elif diff < -3:
rhr_short_compare = (
"Der Ruhepuls liegt im kurzen Vergleich unter dem vorherigen Mittel — oft mit Entlastung oder "
"besserer Regeneration vereinbar (individuell)."
)
# ── Streuung: frühere Schwellen n ≥ 6 für die ausführlichen Varianz-Hinweise
rhr_var_sentence = ""
if (
rhr
and rhr.get("stdev") is not None
and rhr.get("n", 0) >= 6
):
rhr_var_sentence = (
f"Standardabweichung im Fenster ca. {_de_num(float(rhr['stdev']))} bpm — kurzfristige Schwankungen sind normal; "
"extreme Sprünge mit Kontext (Training, Schlaf) betrachten."
)
hrv_var_sentence = ""
if (
hrv_s
and hrv_s.get("stdev") is not None
and hrv_s.get("n", 0) >= 6
):
hrv_var_sentence = (
f"HRV schwankt im Fenster (σ{_de_num(float(hrv_s['stdev']))} ms). "
"Vergleich mit der eigenen Basis ist aussagekräftiger als Einzelwerte."
)
# Gestrichelte Linie = gleitender Mittelwert (neuer Kontext, ergänzt nicht ersetzt)
ma_hint = (
"Einzelwerte können stark springen; die gestrichelte Linie im Diagramm zeigt einen gleitenden Mittelwert "
"über bis zu sieben aufeinanderfolgende Messungen (nicht Kalendertage)."
)
block_b_parts: List[str] = []
if rhr_short_compare:
block_b_parts.append(rhr_short_compare)
if rhr_var_sentence:
block_b_parts.append(rhr_var_sentence)
if hrv_var_sentence:
block_b_parts.append(hrv_var_sentence)
if block_b_parts:
paras.append(ma_hint + " " + " ".join(block_b_parts))
elif series:
# Kein Kurzvergleich/keine σ-Sätze, aber mindestens eine Vital-Zeitreihe: MA-Hinweis (Diagramm)
paras.append(ma_hint)
# ── VO2max: Wortlaut wie in den früheren Bullet-Karten
vo2 = series.get("vo2_max")
if vo2 and vo2.get("n", 0) >= 4 and vo2.get("slope_per_day") is not None:
s = vo2["slope_per_day"]
if s > 0.002:
paras.append(
"Im gewählten Fenster steigt der erfasste VO2max tendenziell — häufig mit Trainingsreiz oder "
"besserer Datenlage vereinbar."
)
elif s < -0.002:
paras.append(
"VO2max zeigt im Fenster einen fallenden Trend — kann z. B. durch Pause, Krankheit oder Messrauschen "
"entstehen; Verlauf beobachten."
)
# ── Belastung vs. Folge-Ruhepuls: frühere Formulierungen + r/n wo berechnet
if r_pearson is not None and pairs_n >= 8:
if r_pearson > 0.35:
paras.append(
"An Tagen nach höherer Trainingsdauer (Minuten-Summe) steigt der Ruhepuls am nächsten Morgen in deinen "
"Daten tendenziell — typisches Muster während Erholungsreaktion (kein Kausalbeweis). "
f"Korrelation (Trainingsminuten am Tag → Ruhepuls am Folgetag): r ≈ {r_pearson:.2f} bei n = {pairs_n} Paaren."
)
elif r_pearson < -0.25:
paras.append(
"Es zeigt sich ein leicht negatives Zusammenspiel zwischen Tages-Belastung und Folge-Ruhepuls in diesem "
f"Fenster — stark von Datenlage und Ausreißern abhängig. r ≈ {r_pearson:.2f}, n = {pairs_n} Paare."
)
return [p for p in paras if p]
def _rhr_by_date(cur: Any, profile_id: str, cutoff: str) -> Dict[str, float]:
cur.execute(
"""
SELECT date::text AS d, resting_hr::float AS rhr
FROM vitals_baseline
WHERE profile_id = %s AND date >= %s::date AND resting_hr IS NOT NULL
ORDER BY date
""",
(profile_id, cutoff),
)
return {r["d"]: float(r["rhr"]) for r in cur.fetchall()}
def build_vitals_history_and_analytics(
    profile_id: str,
    days: int,
    hrv_vs_baseline_pct: Optional[float] = None,
    rhr_vs_baseline_pct: Optional[float] = None,
) -> Dict[str, Any]:
    """Build per-metric vital-sign time series plus a consolidated narrative.

    Each metric in ``SERIES_CONFIG`` gets its own series (own unit, own scale
    in the frontend) with raw points, a trailing moving average and summary
    statistics. Training minutes per day are also paired with the resting
    heart rate of the following day and correlated.

    Args:
        profile_id: Profile whose data is evaluated.
        days: Look-back window in days; values outside 7..365 are clamped.
        hrv_vs_baseline_pct: Optional HRV deviation from the older baseline,
            passed through to the narrative.
        rhr_vs_baseline_pct: Optional resting heart rate deviation from the
            older baseline, passed through to the narrative.

    Returns:
        A dict with ``chart_type``, ``window_days``, ``series``, ``analytics``
        and ``metadata``. When no vital values exist in the window, ``series``
        is empty and ``metadata.confidence`` is ``"insufficient"``.
    """
    # Clamp the window to the supported range of 7..365 days.
    days = min(max(days, 7), 365)
    cutoff = (datetime.now() - timedelta(days=days)).strftime("%Y-%m-%d")

    with get_db() as conn:
        cur = get_cursor(conn)
        cur.execute(
            """
SELECT date, resting_hr, hrv, vo2_max, spo2, respiratory_rate
FROM vitals_baseline
WHERE profile_id = %s AND date >= %s
ORDER BY date ASC
""",
            (profile_id, cutoff),
        )
        rows = cur.fetchall()

    series: Dict[str, Any] = {}
    for key, label_de, unit, color in SERIES_CONFIG:
        points: List[Dict[str, Any]] = []
        raw_dates: List[Any] = []
        raw_values: List[float] = []
        for row in rows:
            raw = row.get(key)
            if raw is None:
                continue
            value = safe_float(raw)
            day = row["date"]
            day_iso = day.isoformat() if hasattr(day, "isoformat") else str(day)[:10]
            # Points are rounded for display; statistics use the unrounded values.
            points.append({"date": day_iso, "value": round(value, 2)})
            raw_dates.append(day)
            raw_values.append(value)

        if not points:
            continue

        smoothed = _trailing_window_means(raw_values, window=7)
        value_count = len(raw_values)
        series[key] = {
            "key": key,
            "label_de": label_de,
            "unit": unit,
            "color": color,
            "points": points,
            "points_ma7": [
                {"date": point["date"], "value": average}
                for point, average in zip(points, smoothed)
            ],
            "n": len(points),
            "last": raw_values[-1],
            "mean": round(statistics.mean(raw_values), 2),
            "stdev": round(statistics.pstdev(raw_values), 2) if value_count >= 2 else None,
            "slope_per_day": (
                round(_linear_slope(raw_dates, raw_values), 6) if value_count >= 3 else None
            ),
        }

    # Training load (activity minutes) vs. resting heart rate on the following day
    with get_db() as conn:
        cur = get_cursor(conn)
        load_by_day = _daily_training_load(cur, profile_id, cutoff)
        rhr_by_day = _rhr_by_date(cur, profile_id, cutoff)

    paired_loads: List[float] = []
    paired_rhr: List[float] = []
    for day_text, load_minutes in load_by_day.items():
        try:
            load_day = datetime.fromisoformat(day_text[:10]).date()
        except ValueError:
            # Skip keys that are not parseable as an ISO date.
            continue
        following_day = (load_day + timedelta(days=1)).isoformat()
        if following_day in rhr_by_day and load_minutes > 0:
            paired_loads.append(load_minutes)
            paired_rhr.append(rhr_by_day[following_day])

    pairs_n = len(paired_loads)
    # The correlation is only computed from eight pairs upwards.
    r_pearson = _pearson(paired_loads, paired_rhr) if pairs_n >= 8 else None
    correlation_rounded = round(r_pearson, 3) if r_pearson is not None else None

    consolidated = _build_consolidated_paragraphs(
        series,
        hrv_vs_baseline_pct,
        rhr_vs_baseline_pct,
        r_pearson,
        pairs_n,
    )

    if series:
        return {
            "chart_type": "vitals_dashboard",
            "window_days": days,
            "series": serialize_dates(series),
            "analytics": {
                "bullets": [],
                "consolidated_paragraphs": consolidated,
            },
            "metadata": {
                "confidence": "medium",
                "note": "Deskriptive Auswertung; keine medizinische Diagnose.",
                "load_rhr_pairs_n": pairs_n,
                "load_rhr_correlation": correlation_rounded,
            },
        }

    # No vital values in the window: return an empty dashboard payload.
    return {
        "chart_type": "vitals_dashboard",
        "window_days": days,
        "series": {},
        "analytics": {"bullets": [], "consolidated_paragraphs": consolidated},
        "metadata": {
            "confidence": "insufficient",
            "message": "Keine Vital-Zeitreihen im Fenster",
            "load_rhr_pairs_n": pairs_n,
            "load_rhr_correlation": correlation_rounded,
        },
    }