Optimierung Platzhalter Umfang #88
|
|
@ -18,6 +18,7 @@ Dieses Dokument ist **normativ für Agenten**, die ein neues Import-Zielmodul an
|
||||||
| Admin-Systemvorlagen | `backend/routers/admin_csv_templates.py` |
|
| Admin-Systemvorlagen | `backend/routers/admin_csv_templates.py` |
|
||||||
| Nutzer-Import (Profil-Mappings) | `backend/routers/csv_import.py` |
|
| Nutzer-Import (Profil-Mappings) | `backend/routers/csv_import.py` |
|
||||||
| Vorlagen-Validierung (strukturell + Sample) | `backend/csv_parser/template_validator.py` (`validate_csv_template`) |
|
| Vorlagen-Validierung (strukturell + Sample) | `backend/csv_parser/template_validator.py` (`validate_csv_template`) |
|
||||||
|
| Effektives Listentrennzeichen | `backend/csv_parser/core.py` (`resolve_effective_csv_delimiter`) — Datei kann `;` (z. B. Apple DE) haben, Vorlage `,` (EN); Import/Diagnose **nicht** nur das gespeicherte Trennzeichen blind nutzen. |
|
||||||
|
|
||||||
**Single Source of Truth** für erlaubte Zielfelder, Typen und Duplikat-Keys ist **`module_registry.py`**. Keine parallele Feldliste in Routern duplizieren.
|
**Single Source of Truth** für erlaubte Zielfelder, Typen und Duplikat-Keys ist **`module_registry.py`**. Keine parallele Feldliste in Routern duplizieren.
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -47,6 +47,46 @@ def sniff_delimiter(sample_line: str) -> str:
|
||||||
return best
|
return best
|
||||||
|
|
||||||
|
|
||||||
|
def _csv_field_count(line: str, delimiter: str) -> int:
    """Count the fields of one CSV line for a candidate delimiter.

    The line is parsed with ``csv.reader`` so that delimiters inside quoted
    fields are not counted as separators.

    Args:
        line: A single line of CSV text (typically the header line).
        delimiter: Candidate one-character delimiter.

    Returns:
        The number of fields, or 0 when the line is empty/blank or cannot be
        parsed with this delimiter.
    """
    if not line or not line.strip():
        return 0
    try:
        row = next(csv.reader(io.StringIO(line), delimiter=delimiter), None)
    except csv.Error:
        # A line the csv module rejects (e.g. a field larger than
        # csv.field_size_limit()) must not abort delimiter detection; score
        # this candidate as "no fields" so another delimiter can win.
        return 0
    return 0 if row is None else len(row)
|
||||||
|
|
||||||
|
|
||||||
|
def resolve_effective_csv_delimiter(text: str, template_delimiter: str | None = None) -> str:
    """Choose the delimiter to use for an uploaded CSV file.

    Stored templates often carry "," (Apple EN) while real exports use ";"
    depending on region (Apple DE / Excel). With the wrong character the header
    collapses into a single column and the mapping breaks completely, so the
    header line is scored against every delimiter in ``_DEFAULT_DELIMS``.

    Args:
        text: Raw CSV text of the uploaded file.
        template_delimiter: Delimiter stored with the template/mapping, if any.
            A value that is not in ``_DEFAULT_DELIMS`` is ignored.

    Returns:
        The delimiter that yields the most header fields. On a tie the
        template delimiter wins if it is among the best candidates, otherwise
        the first best candidate in ``_DEFAULT_DELIMS`` order. If no candidate
        splits the header into more than one field, the template delimiter or
        the result of ``sniff_delimiter`` is returned; for text without lines
        the template delimiter or "," is returned.
    """
    raw = template_delimiter or ""
    # Try the raw value first: stripping unconditionally would erase a
    # whitespace delimiter such as "\t" before it can be matched.
    # NOTE(review): _DEFAULT_DELIMS is defined outside this view; if it has no
    # whitespace member this behaves exactly like the plain strip().
    tpl: str | None = raw if raw in _DEFAULT_DELIMS else raw.strip()
    if tpl not in _DEFAULT_DELIMS:
        tpl = None

    lines = _split_first_lines(text, max_lines=5)
    if not lines:
        return tpl or ","

    # Only the first returned line (the header) is scored.
    header = lines[0]
    scores: list[tuple[int, str]] = [
        (_csv_field_count(header, d), d) for d in _DEFAULT_DELIMS
    ]

    max_n = max(n for n, _ in scores)
    if max_n <= 1:
        # No candidate splits the header: trust the template, else sniff.
        return tpl or sniff_delimiter(header)

    at_max = [d for n, d in scores if n == max_n]
    if tpl and tpl in at_max:
        return tpl
    return at_max[0]
|
||||||
|
|
||||||
|
|
||||||
def _split_first_lines(text: str, max_lines: int = 5) -> List[str]:
|
def _split_first_lines(text: str, max_lines: int = 5) -> List[str]:
|
||||||
lines: List[str] = []
|
lines: List[str] = []
|
||||||
for line in text.splitlines():
|
for line in text.splitlines():
|
||||||
|
|
|
||||||
|
|
@ -11,7 +11,7 @@ from typing import Any
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
from csv_parser.core import iter_csv_dict_rows
|
from csv_parser.core import iter_csv_dict_rows, resolve_effective_csv_delimiter
|
||||||
from csv_parser.import_row_processing import (
|
from csv_parser.import_row_processing import (
|
||||||
aggregate_mapped_rows,
|
aggregate_mapped_rows,
|
||||||
resolve_import_row_processing,
|
resolve_import_row_processing,
|
||||||
|
|
@ -97,7 +97,8 @@ def run_universal_csv_import(
|
||||||
if tc is not None and not isinstance(tc, dict):
|
if tc is not None and not isinstance(tc, dict):
|
||||||
tc = None
|
tc = None
|
||||||
|
|
||||||
delim = mapping.get("delimiter") or ","
|
tpl_delim = str(mapping.get("delimiter") or ",").strip() or ","
|
||||||
|
delim = resolve_effective_csv_delimiter(text, tpl_delim)
|
||||||
has_header = mapping.get("has_header", True)
|
has_header = mapping.get("has_header", True)
|
||||||
|
|
||||||
if module == "nutrition":
|
if module == "nutrition":
|
||||||
|
|
|
||||||
|
|
@ -25,6 +25,10 @@ import statistics
|
||||||
from db import get_db, get_cursor, r2d
|
from db import get_db, get_cursor, r2d
|
||||||
from data_layer.activity_session_metrics import enrich_sessions_with_metrics
|
from data_layer.activity_session_metrics import enrich_sessions_with_metrics
|
||||||
from data_layer.utils import calculate_confidence, safe_float, safe_int, serialize_dates
|
from data_layer.utils import calculate_confidence, safe_float, safe_int, serialize_dates
|
||||||
|
from data_layer.prompt_output_compact import (
|
||||||
|
normalize_prompt_number,
|
||||||
|
session_metrics_list_to_key_value_compact,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def get_activity_summary_data(
|
def get_activity_summary_data(
|
||||||
|
|
@ -1094,6 +1098,10 @@ def get_training_sessions_recent_weeks_data(
|
||||||
Letzte Wochen mit Einzeltrainings für KI-Kontext (Dauer, kcal, HF, Typ).
|
Letzte Wochen mit Einzeltrainings für KI-Kontext (Dauer, kcal, HF, Typ).
|
||||||
|
|
||||||
weeks: Anzahl zurückliegender ISO-Kalenderwochen (Default 4).
|
weeks: Anzahl zurückliegender ISO-Kalenderwochen (Default 4).
|
||||||
|
|
||||||
|
session_metrics pro Einheit: kompaktes Objekt ``{key: Wert}`` (keine wiederholten
|
||||||
|
Namen/Beschreibungen). Bedeutung der Keys: Platzhalter ``{{training_parameters_glossary_md}}``.
|
||||||
|
Zahlen werden für Prompt-Token kompakt gerundet.
|
||||||
"""
|
"""
|
||||||
days = max(weeks * 7, 7)
|
days = max(weeks * 7, 7)
|
||||||
with get_db() as conn:
|
with get_db() as conn:
|
||||||
|
|
@ -1131,6 +1139,8 @@ def get_training_sessions_recent_weeks_data(
|
||||||
"days_loaded": days,
|
"days_loaded": days,
|
||||||
"session_count": 0,
|
"session_count": 0,
|
||||||
"confidence": "insufficient",
|
"confidence": "insufficient",
|
||||||
|
"session_metrics_shape": "key_value",
|
||||||
|
"metric_semantics_placeholder": "{{training_parameters_glossary_md}}",
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -1149,6 +1159,7 @@ def get_training_sessions_recent_weeks_data(
|
||||||
kcal_f = float(kcal) if kcal is not None else None
|
kcal_f = float(kcal) if kcal is not None else None
|
||||||
hr_a = r.get("hr_avg")
|
hr_a = r.get("hr_avg")
|
||||||
hr_m = r.get("hr_max")
|
hr_m = r.get("hr_max")
|
||||||
|
sm_compact = session_metrics_list_to_key_value_compact(r.get("session_metrics"))
|
||||||
by_week[wk].append(
|
by_week[wk].append(
|
||||||
{
|
{
|
||||||
"id": str(r["id"]),
|
"id": str(r["id"]),
|
||||||
|
|
@ -1157,12 +1168,12 @@ def get_training_sessions_recent_weeks_data(
|
||||||
"activity_type": r.get("activity_type"),
|
"activity_type": r.get("activity_type"),
|
||||||
"training_category": r.get("training_category"),
|
"training_category": r.get("training_category"),
|
||||||
"training_type_name": r.get("training_type_name"),
|
"training_type_name": r.get("training_type_name"),
|
||||||
"duration_min": dur_f,
|
"duration_min": normalize_prompt_number(dur_f) if dur_f is not None else None,
|
||||||
"kcal_active": kcal_f,
|
"kcal_active": normalize_prompt_number(kcal_f) if kcal_f is not None else None,
|
||||||
"hr_avg": int(hr_a) if hr_a is not None else None,
|
"hr_avg": int(hr_a) if hr_a is not None else None,
|
||||||
"hr_max": int(hr_m) if hr_m is not None else None,
|
"hr_max": int(hr_m) if hr_m is not None else None,
|
||||||
"rpe": int(r["rpe"]) if r.get("rpe") is not None else None,
|
"rpe": int(r["rpe"]) if r.get("rpe") is not None else None,
|
||||||
"session_metrics": r.get("session_metrics", []),
|
"session_metrics": sm_compact,
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -1177,6 +1188,8 @@ def get_training_sessions_recent_weeks_data(
|
||||||
"days_loaded": days,
|
"days_loaded": days,
|
||||||
"session_count": len(rows),
|
"session_count": len(rows),
|
||||||
"confidence": confidence,
|
"confidence": confidence,
|
||||||
|
"session_metrics_shape": "key_value",
|
||||||
|
"metric_semantics_placeholder": "{{training_parameters_glossary_md}}",
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
|
||||||
|
|
@ -13,9 +13,31 @@ from data_layer.activity_data_canon import (
|
||||||
ACTIVITY_LOG_LEGACY_COLUMN_FOR_EAV_PRIMARY_PARAM,
|
ACTIVITY_LOG_LEGACY_COLUMN_FOR_EAV_PRIMARY_PARAM,
|
||||||
ACTIVITY_MODULE_REGISTRY_FIELD_KEYS,
|
ACTIVITY_MODULE_REGISTRY_FIELD_KEYS,
|
||||||
)
|
)
|
||||||
|
from data_layer.prompt_output_compact import normalize_prompt_number
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def _normalize_metric_value_for_read(data_type: str, val: Any) -> Any:
    """Normalize one metric value on the read path (layer 1).

    Keeps needlessly long float strings out of AI/UI output (issue 53 /
    placeholders).

    Args:
        data_type: Declared metric type; compared case-insensitively after
            stripping whitespace. ``None``/empty is treated as unknown.
        val: Raw metric value.

    Returns:
        ``None`` for ``None``; ``bool(val)`` for ``"boolean"``; ``int(val)``
        for ``"integer"`` when that conversion succeeds; in every other case
        the result of ``normalize_prompt_number(val)``.
    """
    if val is None:
        return None
    dt = (data_type or "").strip().lower()
    if dt == "boolean":
        return bool(val)
    if dt == "integer":
        try:
            return int(val)
        except (TypeError, ValueError, OverflowError):
            # int() raises OverflowError for infinite floats; such values fall
            # through to the generic normalization instead of crashing.
            pass
    # "string", "float" and unknown types share the same normalization.
    return normalize_prompt_number(val)
|
||||||
|
|
||||||
# Diese Spalten nicht aus CSV-Parameter-Zuordnung überschreiben (kommen aus Typ-Mapping / System).
|
# Diese Spalten nicht aus CSV-Parameter-Zuordnung überschreiben (kommen aus Typ-Mapping / System).
|
||||||
ACTIVITY_LOG_PATCH_FORBIDDEN = frozenset(
|
ACTIVITY_LOG_PATCH_FORBIDDEN = frozenset(
|
||||||
{
|
{
|
||||||
|
|
@ -430,6 +452,8 @@ def merge_column_backed_and_eav_metrics(
|
||||||
keys_handled.add(k)
|
keys_handled.add(k)
|
||||||
|
|
||||||
merged.sort(key=lambda x: x["key"])
|
merged.sort(key=lambda x: x["key"])
|
||||||
|
for m in merged:
|
||||||
|
m["value"] = _normalize_metric_value_for_read(m.get("data_type") or "", m.get("value"))
|
||||||
return merged
|
return merged
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
152
backend/data_layer/prompt_output_compact.py
Normal file
152
backend/data_layer/prompt_output_compact.py
Normal file
|
|
@ -0,0 +1,152 @@
|
||||||
|
"""
|
||||||
|
Kompakte Zahlen- und JSON-Aufbereitung für KI-Platzhalter (Token sparen).
|
||||||
|
|
||||||
|
- Floats: sinnvolle Nachkommastellen je nach Größenordnung (kleine Werte <0,1 mehr Präzision).
|
||||||
|
- ≥10 meist ganzzahlig; Prozent/Verhältnisse über denselben Mechanismus lesbar.
|
||||||
|
- Rekursiv auf dict/list-Strukturen vor json.dumps in _safe_json anwendbar.
|
||||||
|
|
||||||
|
Hinweis: numpy.float64 und numerische Strings (DB/API) sind keine ``float``-Instanzen —
|
||||||
|
diese werden explizit mit float() normalisiert.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import math
|
||||||
|
import re
|
||||||
|
from decimal import Decimal
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
|
||||||
|
def compact_float_for_prompt(x: float) -> float | int:
|
||||||
|
"""
|
||||||
|
Reduziert unnötige Nachkommastellen; erhält kleine Beträge (<0,1) mit mehr Stellen.
|
||||||
|
"""
|
||||||
|
if not math.isfinite(x):
|
||||||
|
return x
|
||||||
|
ax = abs(x)
|
||||||
|
if ax == 0.0:
|
||||||
|
return 0
|
||||||
|
if ax >= 100.0:
|
||||||
|
return int(round(x))
|
||||||
|
if ax >= 10.0:
|
||||||
|
return int(round(x))
|
||||||
|
if ax >= 1.0:
|
||||||
|
r = round(x, 2)
|
||||||
|
return int(r) if abs(r - int(round(r))) < 1e-6 else r
|
||||||
|
if ax >= 0.1:
|
||||||
|
r = round(x, 2)
|
||||||
|
return int(r) if abs(r - int(round(r))) < 1e-6 else r
|
||||||
|
if ax >= 0.01:
|
||||||
|
return round(x, 3)
|
||||||
|
return round(x, 4)
|
||||||
|
|
||||||
|
|
||||||
|
def normalize_prompt_number(x: Any) -> Any:
    """Return a compact numeric form of ``x`` where one exists.

    Handling by input kind:

    - ``None``, ``bool`` and ``int`` are returned unchanged.
    - Strings: blank or non-numeric strings are returned unchanged; strings
      of optional minus sign plus digits become ``int``; other strings that
      ``float()`` accepts are compacted like floats.
    - Everything else (``float``, ``Decimal``, numpy scalars, ...) is
      converted with ``float()`` and compacted; objects that cannot be
      converted are returned unchanged.

    Values that convert to NaN or an infinity are returned unchanged.

    Args:
        x: Arbitrary value taken from a data payload.

    Returns:
        The compacted number, or ``x`` itself when it is not a finite number.
    """
    if x is None or isinstance(x, (bool, int)):
        return x
    if isinstance(x, str):
        s = x.strip()
        if not s:
            return x
        try:
            if re.fullmatch(r"-?\d+", s):
                return int(s)
            xf = float(s)
        except ValueError:
            return x
    else:
        try:
            xf = float(x)
        except (TypeError, ValueError, OverflowError):
            # OverflowError: exact numeric types whose value is too large for
            # a float; keep the original object instead of crashing.
            return x
    if not math.isfinite(xf):
        return x
    return compact_float_for_prompt(xf)
|
||||||
|
|
||||||
|
|
||||||
|
def compact_json_payload_for_prompts(obj: Any) -> Any:
    """Return a compacted copy of a JSON-like payload.

    Dicts, lists and tuples are rebuilt recursively (tuples stay tuples,
    dict keys are kept as they are). Every other leaf is passed through
    ``normalize_prompt_number``, so numbers are rounded compactly; note that
    this also converts leaf strings that look numeric.

    Args:
        obj: Payload made of dicts, lists, tuples and scalar leaves.

    Returns:
        A new structure of the same shape with normalized leaves.
    """
    if obj is None:
        return None
    recurse = compact_json_payload_for_prompts
    if isinstance(obj, dict):
        return {key: recurse(value) for key, value in obj.items()}
    if isinstance(obj, tuple):
        return tuple([recurse(item) for item in obj])
    if isinstance(obj, list):
        return [recurse(item) for item in obj]
    return normalize_prompt_number(obj)
|
||||||
|
|
||||||
|
|
||||||
|
def format_scalar_for_prompt_text(x: Any) -> str:
    """Render a scalar as short text for text placeholders.

    ``None`` becomes an em dash and booleans become "ja"/"nein". Any other
    value is first passed through ``normalize_prompt_number``; a finite float
    result is printed with ``%g`` (no long float tail), a string result is
    returned as is, and anything else is rendered with ``str()``.

    Args:
        x: Value to render.

    Returns:
        The short text representation.
    """
    if x is None:
        return "—"
    if isinstance(x, bool):
        return "ja" if x else "nein"

    number = normalize_prompt_number(x)
    if isinstance(number, bool):
        return "ja" if number else "nein"
    if isinstance(number, float) and math.isfinite(number):
        return "%g" % number
    return number if isinstance(number, str) else str(number)
|
||||||
|
|
||||||
|
|
||||||
|
def session_metrics_list_to_key_value_compact(metrics: list[Any] | None) -> dict[str, Any]:
|
||||||
|
"""
|
||||||
|
Session-Metriken für KI-JSON: nur key → Wert (keine wiederholten Namen/Beschreibungen).
|
||||||
|
|
||||||
|
Semantik: {{training_parameters_glossary_md}} im Prompt ergänzen.
|
||||||
|
"""
|
||||||
|
out: dict[str, Any] = {}
|
||||||
|
for m in metrics or []:
|
||||||
|
if not isinstance(m, dict):
|
||||||
|
continue
|
||||||
|
k = m.get("key")
|
||||||
|
if not k:
|
||||||
|
continue
|
||||||
|
v = m.get("value")
|
||||||
|
dt = (m.get("data_type") or "").lower()
|
||||||
|
if v is None:
|
||||||
|
out[str(k)] = None
|
||||||
|
continue
|
||||||
|
if dt == "integer":
|
||||||
|
try:
|
||||||
|
out[str(k)] = int(v)
|
||||||
|
except (TypeError, ValueError):
|
||||||
|
out[str(k)] = normalize_prompt_number(v)
|
||||||
|
elif dt == "boolean":
|
||||||
|
out[str(k)] = bool(v)
|
||||||
|
elif dt == "string":
|
||||||
|
out[str(k)] = normalize_prompt_number(v)
|
||||||
|
else:
|
||||||
|
out[str(k)] = normalize_prompt_number(v)
|
||||||
|
return out
|
||||||
|
|
@ -130,8 +130,8 @@ def register_activity_session_insights():
|
||||||
key="training_sessions_recent_json",
|
key="training_sessions_recent_json",
|
||||||
category="Aktivität",
|
category="Aktivität",
|
||||||
description=(
|
description=(
|
||||||
"JSON: ISO-Wochen mit Sessions (activity_log-Kopf) plus session_metrics[] — gemergte Profil-Metriken "
|
"JSON: ISO-Wochen mit Sessions (activity_log-Kopf) plus session_metrics als kompaktes "
|
||||||
"(dynamische Keys)"
|
"{key: Wert}-Objekt; Zahlen für Prompts gekürzt. Semantik: {{training_parameters_glossary_md}}."
|
||||||
),
|
),
|
||||||
resolver_module="backend/placeholder_resolver.py",
|
resolver_module="backend/placeholder_resolver.py",
|
||||||
resolver_function="_safe_json",
|
resolver_function="_safe_json",
|
||||||
|
|
@ -141,13 +141,10 @@ def register_activity_session_insights():
|
||||||
semantic_contract=(
|
semantic_contract=(
|
||||||
"Root: weeks[] mit week_iso; sessions[] pro Einheit u. a. id, date, activity_type, "
|
"Root: weeks[] mit week_iso; sessions[] pro Einheit u. a. id, date, activity_type, "
|
||||||
"duration_min, kcal_active, hr_avg, hr_max, rpe, training_category, training_type_name, "
|
"duration_min, kcal_active, hr_avg, hr_max, rpe, training_category, training_type_name, "
|
||||||
"session_metrics[]. "
|
"session_metrics (Objekt key→Wert, keine wiederholten Labels). "
|
||||||
"session_metrics: effektive Liste nach merge_column_backed_and_eav_metrics — Einträge mit "
|
"Merge wie merge_column_backed_and_eav_metrics; nur Keys aus Attributschema. "
|
||||||
"training_parameter_id, key, data_type, unit, value, name_de/name_en, description_de/description_en; "
|
"meta.session_metrics_shape=key_value, meta.metric_semantics_placeholder verweist auf Glossary-Platzhalter. "
|
||||||
"nur Parameter aus Attributschema "
|
"Alle JSON-Platzhalter mit _safe_json: Zahlen rekursiv kompakt gerundet. "
|
||||||
"(training_category_parameter + training_type_parameter Overrides), keys sortiert. "
|
|
||||||
"Kanon Lesen: activity_log-Spalte vor EAV bei Konflikt. "
|
|
||||||
"meta: weeks_requested, days_loaded, session_count, confidence. "
|
|
||||||
"Default ca. 4 ISO-Wochen (28 Tage Rohdatenfenster)."
|
"Default ca. 4 ISO-Wochen (28 Tage Rohdatenfenster)."
|
||||||
),
|
),
|
||||||
business_meaning="Rohkontext für wochenweise Auswertung (Erholung, Intensität) in der KI",
|
business_meaning="Rohkontext für wochenweise Auswertung (Erholung, Intensität) in der KI",
|
||||||
|
|
@ -171,7 +168,7 @@ def register_activity_session_insights():
|
||||||
"session_metrics oft [] (kein Typ, kein Profil, keine gespeicherten Werte). "
|
"session_metrics oft [] (kein Typ, kein Profil, keine gespeicherten Werte). "
|
||||||
"Anzahl und Namen der Metrik-Keys sind instanz-/adminabhängig — JSON nicht als festes Schema "
|
"Anzahl und Namen der Metrik-Keys sind instanz-/adminabhängig — JSON nicht als festes Schema "
|
||||||
"für Downstream-Parsing harter Logik verwenden. "
|
"für Downstream-Parsing harter Logik verwenden. "
|
||||||
"Für KI-Semantik zusätzlich {{training_parameters_glossary_md}} (gesamter aktiver Katalog) in den Prompt legen. "
|
"Pflicht für Metrik-Bedeutung: {{training_parameters_glossary_md}} (Katalog); im JSON keine Namen/Beschreibungen pro Session. "
|
||||||
"Composite-Parameter (JSON in EAV) noch nicht im MVP expandiert; ggf. Roh-value_text in späterer Phase."
|
"Composite-Parameter (JSON in EAV) noch nicht im MVP expandiert; ggf. Roh-value_text in späterer Phase."
|
||||||
),
|
),
|
||||||
layer_1_decision="activity_metrics.get_training_sessions_recent_weeks_data",
|
layer_1_decision="activity_metrics.get_training_sessions_recent_weeks_data",
|
||||||
|
|
|
||||||
|
|
@ -28,6 +28,8 @@ from data_layer.nutrition_metrics import (
|
||||||
get_nutrition_days_data,
|
get_nutrition_days_data,
|
||||||
get_protein_targets_data
|
get_protein_targets_data
|
||||||
)
|
)
|
||||||
|
from data_layer.prompt_output_compact import format_scalar_for_prompt_text
|
||||||
|
|
||||||
from data_layer.activity_metrics import (
|
from data_layer.activity_metrics import (
|
||||||
get_activity_summary_data,
|
get_activity_summary_data,
|
||||||
get_activity_detail_data,
|
get_activity_detail_data,
|
||||||
|
|
@ -48,6 +50,8 @@ from data_layer.health_metrics import (
|
||||||
get_vo2_max_data
|
get_vo2_max_data
|
||||||
)
|
)
|
||||||
|
|
||||||
|
from data_layer.prompt_output_compact import compact_json_payload_for_prompts
|
||||||
|
|
||||||
from placeholder_registry import build_ai_placeholder_caption, get_registry
|
from placeholder_registry import build_ai_placeholder_caption, get_registry
|
||||||
|
|
||||||
# {{key|d}} — nur description anhängen; {{key|x}} — nur Erklärung (ai_caption / Registry)
|
# {{key|d}} — nur description anhängen; {{key|x}} — nur Erklärung (ai_caption / Registry)
|
||||||
|
|
@ -348,7 +352,11 @@ def get_activity_summary(profile_id: str, days: int = 14) -> str:
|
||||||
if data['confidence'] == 'insufficient':
|
if data['confidence'] == 'insufficient':
|
||||||
return f"Keine Aktivitäten in den letzten {days} Tagen"
|
return f"Keine Aktivitäten in den letzten {days} Tagen"
|
||||||
|
|
||||||
return f"{data['activity_count']} Einheiten in {days} Tagen (Ø {data['avg_duration_min']} min/Einheit, {data['total_kcal']} kcal gesamt)"
|
return (
|
||||||
|
f"{data['activity_count']} Einheiten in {days} Tagen (Ø "
|
||||||
|
f"{format_scalar_for_prompt_text(data['avg_duration_min'])} min/Einheit, "
|
||||||
|
f"{format_scalar_for_prompt_text(data['total_kcal'])} kcal gesamt)"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def calculate_age(dob) -> str:
|
def calculate_age(dob) -> str:
|
||||||
|
|
@ -421,18 +429,23 @@ def get_activity_detail(profile_id: str, days: int = 14) -> str:
|
||||||
# Format as readable list (max 20 entries to avoid token bloat)
|
# Format as readable list (max 20 entries to avoid token bloat)
|
||||||
lines = []
|
lines = []
|
||||||
for activity in data["activities"][:20]:
|
for activity in data["activities"][:20]:
|
||||||
hr_str = f", HF={activity['hr_avg']}" if activity.get("hr_avg") else ""
|
hr_str = (
|
||||||
|
f", HF={format_scalar_for_prompt_text(activity['hr_avg'])}"
|
||||||
|
if activity.get("hr_avg") is not None
|
||||||
|
else ""
|
||||||
|
)
|
||||||
eav_parts = []
|
eav_parts = []
|
||||||
for m in activity.get("session_metrics") or []:
|
for m in activity.get("session_metrics") or []:
|
||||||
k, v = m.get("key"), m.get("value")
|
k, v = m.get("key"), m.get("value")
|
||||||
if k is None or v is None:
|
if k is None or v is None:
|
||||||
continue
|
continue
|
||||||
label = m.get("name_de") or m.get("name_en") or k
|
label = m.get("name_de") or m.get("name_en") or k
|
||||||
eav_parts.append(f"{label} ({k})={v}")
|
eav_parts.append(f"{label} ({k})={format_scalar_for_prompt_text(v)}")
|
||||||
eav_str = f" | EAV: {'; '.join(eav_parts)}" if eav_parts else ""
|
eav_str = f" | EAV: {'; '.join(eav_parts)}" if eav_parts else ""
|
||||||
lines.append(
|
lines.append(
|
||||||
f"{activity['date']}: {activity['activity_type']} "
|
f"{activity['date']}: {activity['activity_type']} "
|
||||||
f"({activity['duration_min']}min, {activity['kcal_active']}kcal{hr_str}{eav_str})"
|
f"({format_scalar_for_prompt_text(activity['duration_min'])}min, "
|
||||||
|
f"{format_scalar_for_prompt_text(activity['kcal_active'])}kcal{hr_str}{eav_str})"
|
||||||
)
|
)
|
||||||
|
|
||||||
return "\n".join(lines)
|
return "\n".join(lines)
|
||||||
|
|
@ -1028,8 +1041,8 @@ def _safe_json(func_name: str, profile_id: str) -> str:
|
||||||
# If already string, return it; otherwise convert to JSON
|
# If already string, return it; otherwise convert to JSON
|
||||||
if isinstance(result, str):
|
if isinstance(result, str):
|
||||||
return result
|
return result
|
||||||
else:
|
compacted = compact_json_payload_for_prompts(result)
|
||||||
return json.dumps(result, ensure_ascii=False, default=str)
|
return json.dumps(compacted, ensure_ascii=False, default=str)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"[ERROR] _safe_json({func_name}, {profile_id}): {type(e).__name__}: {e}")
|
print(f"[ERROR] _safe_json({func_name}, {profile_id}): {type(e).__name__}: {e}")
|
||||||
traceback.print_exc()
|
traceback.print_exc()
|
||||||
|
|
|
||||||
|
|
@ -29,6 +29,7 @@ from csv_parser.core import (
|
||||||
iter_csv_dict_rows,
|
iter_csv_dict_rows,
|
||||||
normalize_header_for_signature,
|
normalize_header_for_signature,
|
||||||
parse_csv_sample,
|
parse_csv_sample,
|
||||||
|
resolve_effective_csv_delimiter,
|
||||||
)
|
)
|
||||||
from csv_parser.type_converter import build_row_after_mapping, diagnose_row_mapping
|
from csv_parser.type_converter import build_row_after_mapping, diagnose_row_mapping
|
||||||
from csv_parser.field_units import source_unit_choices_for_field
|
from csv_parser.field_units import source_unit_choices_for_field
|
||||||
|
|
@ -393,7 +394,8 @@ async def csv_import_diagnose(
|
||||||
tc = m.get("type_conversions")
|
tc = m.get("type_conversions")
|
||||||
if not isinstance(tc, dict):
|
if not isinstance(tc, dict):
|
||||||
tc = {}
|
tc = {}
|
||||||
delim = str(m.get("delimiter") or ",")
|
tpl_delim = str(m.get("delimiter") or ",").strip() or ","
|
||||||
|
delim = resolve_effective_csv_delimiter(text, tpl_delim)
|
||||||
exec_module = str(m["module"])
|
exec_module = str(m["module"])
|
||||||
|
|
||||||
rows_out: list[dict[str, Any]] = []
|
rows_out: list[dict[str, Any]] = []
|
||||||
|
|
@ -418,6 +420,7 @@ async def csv_import_diagnose(
|
||||||
"mapping_id": mapping_id,
|
"mapping_id": mapping_id,
|
||||||
"mapping_name": m.get("mapping_name"),
|
"mapping_name": m.get("mapping_name"),
|
||||||
"module": exec_module,
|
"module": exec_module,
|
||||||
|
"delimiter_template": tpl_delim,
|
||||||
"delimiter_used": delim,
|
"delimiter_used": delim,
|
||||||
"has_header": bool(m.get("has_header", True)),
|
"has_header": bool(m.get("has_header", True)),
|
||||||
"rows_diagnosed": len(rows_out),
|
"rows_diagnosed": len(rows_out),
|
||||||
|
|
|
||||||
|
|
@ -6,6 +6,7 @@ from unittest.mock import patch
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from data_layer.activity_session_metrics import (
|
from data_layer.activity_session_metrics import (
|
||||||
|
_normalize_metric_value_for_read,
|
||||||
ActivitySessionMetricsError,
|
ActivitySessionMetricsError,
|
||||||
enrich_sessions_with_metrics,
|
enrich_sessions_with_metrics,
|
||||||
merge_column_backed_and_eav_metrics,
|
merge_column_backed_and_eav_metrics,
|
||||||
|
|
@ -121,6 +122,38 @@ def test_merge_parameter_schema_includes_descriptions():
|
||||||
assert merged[0]["description_en"] == "5 min average power"
|
assert merged[0]["description_en"] == "5 min average power"
|
||||||
|
|
||||||
|
|
||||||
|
def test_merge_eav_float_value_normalized_no_long_tail():
    """Layer 1: a long float (e.g. kcal_per_km) comes back without its long tail."""
    shared = {
        "training_parameter_id": 1,
        "key": "kcal_per_km",
        "data_type": "float",
        "unit": "kcal/km",
    }
    schema_entry = {
        **shared,
        "validation_rules": {},
        "source_field": None,
        "name_de": "Kcal/km",
        "name_en": "kcal/km",
        "description_de": None,
        "description_en": None,
        "param_category": "performance",
    }
    eav_entry = {**shared, "value": 51.5818181818181818}

    merged = merge_column_backed_and_eav_metrics({}, [schema_entry], [eav_entry])

    assert len(merged) == 1
    assert "581818" not in repr(merged[0]["value"])
|
||||||
|
|
||||||
|
|
||||||
def test_merge_column_backed_includes_human_labels_from_schema():
|
def test_merge_column_backed_includes_human_labels_from_schema():
|
||||||
schema = [
|
schema = [
|
||||||
{
|
{
|
||||||
|
|
@ -174,6 +207,11 @@ def test_row_value_tuple_mapping():
|
||||||
assert _row_value_tuple("boolean", True) == (None, None, None, True)
|
assert _row_value_tuple("boolean", True) == (None, None, None, True)
|
||||||
|
|
||||||
|
|
||||||
|
def test_normalize_metric_string_dtype_compacts_numeric_strings():
    """String-typed metrics: numeric strings are compacted, free text is kept."""
    cases = (
        ("51.58181818181818", 52),
        ("Freitext", "Freitext"),
    )
    for raw, expected in cases:
        assert _normalize_metric_value_for_read("string", raw) == expected
|
||||||
|
|
||||||
|
|
||||||
class _FakeCursor:
|
class _FakeCursor:
|
||||||
"""Sequences fetchone/fetchall for resolve_activity_attribute_schema."""
|
"""Sequences fetchone/fetchall for resolve_activity_attribute_schema."""
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -11,6 +11,7 @@ from csv_parser.core import (
|
||||||
headers_signature_rank_metrics,
|
headers_signature_rank_metrics,
|
||||||
get_csv_import_limits,
|
get_csv_import_limits,
|
||||||
iter_csv_dict_rows,
|
iter_csv_dict_rows,
|
||||||
|
resolve_effective_csv_delimiter,
|
||||||
)
|
)
|
||||||
from csv_parser.field_units import source_unit_choices_for_field
|
from csv_parser.field_units import source_unit_choices_for_field
|
||||||
from csv_parser.mapping_suggest import build_type_conversions_for_mapping
|
from csv_parser.mapping_suggest import build_type_conversions_for_mapping
|
||||||
|
|
@ -29,6 +30,20 @@ def test_sniff_delimiter():
|
||||||
assert sniff_delimiter("a,b,c") == ","
|
assert sniff_delimiter("a,b,c") == ","
|
||||||
|
|
||||||
|
|
||||||
|
def test_resolve_effective_csv_delimiter_semicolon_file_comma_template():
    """German Apple export: ";" in the file while the English template stores ","."""
    csv_lines = [
        "Workout Type;Start;End;Duration;Aktive Energie (kJ)",
        "Laufen;2026-04-17 16:25;2026-04-17 17:00;00:30:00;500",
    ]
    text = "\n".join(csv_lines) + "\n"
    for template in (",", None):
        assert resolve_effective_csv_delimiter(text, template) == ";"
|
||||||
|
|
||||||
|
|
||||||
|
def test_resolve_effective_csv_delimiter_comma_file_keeps_template():
    """A comma-separated file with a comma template resolves to the comma."""
    csv_text = "Workout Type,Start,End\nWalk,2026-04-17 16:25,2026-04-17 17:00\n"
    chosen = resolve_effective_csv_delimiter(csv_text, ",")
    assert chosen == ","
|
||||||
|
|
||||||
|
|
||||||
def test_parse_csv_sample_header():
|
def test_parse_csv_sample_header():
|
||||||
text = "Date;kcal\n2024-01-01;2000\n"
|
text = "Date;kcal\n2024-01-01;2000\n"
|
||||||
headers, rows, delim = parse_csv_sample(text, delimiter=";", max_data_rows=3)
|
headers, rows, delim = parse_csv_sample(text, delimiter=";", max_data_rows=3)
|
||||||
|
|
|
||||||
89
backend/tests/test_prompt_output_compact.py
Normal file
89
backend/tests/test_prompt_output_compact.py
Normal file
|
|
@ -0,0 +1,89 @@
|
||||||
|
"""Tests für data_layer.prompt_output_compact (KI-Platzhalter, Token)."""
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from data_layer.prompt_output_compact import (
|
||||||
|
compact_float_for_prompt,
|
||||||
|
compact_json_payload_for_prompts,
|
||||||
|
format_scalar_for_prompt_text,
|
||||||
|
normalize_prompt_number,
|
||||||
|
session_metrics_list_to_key_value_compact,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize(
    "x,expected",
    [
        (0.0, 0),
        (123.456, 123),
        (45.67, 46),
        (9.876, 9.88),
        (0.99, 0.99),
        (0.055, 0.055),
        (0.01234, 0.012),
    ],
)
def test_compact_float_for_prompt(x, expected):
    """Each magnitude band is rounded to its expected precision."""
    result = compact_float_for_prompt(x)
    if not isinstance(expected, float):
        # Integer expectations must match exactly.
        assert result == expected
        return
    # Float expectations are compared with a small absolute tolerance.
    assert abs(float(result) - expected) < 0.0001
|
||||||
|
|
||||||
|
|
||||||
|
def test_compact_json_nested():
    """Floats are compacted at every nesting level of dicts and lists."""
    payload = {"a": 12.345678, "b": {"c": 0.0666}, "d": [1.111, 2.0]}
    compacted = compact_json_payload_for_prompts(payload)

    assert compacted["a"] == 12
    inner = compacted["b"]["c"]
    assert abs(inner - 0.067) < 0.001
    first_item = compacted["d"][0]
    assert first_item == 1.11
|
||||||
|
|
||||||
|
|
||||||
|
def test_format_scalar_no_long_float_tail():
    """A float with a repeating tail is rendered as a short string."""
    rendered = format_scalar_for_prompt_text(51.5818181818181818)
    assert len(rendered) <= 8
    assert "181818" not in rendered
|
||||||
|
|
||||||
|
|
||||||
|
def test_format_scalar_numeric_string_no_long_tail():
    """A numeric string with a repeating tail is rendered without that tail."""
    long_numeric_text = "51.581818181818181818"
    rendered = format_scalar_for_prompt_text(long_numeric_text)
    assert "181818" not in rendered
|
||||||
|
|
||||||
|
|
||||||
|
def test_session_metrics_string_dtype_compacts_numeric_strings():
    """String-typed metrics holding numeric text are compacted to numbers."""
    raw_values = {
        "temp_c": "22.333333333333336",
        "kcal_per_km": "51.581818181818181818",
    }
    metrics = [
        {"key": key, "data_type": "string", "value": value}
        for key, value in raw_values.items()
    ]

    compact = session_metrics_list_to_key_value_compact(metrics)

    assert compact["temp_c"] == 22
    assert compact["kcal_per_km"] == 52
|
||||||
|
|
||||||
|
|
||||||
|
def test_session_metrics_key_value_only():
    """Only key/value pairs survive; labels, descriptions and units are dropped."""
    rpe_metric = {
        "key": "rpe",
        "data_type": "integer",
        "value": 7,
        "name_de": "RPE",
        "description_de": "lang",
    }
    watts_metric = {
        "key": "watts",
        "data_type": "float",
        "value": 199.999,
        "unit": "W",
    }

    compact = session_metrics_list_to_key_value_compact([rpe_metric, watts_metric])

    assert compact == {"rpe": 7, "watts": 200}
    assert "name_de" not in str(compact)
|
||||||
Loading…
Reference in New Issue
Block a user