Merge pull request 'feat: Improve float parsing logic for enhanced accuracy in numeric conversions' (#79) from develop into main
All checks were successful
Deploy Production / deploy (push) Successful in 55s
Build Test / pytest-backend (push) Successful in 8s
Build Test / lint-backend (push) Successful in 0s
Build Test / build-frontend (push) Successful in 17s

Reviewed-on: #79
This commit is contained in:
Lars 2026-04-12 07:30:44 +02:00
commit 72eb94d186
2 changed files with 62 additions and 8 deletions

View File

@ -66,7 +66,13 @@ def _parse_float_auto(s: str) -> float:
"""
Heuristik ohne festes Locale: Punkt/Komma als Tausender vs. Dezimal,
basierend auf der letzten erkannten Trennstelle und Gruppierung.
Apple Health u. a. liefern berechnete Mittelwerte mit vielen Nachkommastellen
(z. B. «96.874937») und Energie als «596.668904» – dabei ist der Punkt
immer Dezimaltrenner. Früher wurden lange Nachkommateile fälschlich so
behandelt, dass der Punkt entfernt wurde (Tausender-Heuristik).
"""
raw = s
s = _normalize_num_token(s)
if not s or s in ("-", "", ""):
raise ValueError("leer")
@ -90,18 +96,35 @@ def _parse_float_auto(s: str) -> float:
s = s.replace(",", "")
elif last_comma >= 0:
parts = s.split(",")
if len(parts) == 2 and len(parts[1]) <= 2:
s = parts[0].replace(".", "") + "." + parts[1]
elif len(parts) == 2 and len(parts[1]) == 3 and len(parts[0]) <= 3:
s = parts[0] + parts[1]
if len(parts) == 2:
left, right = parts[0], parts[1]
if not right:
raise ValueError("leer")
left_digits = left.replace(".", "")
# Langer Nachkommateil → Dezimalkomma; «1.234,56»-Fälle oben mit Punkt+Komma
if len(right) > 3 or len(right) <= 2:
s = left_digits + "." + right.replace(".", "")
elif len(right) == 3 and len(left_digits) <= 3:
s = left_digits + right
else:
s = left_digits + "." + right.replace(".", "")
else:
s = s.replace(",", "")
elif last_dot >= 0:
parts = s.split(".")
if len(parts) == 2 and len(parts[1]) <= 2:
s = parts[0].replace(",", "") + "." + parts[1]
elif len(parts) == 2 and len(parts[1]) == 3 and len(parts[0]) <= 3:
s = parts[0] + parts[1]
if len(parts) == 2:
left, right = parts[0], parts[1]
if not right:
raise ValueError("leer")
left_digits = left.replace(",", "")
# Genau ein Punkt: viele Nachkommastellen → Apple/US-Dezimalpunkt (nicht „.“ streichen)
if len(right) > 3 or len(right) <= 2:
s = left_digits + "." + right
elif len(right) == 3:
if len(left_digits) == 1 and left_digits != "0" and left_digits.isdigit():
s = left_digits + right
else:
s = left_digits + "." + right
elif len(parts) > 2:
if len(parts[-1]) <= 2:
s = "".join(parts[:-1]) + "." + parts[-1]
@ -345,6 +368,18 @@ def _parse_int(raw: str, spec: Mapping[str, Any]) -> int:
raise ValueError("leer")
v = int(digits)
return -v if neg else v
# Ohne flexible: «108.0» / «96,8» trotzdem als Zahl mit Nachkommastellen
s2 = _normalize_num_token(s)
if "," in s2 or "." in s2:
dec = spec.get("decimal_separator", ".")
try:
if dec in (None, "auto"):
fv = _parse_float_auto(s2)
else:
fv = _parse_float(raw, str(dec))
return int(round(fv))
except (ValueError, InvalidOperation):
pass
s = re.sub(r"[^\d-]", "", s)
if not s:
raise ValueError("leer")

View File

@ -81,6 +81,25 @@ def test_convert_kcal_via_source_unit_kj():
assert abs(k - 1000.0) < 0.05
def test_apple_health_long_decimal_dot_preserved():
    """Long fractional parts keep the dot as the decimal separator.

    Covers two Apple-Health-style inputs: an average heart rate converted
    to ``int`` and a resting-energy value given with ``source_unit`` "kj".
    """
    # Integer field: a value with many decimals must round to 97 rather
    # than having its dot dropped as if it were a thousands separator.
    heart_rate = convert_value(
        "96.8749374730629",
        "hr_avg",
        {"type": "int", "flexible": True},
        module="activity",
    )
    assert heart_rate == 97

    # Float field with an explicit "." decimal separator and kJ source unit.
    # NOTE(review): the 140-145 window presumably reflects a kJ -> kcal
    # conversion (596.67 / 4.184 is about 142.6) - confirm in convert_value.
    resting_energy = convert_value(
        "596.6689047323086",
        "kcal_resting",
        {
            "type": "float",
            "decimal_separator": ".",
            "flexible": True,
            "source_unit": "kj",
        },
        module="activity",
    )
    assert 140.0 < resting_energy < 145.0
def test_parse_float_auto_us_thousands_comma():
    """With separator "auto", «12,345» is still read as twelve thousand."""
    # Each call gets its own spec dict, exactly as two literals would.
    plain_spec = {"type": "float", "decimal_separator": "auto"}
    grouped_spec = {"type": "float", "decimal_separator": "auto"}

    # Baseline: no separator at all.
    plain = convert_value("12345", "x", plain_spec)
    assert plain == 12345.0

    # A single comma followed by exactly three digits is a thousands
    # group, not a decimal comma.
    grouped = convert_value("12,345", "x", grouped_spec)
    assert abs(grouped - 12345.0) < 0.01
def test_convert_protein_kg_to_g():
spec = {"type": "float", "source_unit": "kg", "decimal_separator": "."}
g = convert_value("0.1", "protein_g", spec, module="nutrition")