feat: Improve float parsing logic for enhanced accuracy in numeric conversions #79
|
|
@ -66,7 +66,13 @@ def _parse_float_auto(s: str) -> float:
|
|||
"""
|
||||
Heuristik ohne festes Locale: Punkt/Komma als Tausender vs. Dezimal,
|
||||
basierend auf der letzten erkannten Trennstelle und Gruppierung.
|
||||
|
||||
Apple Health u. a. liefern berechnete Mittelwerte mit vielen Nachkommastellen
|
||||
(z. B. «96.874937…») und Energie als «596.668904…» — dabei ist der Punkt
|
||||
immer Dezimaltrenner. Früher wurden lange Nachkommateile fälschlich so
|
||||
behandelt, dass der Punkt entfernt wurde (Tausender-Heuristik).
|
||||
"""
|
||||
raw = s
|
||||
s = _normalize_num_token(s)
|
||||
if not s or s in ("-", "—", "–"):
|
||||
raise ValueError("leer")
|
||||
|
|
@ -90,18 +96,35 @@ def _parse_float_auto(s: str) -> float:
|
|||
s = s.replace(",", "")
|
||||
elif last_comma >= 0:
|
||||
parts = s.split(",")
|
||||
if len(parts) == 2 and len(parts[1]) <= 2:
|
||||
s = parts[0].replace(".", "") + "." + parts[1]
|
||||
elif len(parts) == 2 and len(parts[1]) == 3 and len(parts[0]) <= 3:
|
||||
s = parts[0] + parts[1]
|
||||
if len(parts) == 2:
|
||||
left, right = parts[0], parts[1]
|
||||
if not right:
|
||||
raise ValueError("leer")
|
||||
left_digits = left.replace(".", "")
|
||||
# Langer Nachkommateil → Dezimalkomma; «1.234,56»-Fälle oben mit Punkt+Komma
|
||||
if len(right) > 3 or len(right) <= 2:
|
||||
s = left_digits + "." + right.replace(".", "")
|
||||
elif len(right) == 3 and len(left_digits) <= 3:
|
||||
s = left_digits + right
|
||||
else:
|
||||
s = left_digits + "." + right.replace(".", "")
|
||||
else:
|
||||
s = s.replace(",", "")
|
||||
elif last_dot >= 0:
|
||||
parts = s.split(".")
|
||||
if len(parts) == 2 and len(parts[1]) <= 2:
|
||||
s = parts[0].replace(",", "") + "." + parts[1]
|
||||
elif len(parts) == 2 and len(parts[1]) == 3 and len(parts[0]) <= 3:
|
||||
s = parts[0] + parts[1]
|
||||
if len(parts) == 2:
|
||||
left, right = parts[0], parts[1]
|
||||
if not right:
|
||||
raise ValueError("leer")
|
||||
left_digits = left.replace(",", "")
|
||||
# Genau ein Punkt: viele Nachkommastellen → Apple/US-Dezimalpunkt (nicht „.“ streichen)
|
||||
if len(right) > 3 or len(right) <= 2:
|
||||
s = left_digits + "." + right
|
||||
elif len(right) == 3:
|
||||
if len(left_digits) == 1 and left_digits != "0" and left_digits.isdigit():
|
||||
s = left_digits + right
|
||||
else:
|
||||
s = left_digits + "." + right
|
||||
elif len(parts) > 2:
|
||||
if len(parts[-1]) <= 2:
|
||||
s = "".join(parts[:-1]) + "." + parts[-1]
|
||||
|
|
@ -345,6 +368,18 @@ def _parse_int(raw: str, spec: Mapping[str, Any]) -> int:
|
|||
raise ValueError("leer")
|
||||
v = int(digits)
|
||||
return -v if neg else v
|
||||
# Ohne flexible: «108.0» / «96,8» trotzdem als Zahl mit Nachkommastellen
|
||||
s2 = _normalize_num_token(s)
|
||||
if "," in s2 or "." in s2:
|
||||
dec = spec.get("decimal_separator", ".")
|
||||
try:
|
||||
if dec in (None, "auto"):
|
||||
fv = _parse_float_auto(s2)
|
||||
else:
|
||||
fv = _parse_float(raw, str(dec))
|
||||
return int(round(fv))
|
||||
except (ValueError, InvalidOperation):
|
||||
pass
|
||||
s = re.sub(r"[^\d-]", "", s)
|
||||
if not s:
|
||||
raise ValueError("leer")
|
||||
|
|
|
|||
|
|
@ -81,6 +81,25 @@ def test_convert_kcal_via_source_unit_kj():
|
|||
assert abs(k - 1000.0) < 0.05
|
||||
|
||||
|
||||
def test_apple_health_long_decimal_dot_preserved():
    """Apple Health values with many fractional digits keep the dot as decimal separator.

    Checks an average heart rate (integer field) and a resting-energy value
    (float field declared with ``source_unit`` "kj"), both carrying a long
    fractional part after a single dot.
    """
    # Integer field: the long fraction must be read as 96.87..., giving 97.
    heart_rate = convert_value(
        "96.8749374730629",
        "hr_avg",
        {"type": "int", "flexible": True},
        module="activity",
    )
    assert heart_rate == 97

    # Float field: the result must land in the expected window, which only
    # holds if the dot was not stripped from the input.
    resting_energy = convert_value(
        "596.6689047323086",
        "kcal_resting",
        {
            "type": "float",
            "decimal_separator": ".",
            "flexible": True,
            "source_unit": "kj",
        },
        module="activity",
    )
    assert 140.0 < resting_energy < 145.0
|
||||
|
||||
|
||||
def test_parse_float_auto_us_thousands_comma():
    """With the "auto" separator, «12,345» (no decimal point) is still read as thousands."""
    # Plain digits without any separator.
    plain = convert_value(
        "12345", "x", {"type": "float", "decimal_separator": "auto"}
    )
    assert plain == 12345.0

    # A single comma followed by exactly three digits is a grouping separator.
    grouped = convert_value(
        "12,345", "x", {"type": "float", "decimal_separator": "auto"}
    )
    assert abs(grouped - 12345.0) < 0.01
|
||||
|
||||
|
||||
def test_convert_protein_kg_to_g():
|
||||
spec = {"type": "float", "source_unit": "kg", "decimal_separator": "."}
|
||||
g = convert_value("0.1", "protein_g", spec, module="nutrition")
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user