feat: Improve float parsing logic for enhanced accuracy in numeric conversions
- Updated the `_parse_float_auto` function in `type_converter.py` to better handle decimal and thousands separators, particularly values with long fractional parts from sources such as Apple Health.
- Enhanced the logic for splitting and processing numeric strings so that values are interpreted correctly, including edge cases with multiple separators.
- Added handling for numeric strings that contain both commas and periods, making float parsing more robust.

These changes improve the accuracy of numeric conversions and make data processing more reliable across the application.
This commit is contained in:
parent
d7cefdd9e9
commit
90a27846ca
|
|
@ -66,7 +66,13 @@ def _parse_float_auto(s: str) -> float:
|
|||
"""
|
||||
Heuristik ohne festes Locale: Punkt/Komma als Tausender vs. Dezimal,
|
||||
basierend auf der letzten erkannten Trennstelle und Gruppierung.
|
||||
|
||||
Apple Health u. a. liefern berechnete Mittelwerte mit vielen Nachkommastellen
|
||||
(z. B. «96.874937…») und Energie als «596.668904…» — dabei ist der Punkt
|
||||
immer Dezimaltrenner. Früher wurden lange Nachkommateile fälschlich so
|
||||
behandelt, dass der Punkt entfernt wurde (Tausender-Heuristik).
|
||||
"""
|
||||
raw = s
|
||||
s = _normalize_num_token(s)
|
||||
if not s or s in ("-", "—", "–"):
|
||||
raise ValueError("leer")
|
||||
|
|
@ -90,18 +96,35 @@ def _parse_float_auto(s: str) -> float:
|
|||
s = s.replace(",", "")
|
||||
elif last_comma >= 0:
|
||||
parts = s.split(",")
|
||||
if len(parts) == 2 and len(parts[1]) <= 2:
|
||||
s = parts[0].replace(".", "") + "." + parts[1]
|
||||
elif len(parts) == 2 and len(parts[1]) == 3 and len(parts[0]) <= 3:
|
||||
s = parts[0] + parts[1]
|
||||
if len(parts) == 2:
|
||||
left, right = parts[0], parts[1]
|
||||
if not right:
|
||||
raise ValueError("leer")
|
||||
left_digits = left.replace(".", "")
|
||||
# Langer Nachkommateil → Dezimalkomma; «1.234,56»-Fälle oben mit Punkt+Komma
|
||||
if len(right) > 3 or len(right) <= 2:
|
||||
s = left_digits + "." + right.replace(".", "")
|
||||
elif len(right) == 3 and len(left_digits) <= 3:
|
||||
s = left_digits + right
|
||||
else:
|
||||
s = left_digits + "." + right.replace(".", "")
|
||||
else:
|
||||
s = s.replace(",", "")
|
||||
elif last_dot >= 0:
|
||||
parts = s.split(".")
|
||||
if len(parts) == 2 and len(parts[1]) <= 2:
|
||||
s = parts[0].replace(",", "") + "." + parts[1]
|
||||
elif len(parts) == 2 and len(parts[1]) == 3 and len(parts[0]) <= 3:
|
||||
s = parts[0] + parts[1]
|
||||
if len(parts) == 2:
|
||||
left, right = parts[0], parts[1]
|
||||
if not right:
|
||||
raise ValueError("leer")
|
||||
left_digits = left.replace(",", "")
|
||||
# Genau ein Punkt: viele Nachkommastellen → Apple/US-Dezimalpunkt (nicht „.“ streichen)
|
||||
if len(right) > 3 or len(right) <= 2:
|
||||
s = left_digits + "." + right
|
||||
elif len(right) == 3:
|
||||
if len(left_digits) == 1 and left_digits != "0" and left_digits.isdigit():
|
||||
s = left_digits + right
|
||||
else:
|
||||
s = left_digits + "." + right
|
||||
elif len(parts) > 2:
|
||||
if len(parts[-1]) <= 2:
|
||||
s = "".join(parts[:-1]) + "." + parts[-1]
|
||||
|
|
@ -345,6 +368,18 @@ def _parse_int(raw: str, spec: Mapping[str, Any]) -> int:
|
|||
raise ValueError("leer")
|
||||
v = int(digits)
|
||||
return -v if neg else v
|
||||
# Ohne flexible: «108.0» / «96,8» trotzdem als Zahl mit Nachkommastellen
|
||||
s2 = _normalize_num_token(s)
|
||||
if "," in s2 or "." in s2:
|
||||
dec = spec.get("decimal_separator", ".")
|
||||
try:
|
||||
if dec in (None, "auto"):
|
||||
fv = _parse_float_auto(s2)
|
||||
else:
|
||||
fv = _parse_float(raw, str(dec))
|
||||
return int(round(fv))
|
||||
except (ValueError, InvalidOperation):
|
||||
pass
|
||||
s = re.sub(r"[^\d-]", "", s)
|
||||
if not s:
|
||||
raise ValueError("leer")
|
||||
|
|
|
|||
|
|
@ -81,6 +81,25 @@ def test_convert_kcal_via_source_unit_kj():
|
|||
assert abs(k - 1000.0) < 0.05
|
||||
|
||||
|
||||
def test_apple_health_long_decimal_dot_preserved():
    """Apple Health: average heart rate and energy carry many decimals; the dot is the decimal separator."""
    # Integer field: the long fractional part must be rounded, not have its dot stripped.
    heart_rate = convert_value(
        "96.8749374730629",
        "hr_avg",
        {"type": "int", "flexible": True},
        module="activity",
    )
    assert heart_rate == 97

    # Float field with an explicit "." separator and a kJ source unit; the result
    # is only bounded by a range here, not pinned to an exact value.
    resting_spec = {
        "type": "float",
        "decimal_separator": ".",
        "flexible": True,
        "source_unit": "kj",
    }
    resting_kcal = convert_value(
        "596.6689047323086",
        "kcal_resting",
        resting_spec,
        module="activity",
    )
    assert 140.0 < resting_kcal < 145.0
|
||||
|
||||
|
||||
def test_parse_float_auto_us_thousands_comma():
    """«12,345» without a decimal point may still be read as a thousands group."""

    def parse_auto(token):
        # A fresh spec dict per call, exactly as when each call spells out its own literal.
        return convert_value(token, "x", {"type": "float", "decimal_separator": "auto"})

    ungrouped = parse_auto("12345")
    assert ungrouped == 12345.0

    grouped = parse_auto("12,345")
    assert abs(grouped - 12345.0) < 0.01
|
||||
|
||||
|
||||
def test_convert_protein_kg_to_g():
|
||||
spec = {"type": "float", "source_unit": "kg", "decimal_separator": "."}
|
||||
g = convert_value("0.1", "protein_g", spec, module="nutrition")
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user