"""Tests für CSV-Parser Foundation (Issue #21).""" import pytest from csv_parser.core import ( decode_raw_bytes, sniff_delimiter, parse_csv_sample, column_signature, headers_signature_match_score, get_csv_import_limits, iter_csv_dict_rows, ) from csv_parser.type_converter import convert_value, build_row_after_mapping def test_decode_bom_utf8(): raw = "\ufeffa;b;c\n1;2;3".encode("utf-8-sig") t = decode_raw_bytes(raw) assert not t.startswith("\ufeff") assert "a;b;c" in t def test_sniff_delimiter(): assert sniff_delimiter("a;b;c;d") == ";" assert sniff_delimiter("a,b,c") == "," def test_parse_csv_sample_header(): text = "Date;kcal\n2024-01-01;2000\n" headers, rows, delim = parse_csv_sample(text, delimiter=";", max_data_rows=3) assert headers == ["Date", "kcal"] assert delim == ";" assert rows[0]["Date"] == "2024-01-01" assert rows[0]["kcal"] == "2000" def test_column_signature_sorted_unique(): sig = column_signature(["B", "a", "a"]) assert sig == ["a", "b"] def test_jaccard(): s1 = column_signature(["Date", "Calories"]) s2 = column_signature(["Date", "Calories", "Fat"]) assert headers_signature_match_score(s1, s2) == pytest.approx(2 / 3) def test_get_csv_import_limits_default(): assert get_csv_import_limits(None)["max_rows_per_file"] == 50_000 def test_convert_date_and_kcal_factor(): d = convert_value("15.01.2024", "date", {"type": "date", "format": "dd.mm.yyyy"}) assert d.year == 2024 and d.month == 1 and d.day == 15 k = convert_value("8000", "kcal", {"type": "float", "conversion_factor": 0.239, "decimal_separator": "."}) assert abs(k - 8000 * 0.239) < 0.01 def test_iter_csv_dict_rows_full_file(): text = "a;b\n1;2\n3;4\n" rows = list(iter_csv_dict_rows(text, ";", has_header=True)) assert rows == [{"a": "1", "b": "2"}, {"a": "3", "b": "4"}] def test_build_row_after_mapping(): csv_row = {"Datum": "01.01.2024", "kj": "4200"} fm = {"Datum": "date", "kj": "kcal"} tc = { "date": {"type": "date", "format": "dd.mm.yyyy"}, "kcal": {"type": "float", "conversion_factor": 0.239, "decimal_separator": "."}, } out = build_row_after_mapping(csv_row, fm, tc) assert out["date"].month == 1 assert out["kcal"] is not None def test_build_row_fddb_raw_header_keys_match_normalized_template(): """FDDB: DictReader liefert deutsche Überschrift, Seed nutzt normalisierten Key.""" csv_row = { "Datum Tag Monat Jahr Stunde Minute": "01.01.2024 8:30", "kJ": "42000", "Fett (g)": "50", "KH (g)": "200", "Protein (g)": "100", } fm = { "datum_tag_monat_jahr_stunde_minute": "date", "kj": "kcal", "fett_g": "fat_g", "kh_g": "carbs_g", "protein_g": "protein_g", } tc = { "date": {"type": "date", "format": "dd.mm.yyyy HH:MM", "extract": "date_only"}, "kcal": { "type": "float", "conversion_factor": 0.239, "decimal_separator": ",", }, "fat_g": {"type": "float", "decimal_separator": ","}, "carbs_g": {"type": "float", "decimal_separator": ","}, "protein_g": {"type": "float", "decimal_separator": ","}, } out = build_row_after_mapping(csv_row, fm, tc) assert out["date"].year == 2024 and out["date"].month == 1 and out["date"].day == 1 def test_convert_date_ddmm_with_seconds(): d = convert_value( "15.01.2024 14:30:00", "date", {"type": "date", "format": "dd.mm.yyyy HH:MM", "extract": "date_only"}, ) assert d.month == 1 and d.day == 15 def test_float_decimal_separator_auto_eu_us(): assert abs(convert_value("1.234,56", "x", {"type": "float", "decimal_separator": "auto"}) - 1234.56) < 1e-9 assert abs(convert_value("1,234.56", "x", {"type": "float", "decimal_separator": "auto"}) - 1234.56) < 1e-9 assert abs(convert_value("1234,5", "x", {"type": "float", "decimal_separator": "auto"}) - 1234.5) < 1e-9 def test_float_flexible_falls_back_to_auto(): spec = {"type": "float", "decimal_separator": ",", "flexible": True} assert abs(convert_value("1234.56", "x", spec) - 1234.56) < 1e-9 def test_date_flexible_iso_while_primary_ddmm(): spec = { "type": "date", "format": "dd.mm.yyyy", "flexible": True, "extract": "date_only", } d1 = convert_value("2024-03-15", "d", spec) d2 = convert_value("15.03.2024", "d", spec) assert d1 == d2 def test_date_extra_formats_without_days_in_name(): spec = { "type": "date", "format": "yyyy-mm-dd", "formats": ["%d.%m.%Y"], "extract": "date_only", } assert convert_value("08.04.2026", "d", spec).day == 8 def test_int_flexible_thousands(): assert convert_value("1.234", "n", {"type": "int", "flexible": True}) == 1234 def test_datetime_flexible(): spec = {"type": "datetime", "format": "yyyy-mm-dd HH:MM:SS", "flexible": True} dtv = convert_value("15.01.2024 14:30:00", "t", spec) assert dtv.month == 1 and dtv.day == 15 and dtv.hour == 14