- Added permissions for editing and deleting CSV field mappings. - Created type converter for CSV cells to handle various data types. - Implemented database migrations for CSV field mappings and import logs. - Seeded initial system templates for nutrition and activity data imports. - Developed admin endpoints for managing system CSV templates. - Introduced user endpoints for CSV import analysis and mapping retrieval. - Added tests for core CSV parser functionalities, including delimiter detection and value conversion.
255 lines
8.3 KiB
Python
255 lines
8.3 KiB
Python
"""
|
|
CSV-Import: Nutzer-Endpunkte für Analyse, Mappings, Limits (Issue #21).
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import logging
|
|
from typing import Any, Optional
|
|
|
|
from fastapi import APIRouter, Depends, File, Form, HTTPException, UploadFile
|
|
from pydantic import BaseModel
|
|
|
|
from psycopg2.extras import Json
|
|
|
|
from auth import require_auth
|
|
from db import get_db, get_cursor, r2d
|
|
from csv_parser.core import (
|
|
decode_raw_bytes,
|
|
column_signature,
|
|
get_csv_import_limits,
|
|
headers_signature_match_score,
|
|
normalize_header_for_signature,
|
|
parse_csv_sample,
|
|
)
|
|
from csv_parser.module_registry import get_module_definition, list_modules, validate_field_mappings
|
|
|
|
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Every endpoint defined on this router is served below the /api/csv prefix
# and carries the "csv-import" tag.
router = APIRouter(prefix="/api/csv", tags=["csv-import"])
|
|
|
|
|
|
def _load_import_limits() -> dict[str, int]:
    """Load the CSV import limits from the ``system_config`` table.

    Looks up the row whose ``key`` is ``csv_import`` and passes it, converted
    with ``r2d``, to ``get_csv_import_limits``. When no such row exists,
    ``None`` is passed instead.
    """
    with get_db() as conn:
        cursor = get_cursor(conn)
        cursor.execute("SELECT value FROM system_config WHERE key = %s", ("csv_import",))
        config_row = cursor.fetchone()
        if not config_row:
            return get_csv_import_limits(None)
        return get_csv_import_limits(r2d(config_row))
|
|
|
|
|
|
def _mapping_to_summary(m: dict) -> dict:
|
|
return {
|
|
"id": m["id"],
|
|
"module": m["module"],
|
|
"name": m["mapping_name"],
|
|
"description": m.get("description"),
|
|
"is_system": m["is_system"],
|
|
"usage_count": m.get("usage_count"),
|
|
"success_rate": m.get("success_rate"),
|
|
"last_used_at": m.get("last_used_at"),
|
|
"created_at": m.get("created_at"),
|
|
}
|
|
|
|
|
|
@router.get("/modules")
def csv_modules(session: dict = Depends(require_auth)):
    """List the supported import modules together with their field definitions.

    Module ids for which ``get_module_definition`` returns nothing are left
    out of the result.
    """
    return {
        "modules": [
            {"id": module_id, "table": definition["table"], "fields": definition["fields"]}
            for module_id in list_modules()
            if (definition := get_module_definition(module_id))
        ]
    }
|
|
|
|
|
|
@router.get("/limits")
def csv_limits(session: dict = Depends(require_auth)):
    """Return the admin-configurable import limits (``system_config.csv_import``)."""
    limits = _load_import_limits()
    return limits
|
|
|
|
|
|
@router.get("/mappings")
def list_csv_mappings(
    module: Optional[str] = None,
    session: dict = Depends(require_auth),
):
    """Return the system templates plus the caller's own mappings.

    Args:
        module: Optional module filter; when omitted, mappings of all modules
            are returned.
        session: Authenticated session; its ``profile_id`` selects the user
            mappings.

    Returns:
        A dict with ``system_templates`` (ordered by usage count, then name)
        and ``user_mappings`` (ordered by last use, then name), each a list of
        mapping summaries.
    """
    profile_id = str(session["profile_id"])

    system_sql = """
        SELECT id, module, mapping_name, description, is_system, profile_id,
               usage_count, success_rate, last_used_at, created_at
        FROM csv_field_mappings
        WHERE is_system = true
          AND (%s::text IS NULL OR module = %s)
        ORDER BY usage_count DESC NULLS LAST, mapping_name
        """
    user_sql = """
        SELECT id, module, mapping_name, description, is_system, profile_id,
               usage_count, success_rate, last_used_at, created_at
        FROM csv_field_mappings
        WHERE is_system = false AND profile_id = %s::uuid
          AND (%s::text IS NULL OR module = %s)
        ORDER BY last_used_at DESC NULLS LAST, mapping_name
        """

    with get_db() as conn:
        cursor = get_cursor(conn)

        # The module filter is passed twice: once for the NULL test, once for
        # the equality comparison.
        cursor.execute(system_sql, (module, module))
        system_records = [r2d(record) for record in cursor.fetchall()]

        cursor.execute(user_sql, (profile_id, module, module))
        user_records = [r2d(record) for record in cursor.fetchall()]

        system_templates = [_mapping_to_summary(record) for record in system_records]
        user_mappings = [_mapping_to_summary(record) for record in user_records]
        return {
            "system_templates": system_templates,
            "user_mappings": user_mappings,
        }
|
|
|
|
|
|
class CopyMappingBody(BaseModel):
    # Optional request body of the copy endpoint. ``name`` is the desired name
    # of the copy; when it is missing or empty, the endpoint derives a name
    # from the source mapping instead.
    name: str | None = None
|
|
|
|
|
|
def _next_free_mapping_name(cur, profile_id: str, module: str, base_name: str) -> str:
    """Return ``base_name`` or the first ``"<base_name> <n>"`` (n = 2, 3, ...) still free.

    A name counts as taken when the given profile already owns a mapping with
    that name in the given module.
    """
    candidate = base_name
    counter = 1
    while True:
        cur.execute(
            """
            SELECT 1 FROM csv_field_mappings
            WHERE profile_id = %s::uuid AND module = %s AND mapping_name = %s
            """,
            (profile_id, module, candidate),
        )
        if not cur.fetchone():
            return candidate
        counter += 1
        candidate = f"{base_name} {counter}"


@router.post("/mappings/{mapping_id}/copy")
def copy_csv_mapping(
    mapping_id: int,
    body: CopyMappingBody | None = None,
    session: dict = Depends(require_auth),
):
    """Copy a system mapping or one of the caller's own mappings as a new user mapping.

    Args:
        mapping_id: Id of the mapping to copy.
        body: Optional request body; ``body.name`` overrides the default name
            ``"<source name> (Kopie)"``.
        session: Authenticated session; its ``profile_id`` becomes the owner
            of the copy.

    Returns:
        ``{"new_mapping_id": ..., "mapping_name": ...}`` for the inserted row.

    Raises:
        HTTPException: 404 when no mapping has the given id, 403 when the
            mapping is a non-system mapping owned by another profile.
    """
    pid = str(session["profile_id"])
    with get_db() as conn:
        cur = get_cursor(conn)
        cur.execute(
            """
            SELECT * FROM csv_field_mappings WHERE id = %s
            """,
            (mapping_id,),
        )
        row = cur.fetchone()
        # Guard the conversion the same way _load_import_limits does, so that
        # an unknown id reaches the 404 below regardless of how r2d treats None.
        src = r2d(row) if row else None
        if not src:
            raise HTTPException(404, "Mapping nicht gefunden")
        # System templates may be copied by anyone; user mappings only by
        # their owner.
        if not src["is_system"] and str(src.get("profile_id")) != pid:
            raise HTTPException(403, "Kein Zugriff auf dieses Mapping")

        requested_name = body.name if body is not None else None
        base_name = requested_name or f"{src['mapping_name']} (Kopie)"
        name = _next_free_mapping_name(cur, pid, src["module"], base_name)

        # The copy starts with fresh statistics (usage_count 0, success_rate 1.0).
        cur.execute(
            """
            INSERT INTO csv_field_mappings (
                profile_id, is_system, module, mapping_name, description,
                column_signature, delimiter, encoding, has_header,
                field_mappings, type_conversions, usage_count, success_rate
            ) VALUES (
                %s::uuid, false, %s, %s, %s,
                %s, %s, %s, %s, %s, %s, 0, 1.0
            ) RETURNING id
            """,
            (
                pid,
                src["module"],
                name,
                src.get("description"),
                src["column_signature"],
                src["delimiter"],
                src["encoding"],
                src["has_header"],
                Json(src["field_mappings"]),
                Json(src["type_conversions"]) if src.get("type_conversions") is not None else None,
            ),
        )
        new_id = cur.fetchone()["id"]
        return {"new_mapping_id": new_id, "mapping_name": name}
|
|
|
|
|
|
@router.post("/analyze")
async def analyze_csv(
    file: UploadFile = File(...),
    module: str = Form(...),
    delimiter: Optional[str] = Form(default=None),
    session: dict = Depends(require_auth),
):
    """Parse the first rows, build the column signature and rank system templates by similarity.

    Args:
        file: Uploaded CSV file.
        module: Id of the target import module.
        delimiter: Optional delimiter; only ``","``, ``";"`` and a tab are
            honoured, any other value is treated as "not given".
        session: Authenticated session (used for access control only).

    Returns:
        A dict with the detected delimiter, the column headers, the normalised
        column signature, the sample rows, the (at most five) best-scoring
        system templates and the fields available in the module.

    Raises:
        HTTPException: 400 for an unknown module, 413 when the file exceeds
            the configured size or row limit.
    """
    # Look the module up once; the definition is reused for the response.
    mod_def = get_module_definition(module)
    if not mod_def:
        raise HTTPException(400, f"Unbekanntes Modul: {module}")

    limits = _load_import_limits()
    max_bytes = limits.get("max_file_bytes", 52_428_800)
    # Read at most one byte more than allowed: enough to detect an oversized
    # upload without pulling the whole file into memory first.
    raw = await file.read(max_bytes + 1)
    if len(raw) > max_bytes:
        raise HTTPException(
            413,
            f"Datei zu groß (max. {max_bytes} Bytes laut Systemkonfiguration)",
        )

    text = decode_raw_bytes(raw)
    max_rows = limits.get("max_rows_per_file", 50_000)
    # Newlines serve as a cheap row estimate; a slack of five lines is allowed
    # on top of the configured maximum.
    if text.count("\n") > max_rows + 5:
        raise HTTPException(
            413,
            f"Zu viele Zeilen (>{max_rows}) laut Systemkonfiguration csv_import.max_rows_per_file",
        )

    delim = delimiter if delimiter in (",", ";", "\t") else None
    headers, sample_rows, used_delim = parse_csv_sample(text, delimiter=delim, max_data_rows=5)
    sig = column_signature(headers)

    with get_db() as conn:
        cur = get_cursor(conn)
        cur.execute(
            """
            SELECT id, module, mapping_name, description, column_signature,
                   delimiter, encoding, has_header, field_mappings, type_conversions, is_system
            FROM csv_field_mappings
            WHERE is_system = true AND module = %s
            """,
            (module,),
        )
        templates = [r2d(r) for r in cur.fetchall()]

    ranked = []
    for template in templates:
        stored_signature = list(template["column_signature"]) if template["column_signature"] else []
        # Normalise and de-duplicate the stored headers before scoring them
        # against the signature of the upload.
        normalized = sorted({normalize_header_for_signature(str(s)) for s in stored_signature})
        score = headers_signature_match_score(sig, normalized)
        ranked.append(
            {
                "mapping_id": template["id"],
                "mapping_name": template["mapping_name"],
                "confidence": round(score, 4),
                "match_type": "signature_jaccard",
            }
        )
    # Best match first; templates with equal confidence keep their query order.
    ranked.sort(key=lambda entry: entry["confidence"], reverse=True)

    return {
        "module": module,
        "filename": file.filename,
        "encoding_note": "utf-8/latin-1 mit BOM-Strip",
        "delimiter": used_delim,
        "columns": headers,
        "column_signature_normalized": sig,
        "sample_rows": sample_rows,
        "detected_mappings": ranked[:5],
        "available_fields": mod_def["fields"],
    }
|