# mitai-jinkendo/backend/routers/csv_import.py

"""
CSV import: user-facing endpoints for analysis, mappings, and limits (Issue #21).
"""
from __future__ import annotations

import logging
from typing import Any, Optional

from fastapi import APIRouter, Depends, File, Form, HTTPException, UploadFile
from pydantic import BaseModel

from psycopg2.extras import Json

from auth import require_auth
from db import get_db, get_cursor, r2d
from csv_parser.core import (
    decode_raw_bytes,
    column_signature,
    get_csv_import_limits,
    headers_signature_match_score,
    normalize_header_for_signature,
    parse_csv_sample,
)
from csv_parser.module_registry import get_module_definition, list_modules, validate_field_mappings

router = APIRouter(prefix="/api/csv", tags=["csv-import"])
logger = logging.getLogger(__name__)


def _load_import_limits() -> dict[str, int]:
    """Resolve the CSV import limits from the ``csv_import`` system_config row.

    The fetched row is converted with ``r2d`` (or replaced by ``None`` when no
    row exists) and handed to ``get_csv_import_limits``; that function's
    result is returned unchanged.
    """
    with get_db() as connection:
        cursor = get_cursor(connection)
        cursor.execute("SELECT value FROM system_config WHERE key = %s", ("csv_import",))
        config_row = cursor.fetchone()

    if config_row:
        stored_config = r2d(config_row)
    else:
        stored_config = None
    return get_csv_import_limits(stored_config)


def _mapping_to_summary(m: dict) -> dict:
    return {
        "id": m["id"],
        "module": m["module"],
        "name": m["mapping_name"],
        "description": m.get("description"),
        "is_system": m["is_system"],
        "usage_count": m.get("usage_count"),
        "success_rate": m.get("success_rate"),
        "last_used_at": m.get("last_used_at"),
        "created_at": m.get("created_at"),
    }


@router.get("/modules")
def csv_modules(session: dict = Depends(require_auth)):
    """List the supported import modules together with their field definitions."""
    # Lazily pair every module id with its definition so each lookup happens
    # exactly once, in registry order.
    definitions = (
        (module_id, get_module_definition(module_id)) for module_id in list_modules()
    )
    modules = [
        {"id": module_id, "table": definition["table"], "fields": definition["fields"]}
        for module_id, definition in definitions
        if definition  # ids without a definition are left out
    ]
    return {"modules": modules}


@router.get("/limits")
def csv_limits(session: dict = Depends(require_auth)):
    """Return the admin-configurable import limits (system_config.csv_import)."""
    limits = _load_import_limits()
    return limits


@router.get("/mappings")
def list_csv_mappings(
    module: Optional[str] = None,
    session: dict = Depends(require_auth),
):
    """Return the system templates plus the caller's own user mappings.

    When ``module`` is given, both lists are restricted to that module.
    System templates are ordered by usage count, user mappings by the time
    they were last used; both fall back to the mapping name.
    """
    profile_id = str(session["profile_id"])
    # The optional module filter is bound twice in each query
    # (once for the NULL test, once for the comparison).
    module_filter = (module, module)

    with get_db() as connection:
        cursor = get_cursor(connection)

        cursor.execute(
            """
            SELECT id, module, mapping_name, description, is_system, profile_id,
                   usage_count, success_rate, last_used_at, created_at
            FROM csv_field_mappings
            WHERE is_system = true
              AND (%s::text IS NULL OR module = %s)
            ORDER BY usage_count DESC NULLS LAST, mapping_name
            """,
            module_filter,
        )
        system_templates = list(map(r2d, cursor.fetchall()))

        cursor.execute(
            """
            SELECT id, module, mapping_name, description, is_system, profile_id,
                   usage_count, success_rate, last_used_at, created_at
            FROM csv_field_mappings
            WHERE is_system = false AND profile_id = %s::uuid
              AND (%s::text IS NULL OR module = %s)
            ORDER BY last_used_at DESC NULLS LAST, mapping_name
            """,
            (profile_id, *module_filter),
        )
        own_mappings = list(map(r2d, cursor.fetchall()))

    response = {}
    response["system_templates"] = list(map(_mapping_to_summary, system_templates))
    response["user_mappings"] = list(map(_mapping_to_summary, own_mappings))
    return response


class CopyMappingBody(BaseModel):
    """Optional request body for the mapping copy endpoint."""

    # Desired name for the copy; when omitted or empty, copy_csv_mapping
    # falls back to "<source mapping name> (Kopie)".
    name: Optional[str] = None


def _next_free_mapping_name(cur, pid: str, module: str, base_name: str) -> str:
    """Return ``base_name`` or the first ``"<base_name> <n>"`` (n >= 2) still free.

    A name counts as taken when ``csv_field_mappings`` already holds a row
    with the same profile, module and mapping name.
    """
    candidate = base_name
    suffix = 1
    while True:
        cur.execute(
            """
            SELECT 1 FROM csv_field_mappings
            WHERE profile_id = %s::uuid AND module = %s AND mapping_name = %s
            """,
            (pid, module, candidate),
        )
        if not cur.fetchone():
            return candidate
        suffix += 1
        candidate = f"{base_name} {suffix}"


@router.post("/mappings/{mapping_id}/copy")
def copy_csv_mapping(
    mapping_id: int,
    body: CopyMappingBody | None = None,
    session: dict = Depends(require_auth),
):
    """Copy a system template or one of the caller's own mappings as a new user mapping.

    Args:
        mapping_id: Id of the ``csv_field_mappings`` row to copy.
        body: Optional body carrying the desired name of the copy. Without a
            name, ``"<source name> (Kopie)"`` is used; a numeric suffix is
            appended when the name is already taken for this profile/module.
        session: Authenticated session; ``profile_id`` becomes the owner.

    Returns:
        ``{"new_mapping_id": ..., "mapping_name": ...}`` for the inserted row.

    Raises:
        HTTPException: 404 when the source mapping does not exist, 403 when it
            is a non-system mapping owned by another profile.
    """
    pid = str(session["profile_id"])
    with get_db() as conn:
        cur = get_cursor(conn)
        cur.execute(
            """
            SELECT * FROM csv_field_mappings WHERE id = %s
            """,
            (mapping_id,),
        )
        row = cur.fetchone()
        # fetchone() yields None for an unknown id. Check the raw row before
        # converting so the 404 does not depend on r2d() tolerating None.
        if not row:
            raise HTTPException(404, "Mapping nicht gefunden")
        src = r2d(row)
        if not src["is_system"] and str(src.get("profile_id")) != pid:
            raise HTTPException(403, "Kein Zugriff auf dieses Mapping")

        requested_name = body.name if body else None
        base_name = requested_name or f"{src['mapping_name']} (Kopie)"
        name = _next_free_mapping_name(cur, pid, src["module"], base_name)

        type_conversions = src.get("type_conversions")
        cur.execute(
            """
            INSERT INTO csv_field_mappings (
                profile_id, is_system, module, mapping_name, description,
                column_signature, delimiter, encoding, has_header,
                field_mappings, type_conversions, usage_count, success_rate
            ) VALUES (
                %s::uuid, false, %s, %s, %s,
                %s, %s, %s, %s, %s, %s, 0, 1.0
            ) RETURNING id
            """,
            (
                pid,
                src["module"],
                name,
                src.get("description"),
                src["column_signature"],
                src["delimiter"],
                src["encoding"],
                src["has_header"],
                Json(src["field_mappings"]),
                # Keep SQL NULL as NULL instead of storing a JSON null.
                Json(type_conversions) if type_conversions is not None else None,
            ),
        )
        new_id = cur.fetchone()["id"]
    return {"new_mapping_id": new_id, "mapping_name": name}


@router.post("/analyze")
async def analyze_csv(
    file: UploadFile = File(...),
    module: str = Form(...),
    delimiter: Optional[str] = Form(default=None),
    session: dict = Depends(require_auth),
):
    """Parse the first rows of an upload and rank system templates by similarity.

    Args:
        file: Uploaded CSV file.
        module: Target import module; must be known to the module registry.
        delimiter: Optional delimiter hint. Only ``","``, ``";"`` and a tab
            are honoured; any other value is ignored and ``None`` is passed
            to the parser instead.
        session: Authenticated session (only required for access control).

    Returns:
        A dict with the detected delimiter, header columns, normalized column
        signature, up to five sample rows, the five best-matching system
        templates and the field definitions of ``module``.

    Raises:
        HTTPException: 400 for an unknown module, 413 when the upload exceeds
            the configured byte or row limits.
    """
    mod_def = get_module_definition(module)
    if not mod_def:
        raise HTTPException(400, f"Unbekanntes Modul: {module}")

    # NOTE(review): _load_import_limits() and the query below look synchronous;
    # inside this async endpoint they would block the event loop — confirm.
    limits = _load_import_limits()
    max_bytes = limits.get("max_file_bytes", 52_428_800)
    # Read at most one byte past the limit: enough to detect an oversized
    # upload without first pulling the entire file into memory.
    raw = await file.read(max_bytes + 1)
    if len(raw) > max_bytes:
        raise HTTPException(
            413,
            f"Datei zu groß (max. {max_bytes} Bytes laut Systemkonfiguration)",
        )

    text = decode_raw_bytes(raw)
    max_rows = limits.get("max_rows_per_file", 50_000)
    # Newline count is a cheap row estimate; the +5 leaves a small slack
    # above the configured limit.
    if text.count("\n") > max_rows + 5:
        raise HTTPException(
            413,
            f"Zu viele Zeilen (>{max_rows}) laut Systemkonfiguration csv_import.max_rows_per_file",
        )

    delim = delimiter if delimiter in (",", ";", "\t") else None
    headers, sample_rows, used_delim = parse_csv_sample(text, delimiter=delim, max_data_rows=5)
    sig = column_signature(headers)

    with get_db() as conn:
        cur = get_cursor(conn)
        cur.execute(
            """
            SELECT id, module, mapping_name, description, column_signature,
                   delimiter, encoding, has_header, field_mappings, type_conversions, is_system
            FROM csv_field_mappings
            WHERE is_system = true AND module = %s
            """,
            (module,),
        )
        templates = [r2d(r) for r in cur.fetchall()]

    ranked = []
    for template in templates:
        template_sig = template["column_signature"] or []
        # Normalize and de-duplicate the stored signature before scoring it
        # against the signature of the uploaded file.
        normalized = sorted({normalize_header_for_signature(str(col)) for col in template_sig})
        score = headers_signature_match_score(sig, normalized)
        ranked.append(
            {
                "mapping_id": template["id"],
                "mapping_name": template["mapping_name"],
                "confidence": round(score, 4),
                "match_type": "signature_jaccard",
            }
        )
    # Best match first; the sort is stable, so ties keep their query order.
    ranked.sort(key=lambda entry: entry["confidence"], reverse=True)

    return {
        "module": module,
        "filename": file.filename,
        "encoding_note": "utf-8/latin-1 mit BOM-Strip",
        "delimiter": used_delim,
        "columns": headers,
        "column_signature_normalized": sig,
        "sample_rows": sample_rows,
        "detected_mappings": ranked[:5],
        "available_fields": mod_def["fields"],
    }