feat(csv-parser): Introduce header signature ranking metrics for enhanced CSV analysis
- Added new functions for calculating header signature recall and ranking metrics, improving the analysis of CSV templates.
- Updated existing CSV analysis endpoints to utilize the new ranking metrics, enhancing the accuracy of template matching.
- Refactored related code to replace Jaccard score calculations with the new metrics, providing a more comprehensive evaluation of CSV structure.
- Improved documentation for new functions to clarify their purpose and usage in the context of CSV template analysis.
This commit is contained in:
parent
c10da55ec6
commit
b4cc3cb934
|
|
@ -7,7 +7,7 @@ from __future__ import annotations
|
||||||
import csv
|
import csv
|
||||||
import io
|
import io
|
||||||
import re
|
import re
|
||||||
from typing import Any, Dict, Iterator, List, Tuple
|
from typing import Any, Dict, Iterator, List, Sequence, Tuple
|
||||||
|
|
||||||
_DEFAULT_DELIMS = [",", ";", "\t"]
|
_DEFAULT_DELIMS = [",", ";", "\t"]
|
||||||
|
|
||||||
|
|
@ -115,7 +115,7 @@ def column_signature(headers: List[str]) -> List[str]:
|
||||||
|
|
||||||
|
|
||||||
def headers_signature_match_score(sig_csv: List[str], sig_template: List[str]) -> float:
|
def headers_signature_match_score(sig_csv: List[str], sig_template: List[str]) -> float:
|
||||||
"""Jaccard-Überlappung 0..1."""
|
"""Jaccard-Überlappung 0..1 (|A∩B|/|A∪B|). Fällt stark, wenn die CSV viele Zusatzspalten hat."""
|
||||||
a, b = set(sig_csv), set(sig_template)
|
a, b = set(sig_csv), set(sig_template)
|
||||||
if not a and not b:
|
if not a and not b:
|
||||||
return 1.0
|
return 1.0
|
||||||
|
|
@ -126,6 +126,46 @@ def headers_signature_match_score(sig_csv: List[str], sig_template: List[str]) -
|
||||||
return inter / union if union else 0.0
|
return inter / union if union else 0.0
|
||||||
|
|
||||||
|
|
||||||
|
def headers_signature_template_recall(sig_csv: Sequence[str], sig_template: Sequence[str]) -> float:
    """
    Return the share of template signature columns found in the CSV: |A∩B| / |B|.

    The result is 1.0 as soon as every non-empty template column occurs in the CSV
    signature, regardless of how many additional columns the CSV carries (a file that
    contains both weight and nutrition columns is not penalised for either template).

    Template entries are passed through ``normalize_header_for_signature`` and empty
    results are dropped. ``sig_csv`` is used as given; NOTE(review): it is presumably
    already normalized by the caller — confirm.

    If no usable template column remains, the result is 1.0 for an empty CSV
    signature and 0.0 otherwise.
    """
    csv_cols = set(sig_csv)
    template_cols = {normalize_header_for_signature(str(entry)) for entry in sig_template} - {""}
    if template_cols:
        return len(csv_cols.intersection(template_cols)) / len(template_cols)
    # Empty template: only an equally empty CSV signature counts as a full match.
    if csv_cols:
        return 0.0
    return 1.0
|
||||||
|
|
||||||
|
|
||||||
|
def headers_signature_rank_metrics(sig_csv: List[str], sig_template: List[str]) -> dict[str, Any]:
    """
    Compute one consistent set of figures for template ranking and for the UI.

    Returned keys:
        confidence: identical to ``template_recall`` (recommended as the primary
            value for display and sorting).
        template_recall: matched template columns / template columns, rounded to 4 places.
        jaccard: |A∩B| / |A∪B|, rounded to 4 places (0.0 when both sets are empty).
        columns_matched: number of columns present in both signatures.
        columns_in_template: number of distinct non-empty normalized template columns.
        columns_in_csv: number of distinct entries in ``sig_csv``.

    Template entries are normalized with ``normalize_header_for_signature`` and empty
    results are dropped; ``sig_csv`` is used as given.
    """
    csv_cols = set(sig_csv)
    template_cols = {normalize_header_for_signature(str(entry)) for entry in sig_template}
    template_cols.discard("")

    matched = len(csv_cols & template_cols)
    template_total = len(template_cols)
    csv_total = len(csv_cols)
    union_total = len(csv_cols | template_cols)

    if template_total:
        recall = matched / template_total
    elif csv_total:
        # Empty template against a non-empty CSV: nothing can be covered.
        recall = 0.0
    else:
        # Both signatures are empty: treated as a full match.
        recall = 1.0

    jaccard = matched / union_total if union_total else 0.0
    recall_rounded = round(recall, 4)

    return {
        "confidence": recall_rounded,
        "template_recall": recall_rounded,
        "jaccard": round(jaccard, 4),
        "columns_matched": matched,
        "columns_in_template": template_total,
        "columns_in_csv": csv_total,
    }
|
||||||
|
|
||||||
|
|
||||||
def get_csv_import_limits(conn_row: dict | None) -> dict[str, int]:
|
def get_csv_import_limits(conn_row: dict | None) -> dict[str, int]:
|
||||||
"""Liest Limits aus system_config.csv_import; Fallback bei fehlendem Key."""
|
"""Liest Limits aus system_config.csv_import; Fallback bei fehlendem Key."""
|
||||||
defaults = {"max_rows_per_file": 50_000, "max_file_bytes": 52_428_800}
|
defaults = {"max_rows_per_file": 50_000, "max_file_bytes": 52_428_800}
|
||||||
|
|
|
||||||
|
|
@ -15,7 +15,7 @@ from csv_parser.core import (
|
||||||
column_signature,
|
column_signature,
|
||||||
decode_raw_bytes,
|
decode_raw_bytes,
|
||||||
get_csv_import_limits,
|
get_csv_import_limits,
|
||||||
headers_signature_match_score,
|
headers_signature_rank_metrics,
|
||||||
normalize_header_for_signature,
|
normalize_header_for_signature,
|
||||||
parse_csv_sample,
|
parse_csv_sample,
|
||||||
)
|
)
|
||||||
|
|
@ -134,7 +134,7 @@ async def admin_analyze_csv_for_template(
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
CSV hochladen wie im Nutzer-Import: Spalten + Vorschau + Vorschläge für field_mappings
|
CSV hochladen wie im Nutzer-Import: Spalten + Vorschau + Vorschläge für field_mappings
|
||||||
und type_conversions. Optional Seed-Vorlage (ID) oder beste Jaccard-Systemvorlage für das Modul.
|
und type_conversions. Optional Seed-Vorlage (ID) oder beste Systemvorlage (Abdeckung, dann Jaccard).
|
||||||
"""
|
"""
|
||||||
_ = session
|
_ = session
|
||||||
if not get_module_definition(module):
|
if not get_module_definition(module):
|
||||||
|
|
@ -191,15 +191,15 @@ async def admin_analyze_csv_for_template(
|
||||||
)
|
)
|
||||||
rows = [r2d(r) for r in cur.fetchall()]
|
rows = [r2d(r) for r in cur.fetchall()]
|
||||||
best: dict | None = None
|
best: dict | None = None
|
||||||
best_score = -1.0
|
best_key: tuple[float, int, float] = (-1.0, -1, -1.0)
|
||||||
for t in rows:
|
for t in rows:
|
||||||
t_sig = list(t.get("column_signature") or [])
|
t_sig = list(t.get("column_signature") or [])
|
||||||
t_norm = sorted({normalize_header_for_signature(str(s)) for s in t_sig})
|
m = headers_signature_rank_metrics(sig, t_sig)
|
||||||
score = headers_signature_match_score(sig, t_norm)
|
key = (m["confidence"], m["columns_matched"], m["jaccard"])
|
||||||
if score > best_score:
|
if key > best_key:
|
||||||
best_score = score
|
best_key = key
|
||||||
best = t
|
best = t
|
||||||
if best and best_score > 0:
|
if best and best_key[0] > 0:
|
||||||
seed_row = best
|
seed_row = best
|
||||||
|
|
||||||
seed_fm = (seed_row or {}).get("field_mappings") or {}
|
seed_fm = (seed_row or {}).get("field_mappings") or {}
|
||||||
|
|
@ -214,13 +214,17 @@ async def admin_analyze_csv_for_template(
|
||||||
|
|
||||||
seed_meta = None
|
seed_meta = None
|
||||||
if seed_row:
|
if seed_row:
|
||||||
t_sig = [normalize_header_for_signature(str(s)) for s in (seed_row.get("column_signature") or [])]
|
t_sig = list(seed_row.get("column_signature") or [])
|
||||||
|
sm = headers_signature_rank_metrics(sig, t_sig)
|
||||||
seed_meta = {
|
seed_meta = {
|
||||||
"id": seed_row["id"],
|
"id": seed_row["id"],
|
||||||
"mapping_name": seed_row.get("mapping_name"),
|
"mapping_name": seed_row.get("mapping_name"),
|
||||||
"confidence": round(headers_signature_match_score(sig, sorted(set(t_sig))), 4)
|
"confidence": sm["confidence"],
|
||||||
if t_sig
|
"template_recall": sm["template_recall"],
|
||||||
else 0.0,
|
"jaccard": sm["jaccard"],
|
||||||
|
"columns_matched": sm["columns_matched"],
|
||||||
|
"columns_in_template": sm["columns_in_template"],
|
||||||
|
"columns_in_csv": sm["columns_in_csv"],
|
||||||
}
|
}
|
||||||
|
|
||||||
return {
|
return {
|
||||||
|
|
|
||||||
|
|
@ -20,7 +20,7 @@ from csv_parser.core import (
|
||||||
decode_raw_bytes,
|
decode_raw_bytes,
|
||||||
column_signature,
|
column_signature,
|
||||||
get_csv_import_limits,
|
get_csv_import_limits,
|
||||||
headers_signature_match_score,
|
headers_signature_rank_metrics,
|
||||||
normalize_header_for_signature,
|
normalize_header_for_signature,
|
||||||
parse_csv_sample,
|
parse_csv_sample,
|
||||||
)
|
)
|
||||||
|
|
@ -247,19 +247,29 @@ async def analyze_csv(
|
||||||
ranked = []
|
ranked = []
|
||||||
for t in templates:
|
for t in templates:
|
||||||
t_sig = list(t["column_signature"]) if t["column_signature"] else []
|
t_sig = list(t["column_signature"]) if t["column_signature"] else []
|
||||||
t_norm = sorted({normalize_header_for_signature(str(s)) for s in t_sig})
|
metrics = headers_signature_rank_metrics(sig, t_sig)
|
||||||
score = headers_signature_match_score(sig, t_norm)
|
|
||||||
ranked.append(
|
ranked.append(
|
||||||
{
|
{
|
||||||
"mapping_id": t["id"],
|
"mapping_id": t["id"],
|
||||||
"module": t["module"],
|
"module": t["module"],
|
||||||
"mapping_name": t["mapping_name"],
|
"mapping_name": t["mapping_name"],
|
||||||
"is_system": bool(t.get("is_system")),
|
"is_system": bool(t.get("is_system")),
|
||||||
"confidence": round(score, 4),
|
"confidence": metrics["confidence"],
|
||||||
"match_type": "signature_jaccard",
|
"template_recall": metrics["template_recall"],
|
||||||
|
"jaccard": metrics["jaccard"],
|
||||||
|
"columns_matched": metrics["columns_matched"],
|
||||||
|
"columns_in_template": metrics["columns_in_template"],
|
||||||
|
"columns_in_csv": metrics["columns_in_csv"],
|
||||||
|
"match_type": "template_recall",
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
ranked.sort(key=lambda x: -x["confidence"])
|
ranked.sort(
|
||||||
|
key=lambda x: (
|
||||||
|
-(x.get("confidence") or 0),
|
||||||
|
-(x.get("columns_matched") or 0),
|
||||||
|
-(x.get("jaccard") or 0),
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
top = ranked[:25]
|
top = ranked[:25]
|
||||||
recommended = top[0] if top and (top[0]["confidence"] or 0) > 0 else None
|
recommended = top[0] if top and (top[0]["confidence"] or 0) > 0 else None
|
||||||
|
|
|
||||||
|
|
@ -8,6 +8,7 @@ from csv_parser.core import (
|
||||||
parse_csv_sample,
|
parse_csv_sample,
|
||||||
column_signature,
|
column_signature,
|
||||||
headers_signature_match_score,
|
headers_signature_match_score,
|
||||||
|
headers_signature_rank_metrics,
|
||||||
get_csv_import_limits,
|
get_csv_import_limits,
|
||||||
iter_csv_dict_rows,
|
iter_csv_dict_rows,
|
||||||
)
|
)
|
||||||
|
|
@ -46,6 +47,20 @@ def test_jaccard():
|
||||||
assert headers_signature_match_score(s1, s2) == pytest.approx(2 / 3)
|
assert headers_signature_match_score(s1, s2) == pytest.approx(2 / 3)
|
||||||
|
|
||||||
|
|
||||||
|
def test_template_recall_full_when_csv_has_extra_columns():
    """All template columns present in the CSV -> recall 1.0; Jaccard is lower with many extra columns."""
    csv_headers = ["D", "E", "F"] + [f"Extra{i}" for i in range(1, 6)]
    template_headers = ["d", "e", "f"]

    metrics = headers_signature_rank_metrics(
        column_signature(csv_headers),
        column_signature(template_headers),
    )

    exact_expectations = {
        "confidence": 1.0,
        "template_recall": 1.0,
        "columns_matched": 3,
        "columns_in_template": 3,
    }
    for key, expected in exact_expectations.items():
        assert metrics[key] == expected
    # 3 shared columns out of 8 distinct columns overall.
    assert metrics["jaccard"] == pytest.approx(3 / 8)
|
||||||
|
|
||||||
|
|
||||||
def test_get_csv_import_limits_default():
|
def test_get_csv_import_limits_default():
|
||||||
assert get_csv_import_limits(None)["max_rows_per_file"] == 50_000
|
assert get_csv_import_limits(None)["max_rows_per_file"] == 50_000
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -2,6 +2,7 @@ import { useEffect, useMemo, useState } from 'react'
|
||||||
import { Link, useNavigate, useParams } from 'react-router-dom'
|
import { Link, useNavigate, useParams } from 'react-router-dom'
|
||||||
import { ArrowLeft, FileSpreadsheet, Loader2, Save, Trash2 } from 'lucide-react'
|
import { ArrowLeft, FileSpreadsheet, Loader2, Save, Trash2 } from 'lucide-react'
|
||||||
import { api } from '../utils/api'
|
import { api } from '../utils/api'
|
||||||
|
import { csvPreviewTdStyle } from '../utils/csvPreviewCells'
|
||||||
|
|
||||||
const MODULE_LABEL = {
|
const MODULE_LABEL = {
|
||||||
nutrition: 'Ernährung',
|
nutrition: 'Ernährung',
|
||||||
|
|
@ -38,16 +39,7 @@ function SampleTable({ sampleRows, columns }) {
|
||||||
{sampleRows.slice(0, 5).map((row, i) => (
|
{sampleRows.slice(0, 5).map((row, i) => (
|
||||||
<tr key={i}>
|
<tr key={i}>
|
||||||
{showCols.map((c) => (
|
{showCols.map((c) => (
|
||||||
<td
|
<td key={c} style={csvPreviewTdStyle(row[c] ?? '—')}>
|
||||||
key={c}
|
|
||||||
style={{
|
|
||||||
padding: '6px',
|
|
||||||
borderBottom: '1px solid var(--border)',
|
|
||||||
maxWidth: 140,
|
|
||||||
overflow: 'hidden',
|
|
||||||
textOverflow: 'ellipsis',
|
|
||||||
}}
|
|
||||||
>
|
|
||||||
{row[c] ?? '—'}
|
{row[c] ?? '—'}
|
||||||
</td>
|
</td>
|
||||||
))}
|
))}
|
||||||
|
|
@ -298,7 +290,7 @@ export default function AdminCsvTemplateEditorPage() {
|
||||||
value={module}
|
value={module}
|
||||||
disabled={!isNew}
|
disabled={!isNew}
|
||||||
onChange={(e) => setModule(e.target.value)}
|
onChange={(e) => setModule(e.target.value)}
|
||||||
style={{ width: '100%', marginTop: 8 }}
|
style={{ width: '100%', marginTop: 8, textAlign: 'left', minHeight: 46, padding: '11px 14px' }}
|
||||||
>
|
>
|
||||||
{modules.map((m) => (
|
{modules.map((m) => (
|
||||||
<option key={m.id} value={m.id}>
|
<option key={m.id} value={m.id}>
|
||||||
|
|
@ -327,7 +319,7 @@ export default function AdminCsvTemplateEditorPage() {
|
||||||
Trennzeichen (optional, sonst automatisch):
|
Trennzeichen (optional, sonst automatisch):
|
||||||
<select
|
<select
|
||||||
className="form-input"
|
className="form-input"
|
||||||
style={{ width: '100%', marginTop: 6 }}
|
style={{ width: '100%', marginTop: 6, textAlign: 'left', minHeight: 44, padding: '10px 12px' }}
|
||||||
value={delimiterOverride}
|
value={delimiterOverride}
|
||||||
onChange={(e) => setDelimiterOverride(e.target.value)}
|
onChange={(e) => setDelimiterOverride(e.target.value)}
|
||||||
>
|
>
|
||||||
|
|
@ -341,11 +333,11 @@ export default function AdminCsvTemplateEditorPage() {
|
||||||
Optional: feste Seed-Vorlage für Vorschläge:
|
Optional: feste Seed-Vorlage für Vorschläge:
|
||||||
<select
|
<select
|
||||||
className="form-input"
|
className="form-input"
|
||||||
style={{ width: '100%', marginTop: 6 }}
|
style={{ width: '100%', marginTop: 6, textAlign: 'left', minHeight: 44, padding: '10px 12px' }}
|
||||||
value={seedTemplateId}
|
value={seedTemplateId}
|
||||||
onChange={(e) => setSeedTemplateId(e.target.value)}
|
onChange={(e) => setSeedTemplateId(e.target.value)}
|
||||||
>
|
>
|
||||||
<option value="">Beste passende System-Vorlage (Jaccard)</option>
|
<option value="">Beste passende System-Vorlage (Abdeckung der Vorlagen-Spalten)</option>
|
||||||
{seedOptions.map((s) => (
|
{seedOptions.map((s) => (
|
||||||
<option key={s.id} value={String(s.id)}>
|
<option key={s.id} value={String(s.id)}>
|
||||||
{s.mapping_name}
|
{s.mapping_name}
|
||||||
|
|
@ -371,9 +363,18 @@ export default function AdminCsvTemplateEditorPage() {
|
||||||
)}
|
)}
|
||||||
</button>
|
</button>
|
||||||
{seedHint && (
|
{seedHint && (
|
||||||
<p style={{ fontSize: 13, color: 'var(--text2)', marginTop: 12 }}>
|
<p style={{ fontSize: 13, color: 'var(--text2)', marginTop: 12, lineHeight: 1.5 }}>
|
||||||
Seed: <strong>{seedHint.mapping_name}</strong> · Übereinstimmung ca.{' '}
|
Seed: <strong>{seedHint.mapping_name}</strong> · Vorlage abgedeckt{' '}
|
||||||
{Math.round((seedHint.confidence || 0) * 100)} %
|
<strong>{Math.round((seedHint.confidence || 0) * 100)} %</strong>
|
||||||
|
{seedHint.columns_matched != null && seedHint.columns_in_template != null
|
||||||
|
? ` (${seedHint.columns_matched}/${seedHint.columns_in_template} Spalten)`
|
||||||
|
: ''}
|
||||||
|
{seedHint.jaccard != null && (
|
||||||
|
<>
|
||||||
|
{' '}
|
||||||
|
· Jaccard <strong>{Math.round(seedHint.jaccard * 100)} %</strong>
|
||||||
|
</>
|
||||||
|
)}
|
||||||
</p>
|
</p>
|
||||||
)}
|
)}
|
||||||
{sampleRows.length > 0 && <SampleTable sampleRows={sampleRows} columns={columns} />}
|
{sampleRows.length > 0 && <SampleTable sampleRows={sampleRows} columns={columns} />}
|
||||||
|
|
@ -386,7 +387,7 @@ export default function AdminCsvTemplateEditorPage() {
|
||||||
</label>
|
</label>
|
||||||
<input
|
<input
|
||||||
className="form-input"
|
className="form-input"
|
||||||
style={{ width: '100%' }}
|
style={{ width: '100%', textAlign: 'left' }}
|
||||||
value={mappingName}
|
value={mappingName}
|
||||||
onChange={(e) => setMappingName(e.target.value)}
|
onChange={(e) => setMappingName(e.target.value)}
|
||||||
placeholder="z. B. FDDB Export 2026"
|
placeholder="z. B. FDDB Export 2026"
|
||||||
|
|
@ -396,14 +397,19 @@ export default function AdminCsvTemplateEditorPage() {
|
||||||
</label>
|
</label>
|
||||||
<textarea
|
<textarea
|
||||||
className="form-input"
|
className="form-input"
|
||||||
style={{ width: '100%', minHeight: 64 }}
|
style={{ width: '100%', minHeight: 64, textAlign: 'left' }}
|
||||||
value={description}
|
value={description}
|
||||||
onChange={(e) => setDescription(e.target.value)}
|
onChange={(e) => setDescription(e.target.value)}
|
||||||
/>
|
/>
|
||||||
<div style={{ display: 'grid', gridTemplateColumns: '1fr 1fr', gap: 12, marginTop: 12 }}>
|
<div style={{ display: 'grid', gridTemplateColumns: '1fr 1fr', gap: 12, marginTop: 12 }}>
|
||||||
<label>
|
<label>
|
||||||
<span className="form-label">Trennzeichen (gespeichert)</span>
|
<span className="form-label">Trennzeichen (gespeichert)</span>
|
||||||
<select className="form-input" style={{ width: '100%', marginTop: 6 }} value={delimiter} onChange={(e) => setDelimiter(e.target.value)}>
|
<select
|
||||||
|
className="form-input"
|
||||||
|
style={{ width: '100%', marginTop: 6, textAlign: 'left', minHeight: 44, padding: '10px 12px' }}
|
||||||
|
value={delimiter}
|
||||||
|
onChange={(e) => setDelimiter(e.target.value)}
|
||||||
|
>
|
||||||
<option value=";">Semikolon</option>
|
<option value=";">Semikolon</option>
|
||||||
<option value=",">Komma</option>
|
<option value=",">Komma</option>
|
||||||
<option value="\t">Tab</option>
|
<option value="\t">Tab</option>
|
||||||
|
|
@ -413,7 +419,7 @@ export default function AdminCsvTemplateEditorPage() {
|
||||||
<span className="form-label">Kopfzeile</span>
|
<span className="form-label">Kopfzeile</span>
|
||||||
<select
|
<select
|
||||||
className="form-input"
|
className="form-input"
|
||||||
style={{ width: '100%', marginTop: 6 }}
|
style={{ width: '100%', marginTop: 6, textAlign: 'left', minHeight: 44, padding: '10px 12px' }}
|
||||||
value={hasHeader ? 'yes' : 'no'}
|
value={hasHeader ? 'yes' : 'no'}
|
||||||
onChange={(e) => setHasHeader(e.target.value === 'yes')}
|
onChange={(e) => setHasHeader(e.target.value === 'yes')}
|
||||||
>
|
>
|
||||||
|
|
@ -435,16 +441,25 @@ export default function AdminCsvTemplateEditorPage() {
|
||||||
key={col}
|
key={col}
|
||||||
style={{
|
style={{
|
||||||
display: 'grid',
|
display: 'grid',
|
||||||
gridTemplateColumns: 'minmax(0, 1fr) minmax(140px, 200px)',
|
gridTemplateColumns: 'minmax(0, 1fr) minmax(280px, min(52vw, 440px))',
|
||||||
gap: 10,
|
gap: '10px 16px',
|
||||||
alignItems: 'center',
|
alignItems: 'center',
|
||||||
}}
|
}}
|
||||||
>
|
>
|
||||||
<code style={{ fontSize: 12, wordBreak: 'break-word', color: 'var(--text2)' }}>{col}</code>
|
<code style={{ fontSize: 12, wordBreak: 'break-word', color: 'var(--text2)', textAlign: 'left' }}>
|
||||||
|
{col}
|
||||||
|
</code>
|
||||||
<select
|
<select
|
||||||
className="form-input"
|
className="form-input"
|
||||||
value={fieldMappings[col] || '-'}
|
value={fieldMappings[col] || '-'}
|
||||||
onChange={(e) => updateMapping(col, e.target.value)}
|
onChange={(e) => updateMapping(col, e.target.value)}
|
||||||
|
style={{
|
||||||
|
width: '100%',
|
||||||
|
minHeight: 46,
|
||||||
|
textAlign: 'left',
|
||||||
|
padding: '11px 14px',
|
||||||
|
fontSize: 15,
|
||||||
|
}}
|
||||||
>
|
>
|
||||||
<option value="-">— ignorieren</option>
|
<option value="-">— ignorieren</option>
|
||||||
{targetOptions.map((o) => (
|
{targetOptions.map((o) => (
|
||||||
|
|
@ -471,7 +486,14 @@ export default function AdminCsvTemplateEditorPage() {
|
||||||
</p>
|
</p>
|
||||||
<textarea
|
<textarea
|
||||||
className="form-input"
|
className="form-input"
|
||||||
style={{ width: '100%', minHeight: 200, marginTop: 8, fontFamily: 'monospace', fontSize: 12 }}
|
style={{
|
||||||
|
width: '100%',
|
||||||
|
minHeight: 200,
|
||||||
|
marginTop: 8,
|
||||||
|
fontFamily: 'monospace',
|
||||||
|
fontSize: 12,
|
||||||
|
textAlign: 'left',
|
||||||
|
}}
|
||||||
value={typeConversionsText}
|
value={typeConversionsText}
|
||||||
onChange={(e) => setTypeConversionsText(e.target.value)}
|
onChange={(e) => setTypeConversionsText(e.target.value)}
|
||||||
/>
|
/>
|
||||||
|
|
|
||||||
|
|
@ -2,6 +2,7 @@ import { useState, useMemo } from 'react'
|
||||||
import { useNavigate } from 'react-router-dom'
|
import { useNavigate } from 'react-router-dom'
|
||||||
import { ArrowLeft, FileSpreadsheet, Loader2 } from 'lucide-react'
|
import { ArrowLeft, FileSpreadsheet, Loader2 } from 'lucide-react'
|
||||||
import { api } from '../utils/api'
|
import { api } from '../utils/api'
|
||||||
|
import { csvPreviewTdStyle } from '../utils/csvPreviewCells'
|
||||||
|
|
||||||
/** Ziele, die der Universal-Executor bereits schreiben kann (ohne manuelle Modul-Wahl). */
|
/** Ziele, die der Universal-Executor bereits schreiben kann (ohne manuelle Modul-Wahl). */
|
||||||
const EXECUTOR_READY = new Set(['nutrition', 'weight', 'blood_pressure'])
|
const EXECUTOR_READY = new Set(['nutrition', 'weight', 'blood_pressure'])
|
||||||
|
|
@ -32,6 +33,11 @@ function mergeMappingChoices(detected, mapData) {
|
||||||
name: row.name,
|
name: row.name,
|
||||||
is_system: row.is_system,
|
is_system: row.is_system,
|
||||||
confidence: d.confidence ?? 0,
|
confidence: d.confidence ?? 0,
|
||||||
|
jaccard: d.jaccard,
|
||||||
|
template_recall: d.template_recall,
|
||||||
|
columns_matched: d.columns_matched,
|
||||||
|
columns_in_template: d.columns_in_template,
|
||||||
|
columns_in_csv: d.columns_in_csv,
|
||||||
})
|
})
|
||||||
seen.add(row.id)
|
seen.add(row.id)
|
||||||
}
|
}
|
||||||
|
|
@ -78,16 +84,7 @@ function SampleTable({ sampleRows, columns }) {
|
||||||
{sampleRows.slice(0, 5).map((row, i) => (
|
{sampleRows.slice(0, 5).map((row, i) => (
|
||||||
<tr key={i}>
|
<tr key={i}>
|
||||||
{showCols.map((c) => (
|
{showCols.map((c) => (
|
||||||
<td
|
<td key={c} style={csvPreviewTdStyle(row[c] ?? '—')}>
|
||||||
key={c}
|
|
||||||
style={{
|
|
||||||
padding: '6px',
|
|
||||||
borderBottom: '1px solid var(--border)',
|
|
||||||
maxWidth: 140,
|
|
||||||
overflow: 'hidden',
|
|
||||||
textOverflow: 'ellipsis',
|
|
||||||
}}
|
|
||||||
>
|
|
||||||
{row[c] ?? '—'}
|
{row[c] ?? '—'}
|
||||||
</td>
|
</td>
|
||||||
))}
|
))}
|
||||||
|
|
@ -296,8 +293,24 @@ export default function UniversalCsvImportPage() {
|
||||||
>
|
>
|
||||||
<strong>Vorschlag:</strong>{' '}
|
<strong>Vorschlag:</strong>{' '}
|
||||||
{MODULE_LABEL[analyzeResult.recommended.module] || analyzeResult.recommended.module} —{' '}
|
{MODULE_LABEL[analyzeResult.recommended.module] || analyzeResult.recommended.module} —{' '}
|
||||||
{analyzeResult.recommended.mapping_name} (
|
{analyzeResult.recommended.mapping_name}.
|
||||||
{Math.round((analyzeResult.recommended.confidence || 0) * 100)} % Übereinstimmung der Spalten)
|
<br />
|
||||||
|
<span style={{ fontSize: 13, color: 'var(--text2)', fontWeight: 500 }}>
|
||||||
|
Vorlage abgedeckt:{' '}
|
||||||
|
<strong>{Math.round((analyzeResult.recommended.confidence || 0) * 100)} %</strong>
|
||||||
|
{analyzeResult.recommended.columns_matched != null &&
|
||||||
|
analyzeResult.recommended.columns_in_template != null
|
||||||
|
? ` (${analyzeResult.recommended.columns_matched}/${analyzeResult.recommended.columns_in_template} erwartete Spalten in der Datei)`
|
||||||
|
: ''}
|
||||||
|
.{' '}
|
||||||
|
{analyzeResult.recommended.jaccard != null && (
|
||||||
|
<>
|
||||||
|
Jaccard{' '}
|
||||||
|
<strong>{Math.round(analyzeResult.recommended.jaccard * 100)} %</strong> (gesamte
|
||||||
|
Spalten-Überlappung — niedriger, wenn die CSV viele Zusatzspalten hat).
|
||||||
|
</>
|
||||||
|
)}
|
||||||
|
</span>
|
||||||
</div>
|
</div>
|
||||||
)}
|
)}
|
||||||
|
|
||||||
|
|
@ -307,8 +320,9 @@ export default function UniversalCsvImportPage() {
|
||||||
<ul style={{ margin: '8px 0 0 18px', padding: 0 }}>
|
<ul style={{ margin: '8px 0 0 18px', padding: 0 }}>
|
||||||
{analyzeResult.detected_mappings.slice(1, 8).map((d) => (
|
{analyzeResult.detected_mappings.slice(1, 8).map((d) => (
|
||||||
<li key={d.mapping_id}>
|
<li key={d.mapping_id}>
|
||||||
{MODULE_LABEL[d.module] || d.module}: {d.mapping_name} ·{' '}
|
{MODULE_LABEL[d.module] || d.module}: {d.mapping_name} · Vorlage{' '}
|
||||||
{Math.round((d.confidence || 0) * 100)} %
|
{Math.round((d.confidence || 0) * 100)} %
|
||||||
|
{d.jaccard != null ? ` · Jaccard ${Math.round(d.jaccard * 100)} %` : ''}
|
||||||
</li>
|
</li>
|
||||||
))}
|
))}
|
||||||
</ul>
|
</ul>
|
||||||
|
|
@ -324,7 +338,14 @@ export default function UniversalCsvImportPage() {
|
||||||
className="form-input"
|
className="form-input"
|
||||||
value={mappingId}
|
value={mappingId}
|
||||||
onChange={(e) => setMappingId(e.target.value)}
|
onChange={(e) => setMappingId(e.target.value)}
|
||||||
style={{ width: '100%', marginTop: 8 }}
|
style={{
|
||||||
|
width: '100%',
|
||||||
|
marginTop: 8,
|
||||||
|
minHeight: 48,
|
||||||
|
textAlign: 'left',
|
||||||
|
padding: '12px 14px',
|
||||||
|
fontSize: 15,
|
||||||
|
}}
|
||||||
>
|
>
|
||||||
{mappingChoices.length === 0 ? (
|
{mappingChoices.length === 0 ? (
|
||||||
<option value="">Keine Vorlage geladen</option>
|
<option value="">Keine Vorlage geladen</option>
|
||||||
|
|
@ -333,7 +354,11 @@ export default function UniversalCsvImportPage() {
|
||||||
<option key={o.id} value={o.id}>
|
<option key={o.id} value={o.id}>
|
||||||
{MODULE_LABEL[o.module] || o.module} — {o.name}
|
{MODULE_LABEL[o.module] || o.module} — {o.name}
|
||||||
{o.is_system ? ' (System)' : ''}
|
{o.is_system ? ' (System)' : ''}
|
||||||
{o.confidence > 0 ? ` · ${Math.round(o.confidence * 100)} %` : ''}
|
{o.confidence > 0
|
||||||
|
? ` · Vorlage ${Math.round(o.confidence * 100)} %${
|
||||||
|
o.jaccard != null ? ` · Jaccard ${Math.round(o.jaccard * 100)} %` : ''
|
||||||
|
}`
|
||||||
|
: ''}
|
||||||
{!EXECUTOR_READY.has(o.module) ? ' · Import: noch nicht hier' : ''}
|
{!EXECUTOR_READY.has(o.module) ? ' · Import: noch nicht hier' : ''}
|
||||||
</option>
|
</option>
|
||||||
))
|
))
|
||||||
|
|
|
||||||
47
frontend/src/utils/csvPreviewCells.js
Normal file
47
frontend/src/utils/csvPreviewCells.js
Normal file
|
|
@ -0,0 +1,47 @@
|
||||||
|
/**
|
||||||
|
* Vorschau-Zellen: Zahlen rechts (tabular-nums), Text links — typisch für CSV-Tabellen.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/** Return `s` converted to a string with every whitespace character removed. */
function stripForNumericTest(s) {
  // `\s` matches leading/trailing whitespace as well as the no-break space (U+00A0),
  // so a single global replace covers trimming and inner separators alike.
  return String(s).replace(/\s+/g, '')
}
|
||||||
|
|
||||||
|
/**
 * Simple heuristic: is the cell a plain integer or decimal number
 * (optional leading sign, at most one dot or one comma as decimal separator)?
 * Date- and time-like strings are never treated as numbers.
 */
export function isCsvPreviewNumericCell(val) {
  if (val == null || val === '—') return false

  const compact = stripForNumericTest(val)
  if (compact === '') return false

  // Reject dd.mm.yyyy / dd/mm/yyyy prefixes, ISO yyyy-mm-dd prefixes,
  // and anything that combines a colon with a digit (times).
  const looksLikeDateOrTime =
    /^\d{1,2}[./]\d{1,2}[./]\d{2,4}/.test(compact) ||
    /^\d{4}-\d{2}-\d{2}/.test(compact) ||
    (compact.includes(':') && /\d/.test(compact))
  if (looksLikeDateOrTime) return false

  const unsigned = compact.replace(/^[-+]/, '')
  const commaCount = unsigned.split(',').length - 1
  const dotCount = unsigned.split('.').length - 1
  // More than one separator in total (two commas, two dots, or one of each) is not a plain number.
  if (commaCount + dotCount > 1) return false

  const normalized = unsigned.replace(',', '.')
  if (!/^\d*\.?\d+$/.test(normalized)) return false
  // Guards against digit strings so long that they overflow to Infinity.
  return Number.isFinite(Number(normalized))
}
|
||||||
|
|
||||||
|
/**
 * Inline style for a <td> in the CSV preview (import page and admin editor).
 * Numeric cells are right-aligned with tabular figures, everything else is left-aligned.
 * Entries in `extra` are applied last and override the defaults.
 */
export function csvPreviewTdStyle(val, extra = {}) {
  const baseStyle = {
    padding: '6px 8px',
    borderBottom: '1px solid var(--border)',
    maxWidth: 220,
    overflow: 'hidden',
    textOverflow: 'ellipsis',
  }
  const alignment = isCsvPreviewNumericCell(val)
    ? { textAlign: 'right', fontVariantNumeric: 'tabular-nums' }
    : { textAlign: 'left', fontVariantNumeric: undefined }

  return { ...baseStyle, ...alignment, ...extra }
}
|
||||||
Loading…
Reference in New Issue
Block a user