scripts/wiki_importer.py aktualisiert
All checks were successful
Deploy Trainer_LLM to llm-node / deploy (push) Successful in 1s
All checks were successful
Deploy Trainer_LLM to llm-node / deploy (push) Successful in 1s
This commit is contained in:
parent
2567d8c786
commit
6bab3cdf04
|
|
@ -10,22 +10,18 @@ Beschreibung:
|
||||||
- Lookup via /exercise/by-external-id, dann create/update/skip inkl. Zählern
|
- Lookup via /exercise/by-external-id, dann create/update/skip inkl. Zählern
|
||||||
- Smoke-Test (--smoke-test): 3 Läufe (create → skip → update)
|
- Smoke-Test (--smoke-test): 3 Läufe (create → skip → update)
|
||||||
|
|
||||||
v2.3.3 – Änderungen ggü. 2.3.2:
|
v2.3.4 – Änderungen ggü. 2.3.3:
|
||||||
- Stabilerer Fingerprint (Kanonisierung & Whitespace-Normalisierung):
|
- **Robuste Template-Erkennung**: Namen werden unicode-normalisiert & diakritik-insensitiv verglichen
|
||||||
• Titel: _ zu Leerzeichen, Gedankenstriche → Bindestrich
|
(z. B. "ÜbungInfoBox" == "UebungInfoBox" == "uebunginfobox").
|
||||||
• summary/execution/notes: Whitespace kollabieren
|
- **Feld-Synonyme & Key-Normalisierung**: "summary/execution/duration/keywords/..." werden über
|
||||||
• keywords: dedupliziert (case-insensitiv) & sortiert
|
mehrere mögliche Parameternamen aufgelöst (z. B. Durchführung/Durchfuehrung/Ablauf).
|
||||||
• duration_minutes: sicher als int
|
- Ziel: Verhindert leere Felder beim 2. Lauf und damit fälschliche Updates.
|
||||||
- Backcompat beim Update-Entscheid: zusätzlich Neu-Berechnung des Fingerprints aus dem gefundenen Payload
|
|
||||||
(verhindert False-Positives bei Altbeständen ohne/mit abweichendem Fingerprint)
|
|
||||||
- Diagnostik: Gründe im Log (not found / unchanged / changed) und Feld-Diff bei Update
|
|
||||||
- Kein API-/CLI-Bruch
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
import argparse
|
import argparse
|
||||||
from typing import Dict, Any, Tuple, Optional
|
from typing import Dict, Any, Tuple, Optional, List
|
||||||
from collections.abc import Mapping
|
from collections.abc import Mapping
|
||||||
import requests
|
import requests
|
||||||
import mwparserfromhell
|
import mwparserfromhell
|
||||||
|
|
@ -33,6 +29,7 @@ from dotenv import load_dotenv
|
||||||
import hashlib
|
import hashlib
|
||||||
import json
|
import json
|
||||||
import time
|
import time
|
||||||
|
import unicodedata
|
||||||
|
|
||||||
# ----- Konfiguration / Defaults -----
|
# ----- Konfiguration / Defaults -----
|
||||||
load_dotenv() # .env laden, falls vorhanden
|
load_dotenv() # .env laden, falls vorhanden
|
||||||
|
|
@ -43,7 +40,41 @@ DEFAULT_CAT = os.getenv("WIKI_CATEGORY", "Übungen")
|
||||||
DEFAULT_TITLE = os.getenv("WIKI_EXERCISE_TITLE", "Affenklatschen")
|
DEFAULT_TITLE = os.getenv("WIKI_EXERCISE_TITLE", "Affenklatschen")
|
||||||
REQUEST_TIMEOUT = int(os.getenv("REQUEST_TIMEOUT", "60"))
|
REQUEST_TIMEOUT = int(os.getenv("REQUEST_TIMEOUT", "60"))
|
||||||
|
|
||||||
# ---- Hilfsfunktionen für Wiki-Router ----
|
# ---- Unicode-/Key-Normalisierung ----
|
||||||
|
|
||||||
|
def _norm_unicode(s: str) -> str:
|
||||||
|
return unicodedata.normalize("NFKC", s)
|
||||||
|
|
||||||
|
def _strip_diacritics(s: str) -> str:
|
||||||
|
return "".join(ch for ch in unicodedata.normalize("NFD", s) if not unicodedata.combining(ch))
|
||||||
|
|
||||||
|
def _norm_key(s: str) -> str:
    """Canonicalize a template/parameter name for diacritic-insensitive lookup.

    NFKC-normalizes, strips combining diacritics (NFD filter), trims
    surrounding whitespace, and casefolds — so e.g. "ÜbungInfoBox"
    resolves to "ubunginfobox".  A falsy input yields "".
    """
    # Inlined equivalents of _norm_unicode / _strip_diacritics.
    text = unicodedata.normalize("NFKC", s or "")
    text = "".join(
        ch
        for ch in unicodedata.normalize("NFD", text)
        if not unicodedata.combining(ch)
    )
    return text.strip().casefold()
|
||||||
|
|
||||||
|
# Template-Aliasse (normalisierte Namen)
|
||||||
|
TPL_UEBUNG_INFOBOX = {"ubunginfobox", "uebunginfobox"}
|
||||||
|
TPL_UEBUNGSBESCHREIBUNG = {"ubungsbeschreibung", "uebungsbeschreibung"}
|
||||||
|
TPL_SKILLDEV = {"skilldevelopment"}
|
||||||
|
|
||||||
|
# Synonyme für Parameter (normalisierte Keys)
|
||||||
|
KEYS_SUMMARY = ["summary", "kurzbeschreibung", "beschreibung", "kurztext"]
|
||||||
|
KEYS_EXECUTION = ["durchführung", "durchfuehrung", "ausführung", "ausfuehrung", "execution", "ablauf"]
|
||||||
|
KEYS_DURATION = ["dauer", "zeit", "dauer_minuten", "dauer (min)", "minuten"]
|
||||||
|
KEYS_KEYWORDS = ["schlüsselworte", "schluesselworte", "keywords", "tags"]
|
||||||
|
KEYS_EQUIPMENT = ["equipment", "geräte", "geraete", "material"]
|
||||||
|
KEYS_DISCIPLINE = ["übungstyp", "uebungstyp", "discipline"]
|
||||||
|
KEYS_GROUP = ["gruppengröße", "gruppengroesse", "group"]
|
||||||
|
KEYS_AGE_GROUP = ["altersgruppe"]
|
||||||
|
KEYS_TARGET_GROUP = ["zielgruppe", "target_group"]
|
||||||
|
KEYS_PURPOSE = ["ziel", "zweck", "purpose"]
|
||||||
|
KEYS_PREPARATION = ["refmethode", "vorbereitung", "preparation"]
|
||||||
|
KEYS_METHOD = ["method", "methode"]
|
||||||
|
KEYS_NOTES = ["hinweise", "notes"]
|
||||||
|
|
||||||
|
# ---- Hilfsfunktionen ----
|
||||||
|
|
||||||
def wiki_health() -> None:
|
def wiki_health() -> None:
|
||||||
r = requests.get(f"{API_BASE_URL}/health", timeout=15)
|
r = requests.get(f"{API_BASE_URL}/health", timeout=15)
|
||||||
|
|
@ -91,30 +122,36 @@ def parse_exercise(title: str, pageid: int) -> Dict[str, Any]:
|
||||||
wikicode = mwparserfromhell.parse(wikitext)
|
wikicode = mwparserfromhell.parse(wikitext)
|
||||||
|
|
||||||
raw: Dict[str, Any] = {"title": title, "source": "MediaWiki", "pageid": pageid}
|
raw: Dict[str, Any] = {"title": title, "source": "MediaWiki", "pageid": pageid}
|
||||||
|
|
||||||
|
# Templates sammeln (robust gegen Varianten)
|
||||||
for tpl in wikicode.filter_templates():
|
for tpl in wikicode.filter_templates():
|
||||||
name = str(tpl.name).strip()
|
name_raw = str(tpl.name)
|
||||||
if name == "ÜbungInfoBox":
|
name_norm = _norm_key(name_raw)
|
||||||
|
if name_norm in TPL_UEBUNG_INFOBOX:
|
||||||
for p in tpl.params:
|
for p in tpl.params:
|
||||||
raw[str(p.name).strip()] = str(p.value).strip()
|
raw[str(p.name).strip()] = str(p.value).strip()
|
||||||
elif name == "Übungsbeschreibung":
|
elif name_norm in TPL_UEBUNGSBESCHREIBUNG:
|
||||||
for p in tpl.params:
|
for p in tpl.params:
|
||||||
raw[str(p.name).strip()] = str(p.value).strip()
|
raw[str(p.name).strip()] = str(p.value).strip()
|
||||||
elif name == "SkillDevelopment":
|
elif name_norm in TPL_SKILLDEV:
|
||||||
raw.setdefault("capabilities", [])
|
raw.setdefault("capabilities", [])
|
||||||
|
# Standard-Keys (engl. Template)
|
||||||
|
def _getp(t, k):
|
||||||
|
try:
|
||||||
|
return str(t.get(k).value).strip()
|
||||||
|
except Exception:
|
||||||
|
return ""
|
||||||
|
cap = _getp(tpl, "PrimaryCapability")
|
||||||
|
lvl = _getp(tpl, "CapabilityLevel")
|
||||||
try:
|
try:
|
||||||
cap = str(tpl.get("PrimaryCapability").value).strip()
|
lvl_i = int(lvl)
|
||||||
except Exception:
|
except Exception:
|
||||||
cap = ""
|
lvl_i = 0
|
||||||
try:
|
|
||||||
lvl = int(str(tpl.get("CapabilityLevel").value).strip())
|
|
||||||
except Exception:
|
|
||||||
lvl = 0
|
|
||||||
if cap:
|
if cap:
|
||||||
raw["capabilities"].append({"capability": cap, "level": lvl})
|
raw["capabilities"].append({"capability": cap, "level": lvl_i})
|
||||||
raw["wikitext"] = wikitext
|
raw["wikitext"] = wikitext
|
||||||
return raw
|
return raw
|
||||||
|
|
||||||
|
|
||||||
# ---- Fingerprint-Unterstützung (stabil) ----
|
# ---- Fingerprint-Unterstützung (stabil) ----
|
||||||
|
|
||||||
def _normalize(v: Any) -> str:
|
def _normalize(v: Any) -> str:
|
||||||
|
|
@ -138,22 +175,17 @@ def _norm_text(s: str) -> str:
|
||||||
|
|
||||||
def _canon_title(t: str) -> str:
|
def _canon_title(t: str) -> str:
|
||||||
t = (t or "").strip().replace("_", " ")
|
t = (t or "").strip().replace("_", " ")
|
||||||
# Gedankenstriche vereinheitlichen
|
|
||||||
return t.replace("–", "-").replace("—", "-")
|
return t.replace("–", "-").replace("—", "-")
|
||||||
|
|
||||||
|
|
||||||
def compute_fingerprint(payload: Dict[str, Any]) -> str:
|
def compute_fingerprint(payload: Dict[str, Any]) -> str:
|
||||||
# keywords stabilisieren: trim, dedupe (case-insensitiv), sort
|
|
||||||
kws = payload.get("keywords") or []
|
kws = payload.get("keywords") or []
|
||||||
kws = sorted({(k or "").strip() for k in kws if (k or "").strip()}, key=str.casefold)
|
kws = sorted({(k or "").strip() for k in kws if (k or "").strip()}, key=str.casefold)
|
||||||
|
|
||||||
# dauer als int
|
|
||||||
dur = payload.get("duration_minutes") or 0
|
dur = payload.get("duration_minutes") or 0
|
||||||
try:
|
try:
|
||||||
dur = int(round(float(dur)))
|
dur = int(round(float(dur)))
|
||||||
except Exception:
|
except Exception:
|
||||||
dur = 0
|
dur = 0
|
||||||
|
|
||||||
fields = [
|
fields = [
|
||||||
_canon_title(payload.get("title", "")),
|
_canon_title(payload.get("title", "")),
|
||||||
_norm_text(payload.get("summary", "")),
|
_norm_text(payload.get("summary", "")),
|
||||||
|
|
@ -166,11 +198,25 @@ def compute_fingerprint(payload: Dict[str, Any]) -> str:
|
||||||
base = "|".join(_normalize(f) for f in fields)
|
base = "|".join(_normalize(f) for f in fields)
|
||||||
return hashlib.sha256(base.encode("utf-8")).hexdigest()
|
return hashlib.sha256(base.encode("utf-8")).hexdigest()
|
||||||
|
|
||||||
|
# ---- Feldauflösung (Synonyme) ----
|
||||||
|
|
||||||
|
def _norm_keymap(d: Dict[str, Any]) -> Dict[str, Any]:
    """Re-key *d* by normalized key (see _norm_key); non-string keys are dropped.

    If several keys normalize identically, the last one iterated wins,
    matching dict-comprehension semantics.
    """
    normalized: Dict[str, Any] = {}
    for key, value in d.items():
        if isinstance(key, str):
            normalized[_norm_key(key)] = value
    return normalized
|
||||||
|
|
||||||
|
|
||||||
|
def _get_first(d: Dict[str, Any], candidates: List[str]) -> Any:
    """Return the first non-empty value in *d* matching a candidate key.

    Keys of *d* are normalized via _norm_keymap; *candidates* are tried
    in order and must already be in normalized form.  Values of None or
    "" are treated as missing; returns None when nothing matches.
    """
    lookup = _norm_keymap(d)
    hits = (lookup.get(name) for name in candidates)
    return next((v for v in hits if v not in (None, "")), None)
|
||||||
|
|
||||||
|
|
||||||
def build_payload(raw: Dict[str, Any], fullurl: str, category: str, *, mutate: bool = False) -> Dict[str, Any]:
|
def build_payload(raw: Dict[str, Any], fullurl: str, category: str, *, mutate: bool = False) -> Dict[str, Any]:
|
||||||
# Exercise.capabilities erwartet Dict[str,int]
|
# Exercise.capabilities erwartet Dict[str,int]
|
||||||
caps_list = raw.get("capabilities", [])
|
caps_list = raw.get("capabilities", [])
|
||||||
capabilities = {}
|
capabilities: Dict[str, int] = {}
|
||||||
for c in caps_list:
|
for c in caps_list:
|
||||||
cap = c.get("capability")
|
cap = c.get("capability")
|
||||||
lvl = c.get("level")
|
lvl = c.get("level")
|
||||||
|
|
@ -180,47 +226,63 @@ def build_payload(raw: Dict[str, Any], fullurl: str, category: str, *, mutate: b
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
# Defaults/Fallbacks
|
# Defaults/Fallbacks via Synonyme
|
||||||
try:
|
# summary / execution
|
||||||
duration = float(raw.get("Dauer", 0) or 0)
|
summary = _get_first(raw, KEYS_SUMMARY) or ""
|
||||||
except Exception:
|
execution = _get_first(raw, KEYS_EXECUTION) or ""
|
||||||
duration = 0.0
|
|
||||||
|
|
||||||
keywords = []
|
# duration
|
||||||
kw_raw = raw.get("Schlüsselworte", "")
|
duration = _get_first(raw, KEYS_DURATION)
|
||||||
|
try:
|
||||||
|
duration_f = float(duration or 0)
|
||||||
|
except Exception:
|
||||||
|
duration_f = 0.0
|
||||||
|
|
||||||
|
# keywords
|
||||||
|
kw_raw = _get_first(raw, KEYS_KEYWORDS)
|
||||||
|
keywords: List[str] = []
|
||||||
if isinstance(kw_raw, str):
|
if isinstance(kw_raw, str):
|
||||||
keywords = [k.strip() for k in kw_raw.split(",") if k.strip()]
|
keywords = [k.strip() for k in kw_raw.split(",") if k.strip()]
|
||||||
|
|
||||||
equipment = []
|
# equipment
|
||||||
eq_raw = raw.get("equipment", [])
|
eq_raw = _get_first(raw, KEYS_EQUIPMENT)
|
||||||
|
equipment: List[str] = []
|
||||||
if isinstance(eq_raw, str):
|
if isinstance(eq_raw, str):
|
||||||
equipment = [e.strip() for e in eq_raw.split(",") if e.strip()]
|
equipment = [e.strip() for e in eq_raw.split(",") if e.strip()]
|
||||||
elif isinstance(eq_raw, list):
|
elif isinstance(eq_raw, list):
|
||||||
equipment = [str(e).strip() for e in eq_raw if str(e).strip()]
|
equipment = [str(e).strip() for e in eq_raw if str(e).strip()]
|
||||||
|
|
||||||
notes = raw.get("Hinweise", "") or ""
|
notes = _get_first(raw, KEYS_NOTES) or ""
|
||||||
if mutate:
|
if mutate:
|
||||||
notes = (notes + " [auto-update]").strip()
|
notes = (str(notes) + " [auto-update]").strip()
|
||||||
|
|
||||||
|
discipline = _get_first(raw, KEYS_DISCIPLINE) or ""
|
||||||
|
group = _get_first(raw, KEYS_GROUP) or None
|
||||||
|
age_group = _get_first(raw, KEYS_AGE_GROUP) or ""
|
||||||
|
target_group = _get_first(raw, KEYS_TARGET_GROUP) or ""
|
||||||
|
purpose = _get_first(raw, KEYS_PURPOSE) or ""
|
||||||
|
preparation = _get_first(raw, KEYS_PREPARATION) or ""
|
||||||
|
method = _get_first(raw, KEYS_METHOD) or ""
|
||||||
|
|
||||||
payload: Dict[str, Any] = {
|
payload: Dict[str, Any] = {
|
||||||
"title": raw.get("title") or "",
|
"title": raw.get("title") or "",
|
||||||
"summary": raw.get("Summary", "") or "",
|
"summary": str(summary) or "",
|
||||||
"short_description": raw.get("Summary", "") or "",
|
"short_description": str(summary) or "",
|
||||||
"keywords": keywords,
|
"keywords": keywords,
|
||||||
"link": fullurl or "",
|
"link": fullurl or "",
|
||||||
"discipline": raw.get("Übungstyp", "") or "",
|
"discipline": str(discipline) or "",
|
||||||
"group": raw.get("Gruppengröße", "") or None,
|
"group": str(group) if group else None,
|
||||||
"age_group": raw.get("Altersgruppe", "") or "",
|
"age_group": str(age_group) or "",
|
||||||
"target_group": raw.get("Zielgruppe", "") or "",
|
"target_group": str(target_group) or "",
|
||||||
"min_participants": 1,
|
"min_participants": 1,
|
||||||
"duration_minutes": int(round(duration)),
|
"duration_minutes": int(round(duration_f)),
|
||||||
"capabilities": capabilities,
|
"capabilities": capabilities,
|
||||||
"category": category or "",
|
"category": category or "",
|
||||||
"purpose": raw.get("Ziel", "") or "",
|
"purpose": str(purpose) or "",
|
||||||
"execution": raw.get("Durchführung", "") or "",
|
"execution": str(execution) or "",
|
||||||
"notes": notes,
|
"notes": str(notes) or "",
|
||||||
"preparation": raw.get("RefMethode", "") or "",
|
"preparation": str(preparation) or "",
|
||||||
"method": raw.get("method", "") or "",
|
"method": str(method) or "",
|
||||||
"equipment": equipment,
|
"equipment": equipment,
|
||||||
"fullurl": fullurl or "",
|
"fullurl": fullurl or "",
|
||||||
"external_id": f"mw:{raw.get('pageid')}",
|
"external_id": f"mw:{raw.get('pageid')}",
|
||||||
|
|
@ -229,6 +291,7 @@ def build_payload(raw: Dict[str, Any], fullurl: str, category: str, *, mutate: b
|
||||||
payload["fingerprint"] = compute_fingerprint(payload)
|
payload["fingerprint"] = compute_fingerprint(payload)
|
||||||
return payload
|
return payload
|
||||||
|
|
||||||
|
# ---- Lookup/Upsert ----
|
||||||
|
|
||||||
def lookup_by_external_id(external_id: str) -> Tuple[Optional[Dict[str, Any]], Optional[int]]:
|
def lookup_by_external_id(external_id: str) -> Tuple[Optional[Dict[str, Any]], Optional[int]]:
|
||||||
url = f"{EXERCISE_API}/by-external-id"
|
url = f"{EXERCISE_API}/by-external-id"
|
||||||
|
|
@ -256,6 +319,37 @@ def _payload_subset_for_fp(p: Dict[str, Any]) -> Dict[str, Any]:
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _print_diff(before: Dict[str, Any], after: Dict[str, Any]) -> None:
    """Print a normalized diff of the hash-relevant fields (diagnostics only)."""
    fields = ["title", "summary", "execution", "notes",
              "duration_minutes", "capabilities", "keywords"]

    def _kw_set(values):
        # Case-insensitive dedupe + stable sort, mirroring fingerprint handling.
        return sorted({(v or "").strip() for v in (values or [])}, key=str.casefold)

    def _snapshot(src: Dict[str, Any]) -> Dict[str, Any]:
        # Normalize text fields the same way compute_fingerprint does, so the
        # diff reflects hash-relevant changes only.
        return {
            "title": _canon_title(src.get("title")),
            "summary": _norm_text(src.get("summary")),
            "execution": _norm_text(src.get("execution")),
            "notes": _norm_text(src.get("notes")),
            "duration_minutes": src.get("duration_minutes"),
            "capabilities": src.get("capabilities"),
            "keywords": _kw_set(src.get("keywords")),
        }

    old_view = _snapshot(before)
    new_view = _snapshot(after)
    diff = {k: (old_view[k], new_view[k])
            for k in fields
            if old_view.get(k) != new_view.get(k)}
    if diff:
        print("[Diff] changes:", json.dumps(diff, ensure_ascii=False))
    else:
        print("[Diff] (none in hash fields)")
||||||
|
|
||||||
|
|
||||||
def upsert_exercise(payload: Dict[str, Any], *, dry_run: bool = False) -> str:
|
def upsert_exercise(payload: Dict[str, Any], *, dry_run: bool = False) -> str:
|
||||||
title = payload.get("title", "<ohne Titel>")
|
title = payload.get("title", "<ohne Titel>")
|
||||||
ext_id = payload.get("external_id")
|
ext_id = payload.get("external_id")
|
||||||
|
|
@ -280,7 +374,7 @@ def upsert_exercise(payload: Dict[str, Any], *, dry_run: bool = False) -> str:
|
||||||
action, reason = "create", "unexpected lookup type"
|
action, reason = "create", "unexpected lookup type"
|
||||||
|
|
||||||
if dry_run:
|
if dry_run:
|
||||||
print(f"[DryRun] {action.upper():6} '{title}' ({ext_id}) – {reason}")
|
print(f"[DryRun] {action.upper():6} '{title}' ({ext_id}) - {reason}")
|
||||||
if action == "update":
|
if action == "update":
|
||||||
_print_diff(found_payload, payload)
|
_print_diff(found_payload, payload)
|
||||||
return action
|
return action
|
||||||
|
|
@ -308,44 +402,12 @@ def upsert_exercise(payload: Dict[str, Any], *, dry_run: bool = False) -> str:
|
||||||
pass
|
pass
|
||||||
else:
|
else:
|
||||||
resp.raise_for_status()
|
resp.raise_for_status()
|
||||||
print(f"[Update] '{title}' – {reason} -> OK")
|
print(f"[Update] '{title}' - {reason} -> OK")
|
||||||
_print_diff(found_payload, payload)
|
_print_diff(found_payload, payload)
|
||||||
else:
|
else:
|
||||||
print(f"[Skip] '{title}' – {reason}")
|
print(f"[Skip] '{title}' - {reason}")
|
||||||
return action
|
return action
|
||||||
|
|
||||||
|
|
||||||
def _print_diff(before: Dict[str, Any], after: Dict[str, Any]) -> None:
|
|
||||||
"""Kleines Feld-Diff für die Hash-Felder (Diagnose)."""
|
|
||||||
keys = ["title","summary","execution","notes","duration_minutes","capabilities","keywords"]
|
|
||||||
b = {k: before.get(k) for k in keys}
|
|
||||||
a = {k: after.get(k) for k in keys}
|
|
||||||
# für bessere Lesbarkeit normalisieren wir die Textfelder
|
|
||||||
b_norm = {
|
|
||||||
"title": _canon_title(b.get("title")),
|
|
||||||
"summary": _norm_text(b.get("summary")),
|
|
||||||
"execution": _norm_text(b.get("execution")),
|
|
||||||
"notes": _norm_text(b.get("notes")),
|
|
||||||
"duration_minutes": b.get("duration_minutes"),
|
|
||||||
"capabilities": b.get("capabilities"),
|
|
||||||
"keywords": sorted({(k or "").strip() for k in (b.get("keywords") or [])}, key=str.casefold),
|
|
||||||
}
|
|
||||||
a_norm = {
|
|
||||||
"title": _canon_title(a.get("title")),
|
|
||||||
"summary": _norm_text(a.get("summary")),
|
|
||||||
"execution": _norm_text(a.get("execution")),
|
|
||||||
"notes": _norm_text(a.get("notes")),
|
|
||||||
"duration_minutes": a.get("duration_minutes"),
|
|
||||||
"capabilities": a.get("capabilities"),
|
|
||||||
"keywords": sorted({(k or "").strip() for k in (a.get("keywords") or [])}, key=str.casefold),
|
|
||||||
}
|
|
||||||
diff = {k: (b_norm[k], a_norm[k]) for k in keys if b_norm.get(k) != a_norm.get(k)}
|
|
||||||
if diff:
|
|
||||||
print("[Diff] changes:", json.dumps(diff, ensure_ascii=False))
|
|
||||||
else:
|
|
||||||
print("[Diff] (none in hash fields)")
|
|
||||||
|
|
||||||
|
|
||||||
# ----- Orchestrierung -----
|
# ----- Orchestrierung -----
|
||||||
|
|
||||||
def process_one(title: str, category: str, *, mutate: bool = False, dry_run: bool = False) -> str:
|
def process_one(title: str, category: str, *, mutate: bool = False, dry_run: bool = False) -> str:
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user