""" FILE: app/frontend/ui_utils.py DESCRIPTION: String-Utilities. Parser für Markdown/YAML (LLM-Healing) und Helper für History-Loading. VERSION: 2.6.0 STATUS: Active DEPENDENCIES: re, yaml, unicodedata, json, datetime LAST_ANALYSIS: 2025-12-15 """ import re import yaml import unicodedata import json from datetime import datetime def slugify(value): if not value: return "" value = str(value).lower() replacements = {'ä': 'ae', 'ö': 'oe', 'ü': 'ue', 'ß': 'ss', '&': 'und', '+': 'und'} for k, v in replacements.items(): value = value.replace(k, v) value = unicodedata.normalize('NFKD', value).encode('ascii', 'ignore').decode('ascii') value = re.sub(r'[^\w\s-]', '', value).strip() return re.sub(r'[-\s]+', '-', value) def normalize_meta_and_body(meta, body): ALLOWED_KEYS = {"title", "type", "status", "tags", "id", "created", "updated", "aliases", "lang"} clean_meta = {} extra_content = [] if "titel" in meta and "title" not in meta: meta["title"] = meta.pop("titel") tag_candidates = ["tags", "emotionale_keywords", "keywords", "schluesselwoerter"] all_tags = [] for key in tag_candidates: if key in meta: val = meta[key] if isinstance(val, list): all_tags.extend(val) elif isinstance(val, str): all_tags.extend([t.strip() for t in val.split(",")]) for key, val in meta.items(): if key in ALLOWED_KEYS: clean_meta[key] = val elif key in tag_candidates: pass else: if val and isinstance(val, str): header = key.replace("_", " ").title() extra_content.append(f"## {header}\n{val}\n") if all_tags: clean_tags = [] for t in all_tags: t_clean = str(t).replace("#", "").strip() if t_clean: clean_tags.append(t_clean) clean_meta["tags"] = list(set(clean_tags)) if extra_content: new_section = "\n".join(extra_content) final_body = f"{new_section}\n{body}" else: final_body = body return clean_meta, final_body def parse_markdown_draft(full_text): clean_text = full_text.strip() pattern_block = r"```(?:markdown|md|yaml)?\s*(.*?)\s*```" match_block = re.search(pattern_block, clean_text, re.DOTALL | re.IGNORECASE) if match_block: clean_text = match_block.group(1).strip() meta = {} body = clean_text yaml_str = "" parts = re.split(r"^---+\s*$", clean_text, maxsplit=2, flags=re.MULTILINE) if len(parts) >= 3: yaml_str = parts[1] body = parts[2] elif clean_text.startswith("---"): fallback_match = re.search(r"^---\s*(.*?)(?=\n#)", clean_text, re.DOTALL | re.MULTILINE) if fallback_match: yaml_str = fallback_match.group(1) body = clean_text.replace(f"---{yaml_str}", "", 1).strip() if yaml_str: yaml_str_clean = yaml_str.replace("#", "") try: parsed = yaml.safe_load(yaml_str_clean) if isinstance(parsed, dict): meta = parsed except Exception as e: print(f"YAML Parsing Warning: {e}") if not meta.get("title"): h1_match = re.search(r"^#\s+(.*)$", body, re.MULTILINE) if h1_match: meta["title"] = h1_match.group(1).strip() if meta.get("type") == "draft": meta["status"] = "draft" meta["type"] = "experience" return normalize_meta_and_body(meta, body) def build_markdown_doc(meta, body): if "id" not in meta or meta["id"] == "generated_on_save": raw_title = meta.get('title', 'note') clean_slug = slugify(raw_title)[:50] or "note" meta["id"] = f"{datetime.now().strftime('%Y%m%d')}-{clean_slug}" meta["updated"] = datetime.now().strftime("%Y-%m-%d") ordered_meta = {} prio_keys = ["id", "type", "title", "status", "tags"] for k in prio_keys: if k in meta: ordered_meta[k] = meta.pop(k) ordered_meta.update(meta) try: yaml_str = yaml.dump(ordered_meta, default_flow_style=None, sort_keys=False, allow_unicode=True).strip() except: yaml_str = "error: generating_yaml" return f"---\n{yaml_str}\n---\n\n{body}" def load_history_from_logs(filepath, limit=10): queries = [] if filepath.exists(): try: with open(filepath, "r", encoding="utf-8") as f: lines = f.readlines() for line in reversed(lines): try: entry = json.loads(line) q = entry.get("query_text") if q and q not in queries: queries.append(q) if len(queries) >= limit: break except: continue except: pass return queries