# mindnet/app/frontend/ui_utils.py

"""
FILE: app/frontend/ui_utils.py
DESCRIPTION: String utilities. Parsers for Markdown/YAML (LLM healing) and helpers for history loading.
VERSION: 2.6.0
STATUS: Active
DEPENDENCIES: re, yaml, unicodedata, json, datetime
LAST_ANALYSIS: 2025-12-15
"""

import re
import yaml
import unicodedata
import json
from datetime import datetime

# Single-character substitutions applied before ASCII folding, so German
# umlauts keep their conventional two-letter transliteration instead of
# being reduced to the bare base letter by NFKD.
_SLUG_TRANSLATION = str.maketrans({
    'ä': 'ae', 'ö': 'oe', 'ü': 'ue', 'ß': 'ss', '&': 'und', '+': 'und',
})


def slugify(value):
    """Convert *value* into a lowercase, ASCII-only, hyphen-separated slug.

    Steps: lowercase, transliterate German umlauts / ``&`` / ``+``, fold the
    remaining characters to ASCII via NFKD, drop everything that is not a
    word character, whitespace or hyphen, and collapse runs of whitespace
    and hyphens into a single hyphen.

    Args:
        value: Any object; it is converted with ``str()``. Falsy values
            (``None``, ``""``, ``0``) yield an empty string.

    Returns:
        The slug, without leading or trailing hyphens. May be empty when
        the input contains no usable characters.
    """
    if not value:
        return ""

    text = str(value).lower().translate(_SLUG_TRANSLATION)
    # Decompose accented characters and discard the non-ASCII remainder.
    text = unicodedata.normalize('NFKD', text).encode('ascii', 'ignore').decode('ascii')
    text = re.sub(r'[^\w\s-]', '', text).strip()
    text = re.sub(r'[-\s]+', '-', text)
    # Separators at the edges of the input would otherwise survive as
    # leading/trailing hyphens (e.g. "- Hello -" -> "-hello-").
    return text.strip('-')

def normalize_meta_and_body(meta, body):
    """Reduce parsed front matter to the allowed keys and merge tag fields.

    * The key ``titel`` is renamed to ``title`` when no ``title`` exists.
    * Values found under ``tags``, ``emotionale_keywords``, ``keywords`` and
      ``schluesselwoerter`` are merged into a single ``tags`` list. Lists
      are taken as-is, strings are split on commas; ``#`` characters and
      surrounding whitespace are removed, empty entries are dropped and
      duplicates are removed while keeping first-seen order.
    * Any other key that is not allowed is moved into the body as a
      ``## Header`` section, but only when its value is a non-empty string;
      other value types are dropped.

    Args:
        meta: Mapping of front-matter keys to values. It is not modified.
        body: Markdown body text.

    Returns:
        A ``(clean_meta, final_body)`` tuple.
    """
    allowed_keys = {"title", "type", "status", "tags", "id", "created", "updated", "aliases", "lang"}
    tag_candidates = ("tags", "emotionale_keywords", "keywords", "schluesselwoerter")

    # Work on a shallow copy so the rename below does not leak to the caller.
    meta = dict(meta or {})
    if "titel" in meta and "title" not in meta:
        meta["title"] = meta.pop("titel")

    # Gather raw tags from every candidate key.
    raw_tags = []
    for key in tag_candidates:
        val = meta.get(key)
        if isinstance(val, list):
            raw_tags.extend(val)
        elif isinstance(val, str):
            raw_tags.extend(part.strip() for part in val.split(","))

    clean_meta = {}
    extra_content = []
    for key, val in meta.items():
        if key in allowed_keys:
            clean_meta[key] = val
        elif key in tag_candidates:
            continue  # already merged into raw_tags
        elif val and isinstance(val, str):
            header = key.replace("_", " ").title()
            extra_content.append(f"## {header}\n{val}\n")

    if raw_tags:
        cleaned = (str(tag).replace("#", "").strip() for tag in raw_tags)
        # dict.fromkeys de-duplicates while keeping first-seen order, so the
        # result is stable across runs (set() ordering depends on string
        # hash randomization).
        clean_meta["tags"] = list(dict.fromkeys(tag for tag in cleaned if tag))

    if extra_content:
        new_section = "\n".join(extra_content)
        final_body = f"{new_section}\n{body}"
    else:
        final_body = body

    return clean_meta, final_body

def parse_markdown_draft(full_text):
    """Parse an (LLM-generated) Markdown draft into metadata and body.

    The parser is deliberately forgiving:

    1. If the text does not already start with a front-matter delimiter,
       the content of the first fenced code block is used as the document
       (covers drafts wrapped in a Markdown/YAML code fence, with or
       without preamble text).
    2. Front matter is taken from between the first two ``---`` lines. When
       only an opening ``---`` exists, everything up to the first line
       starting with ``#`` is treated as front matter.
    3. ``#`` characters are removed from the YAML text before parsing so
       that hashtag-style values are not read as YAML comments.
    4. A missing title is filled from the first ``# `` heading of the body.
    5. ``type: draft`` is rewritten to ``status: draft`` plus
       ``type: experience``.

    Args:
        full_text: Raw draft text; ``None`` is treated as an empty string.

    Returns:
        The ``(clean_meta, final_body)`` tuple produced by
        ``normalize_meta_and_body``.
    """
    clean_text = (full_text or "").strip()

    # Only unwrap a code fence when the document itself is not already
    # visible at the top level. Otherwise a fenced code block inside the
    # body would replace the whole draft and discard its front matter.
    if not clean_text.startswith("---"):
        pattern_block = r"```(?:markdown|md|yaml)?\s*(.*?)\s*```"
        match_block = re.search(pattern_block, clean_text, re.DOTALL | re.IGNORECASE)
        if match_block:
            clean_text = match_block.group(1).strip()

    meta = {}
    body = clean_text
    yaml_str = ""

    parts = re.split(r"^---+\s*$", clean_text, maxsplit=2, flags=re.MULTILINE)

    if len(parts) >= 3:
        # Regular case: text before the first delimiter is discarded.
        yaml_str = parts[1]
        body = parts[2]
    elif clean_text.startswith("---"):
        # Healing case: the closing delimiter is missing.
        fallback_match = re.search(r"^---\s*(.*?)(?=\n#)", clean_text, re.DOTALL | re.MULTILINE)
        if fallback_match:
            yaml_str = fallback_match.group(1)
            # Slice by match position: the pattern's \s* swallows the
            # whitespace after '---', so rebuilding the matched text as
            # "---" + yaml_str would not be found in clean_text.
            body = clean_text[fallback_match.end():].strip()

    if yaml_str:
        yaml_str_clean = yaml_str.replace("#", "")
        try:
            parsed = yaml.safe_load(yaml_str_clean)
            if isinstance(parsed, dict):
                meta = parsed
        except Exception as e:
            # Best effort: malformed front matter must not abort parsing.
            print(f"YAML Parsing Warning: {e}")

    if not meta.get("title"):
        h1_match = re.search(r"^#\s+(.*)$", body, re.MULTILINE)
        if h1_match:
            meta["title"] = h1_match.group(1).strip()

    if meta.get("type") == "draft":
        meta["status"] = "draft"
        meta["type"] = "experience"

    return normalize_meta_and_body(meta, body)

def build_markdown_doc(meta, body):
    """Render metadata and body as a Markdown document with YAML front matter.

    An ``id`` of the form ``YYYYMMDD-<slug of title>`` is generated when the
    metadata has no ``id`` or carries the placeholder ``generated_on_save``.
    ``updated`` is always set to today's date. The keys ``id``, ``type``,
    ``title``, ``status`` and ``tags`` are emitted first, in that order,
    followed by all remaining keys in their original order.

    Args:
        meta: Mapping of front-matter keys to values. It is not modified;
            generated values only appear in the returned document.
        body: Markdown body text.

    Returns:
        The document text ``---\\n<yaml>\\n---\\n\\n<body>``. If the YAML
        cannot be serialized, the front matter contains
        ``error: generating_yaml`` instead.
    """
    # Shallow copy: building the ordered mapping must not strip keys from
    # the dict the caller keeps using.
    meta = dict(meta)
    now = datetime.now()

    if "id" not in meta or meta["id"] == "generated_on_save":
        raw_title = meta.get('title', 'note')
        clean_slug = slugify(raw_title)[:50] or "note"
        meta["id"] = f"{now.strftime('%Y%m%d')}-{clean_slug}"

    meta["updated"] = now.strftime("%Y-%m-%d")

    prio_keys = ("id", "type", "title", "status", "tags")
    ordered_meta = {k: meta[k] for k in prio_keys if k in meta}
    ordered_meta.update((k, v) for k, v in meta.items() if k not in ordered_meta)

    try:
        yaml_str = yaml.dump(ordered_meta, default_flow_style=None, sort_keys=False, allow_unicode=True).strip()
    except Exception:
        # Best effort: still return a document, but do not swallow
        # KeyboardInterrupt/SystemExit as a bare except would.
        yaml_str = "error: generating_yaml"

    return f"---\n{yaml_str}\n---\n\n{body}"

def load_history_from_logs(filepath, limit=10):
    """Return the most recent distinct queries from a JSON-lines log file.

    The file is read line by line from the end. Each line is expected to be
    a JSON object; the value of its ``query_text`` key is collected when it
    is truthy and not already present. Lines that are not valid JSON or not
    JSON objects are skipped.

    Args:
        filepath: Path of the log file (``str`` or ``pathlib.Path``).
        limit: Maximum number of queries to return. ``None`` means no
            limit; zero or a negative value yields an empty list.

    Returns:
        A list of query values, newest first. Empty when the file is
        missing, unreadable or not valid UTF-8.
    """
    queries = []
    if limit is not None and limit <= 0:
        return queries

    try:
        with open(filepath, "r", encoding="utf-8") as f:
            lines = f.readlines()
    except (OSError, UnicodeError):
        # History is optional: a missing or unreadable log means no history.
        return queries

    for line in reversed(lines):
        try:
            entry = json.loads(line)
        except ValueError:  # includes json.JSONDecodeError
            continue
        if not isinstance(entry, dict):
            continue

        q = entry.get("query_text")
        if q and q not in queries:
            queries.append(q)
            if limit is not None and len(queries) >= limit:
                break

    return queries