mindnet/app/frontend/ui_utils.py
2025-12-15 15:40:39 +01:00

146 lines
4.8 KiB
Python

"""
FILE: app/frontend/ui_utils.py
DESCRIPTION: String utilities. Parsers for Markdown/YAML (LLM healing) and helpers for loading history.
VERSION: 2.6.0
STATUS: Active
DEPENDENCIES: re, yaml, unicodedata, json, datetime
LAST_ANALYSIS: 2025-12-15
"""
import re
import yaml
import unicodedata
import json
from datetime import datetime
def slugify(value):
    """Convert *value* into a lowercase, ASCII-only, hyphen-separated slug.

    German umlauts and ``ß`` are transliterated (``ä`` -> ``ae``, ``ß`` ->
    ``ss``, ...), ``&`` and ``+`` become ``und``. Remaining non-ASCII
    characters are reduced to their ASCII base letter or dropped. Runs of
    whitespace and hyphens collapse into a single hyphen, and the result
    never starts or ends with a hyphen.

    Args:
        value: Any object; it is converted with ``str()``. Falsy values
            (``None``, ``""``, ``0``) yield an empty string.

    Returns:
        The slug, or ``""`` when nothing usable remains.
    """
    if not value:
        return ""

    # Compose first: a decomposed umlaut ("a" + U+0308) would otherwise miss
    # the transliteration table and degrade to the bare base letter.
    value = unicodedata.normalize('NFC', str(value)).lower()

    # Single-pass replacement; none of the outputs contain a key, so the
    # result is identical to chained ``str.replace`` calls.
    transliteration = str.maketrans(
        {'ä': 'ae', 'ö': 'oe', 'ü': 'ue', 'ß': 'ss', '&': 'und', '+': 'und'}
    )
    value = value.translate(transliteration)

    # Decompose what is left and drop everything that is not plain ASCII.
    value = unicodedata.normalize('NFKD', value).encode('ascii', 'ignore').decode('ascii')
    value = re.sub(r'[^\w\s-]', '', value).strip()

    # Collapse separators, then trim hyphens left over at the edges
    # (e.g. from input such as " - Hello - ").
    return re.sub(r'[-\s]+', '-', value).strip('-')
def normalize_meta_and_body(meta, body):
    """Reduce front-matter metadata to the allowed keys and fold the rest into the body.

    * A ``titel`` key is renamed to ``title`` when no ``title`` exists.
    * Tags are collected from ``tags``, ``emotionale_keywords``, ``keywords``
      and ``schluesselwoerter`` (lists, or comma-separated strings), stripped
      of ``#`` and surrounding whitespace, and de-duplicated while keeping
      first-seen order.
    * Every other key with a non-empty string value becomes a
      ``## <Header>`` section that is prepended to the body. Other keys with
      non-string or empty values are dropped.

    The ``meta`` mapping passed in is not modified.

    Args:
        meta: Mapping with the parsed front-matter fields.
        body: Markdown body text.

    Returns:
        A ``(clean_meta, final_body)`` tuple.
    """
    ALLOWED_KEYS = {"title", "type", "status", "tags", "id", "created", "updated", "aliases", "lang"}
    tag_candidates = ["tags", "emotionale_keywords", "keywords", "schluesselwoerter"]

    # Work on a shallow copy so the rename below does not leak to the caller.
    meta = dict(meta)
    if "titel" in meta and "title" not in meta:
        meta["title"] = meta.pop("titel")

    # Gather raw tags from every known tag-like field.
    all_tags = []
    for key in tag_candidates:
        val = meta.get(key)
        if isinstance(val, list):
            all_tags.extend(val)
        elif isinstance(val, str):
            all_tags.extend(t.strip() for t in val.split(","))

    clean_meta = {}
    extra_content = []
    for key, val in meta.items():
        if key in ALLOWED_KEYS:
            clean_meta[key] = val
        elif key in tag_candidates:
            # Already merged into all_tags above.
            continue
        elif val and isinstance(val, str):
            # str(): parsed YAML may contain non-string keys (e.g. integers).
            header = str(key).replace("_", " ").title()
            extra_content.append(f"## {header}\n{val}\n")

    if all_tags:
        cleaned = (str(t).replace("#", "").strip() for t in all_tags)
        # dict.fromkeys de-duplicates while keeping a stable order.
        clean_meta["tags"] = list(dict.fromkeys(t for t in cleaned if t))

    if extra_content:
        new_section = "\n".join(extra_content)
        final_body = f"{new_section}\n{body}"
    else:
        final_body = body

    return clean_meta, final_body
def parse_markdown_draft(full_text):
    """Split a Markdown draft into metadata and body, tolerating malformed input.

    Steps:
      1. If the text contains a fenced block (optionally tagged ``markdown``,
         ``md`` or ``yaml``), only the content of the first such block is used.
      2. Front matter is taken from between the first two ``---`` lines. If
         the closing ``---`` is missing but the text starts with ``---``,
         everything up to the first line starting with ``#`` is treated as
         front matter.
      3. All ``#`` characters are removed from the front matter before it is
         parsed with ``yaml.safe_load``; parse errors are printed and ignored.
      4. A missing title is taken from the first ``# `` heading in the body.
      5. ``type: draft`` is rewritten to ``status: draft`` / ``type: experience``.

    Args:
        full_text: Raw draft text; ``None`` is treated as an empty string.

    Returns:
        The ``(meta, body)`` tuple produced by ``normalize_meta_and_body``.
    """
    clean_text = (full_text or "").strip()

    # NOTE(review): this unwraps the FIRST fenced block found anywhere in the
    # text, so a document that merely contains a code block in its body is
    # reduced to that block's content — confirm this is intended.
    pattern_block = r"```(?:markdown|md|yaml)?\s*(.*?)\s*```"
    match_block = re.search(pattern_block, clean_text, re.DOTALL | re.IGNORECASE)
    if match_block:
        clean_text = match_block.group(1).strip()

    meta = {}
    body = clean_text
    yaml_str = ""

    parts = re.split(r"^---+\s*$", clean_text, maxsplit=2, flags=re.MULTILINE)
    if len(parts) >= 3:
        yaml_str = parts[1]
        body = parts[2]
    elif clean_text.startswith("---"):
        # Unterminated front matter: take everything before the first heading.
        fallback_match = re.search(r"^---\s*(.*?)(?=\n#)", clean_text, re.DOTALL | re.MULTILINE)
        if fallback_match:
            yaml_str = fallback_match.group(1)
            # Cut by match position. The pattern's ``\s*`` swallows the newline
            # after ``---``, so the literal "---" + yaml_str usually does not
            # occur in the text and a string replace would leave the front
            # matter inside the body.
            body = clean_text[fallback_match.end():].strip()

    if yaml_str:
        # Drops every '#' (e.g. "#tag" values), not only comment markers.
        yaml_str_clean = yaml_str.replace("#", "")
        try:
            parsed = yaml.safe_load(yaml_str_clean)
        except Exception as e:
            # Deliberately broad: parsing is best effort and must not abort
            # the draft import.
            print(f"YAML Parsing Warning: {e}")
        else:
            if isinstance(parsed, dict):
                meta = parsed

    if not meta.get("title"):
        h1_match = re.search(r"^#\s+(.*)$", body, re.MULTILINE)
        if h1_match:
            meta["title"] = h1_match.group(1).strip()

    if meta.get("type") == "draft":
        meta["status"] = "draft"
        meta["type"] = "experience"

    return normalize_meta_and_body(meta, body)
def build_markdown_doc(meta, body):
    """Render metadata and body as a Markdown document with YAML front matter.

    * When ``id`` is missing, empty or equal to ``"generated_on_save"``, an
      id of the form ``YYYYMMDD-<slug of title>`` is generated (slug limited
      to 50 characters, ``"note"`` as fallback).
    * ``updated`` is always set to today's date (``YYYY-MM-DD``).
    * ``id``, ``type``, ``title``, ``status`` and ``tags`` are emitted first,
      in that order; all remaining keys follow in their original order.

    The ``meta`` mapping passed in is not modified.

    Args:
        meta: Mapping with the front-matter fields.
        body: Markdown body text.

    Returns:
        The document as ``---\\n<yaml>\\n---\\n\\n<body>``. If the YAML cannot
        be generated, the front matter contains ``error: generating_yaml``.
    """
    # Shallow copy: the id/updated assignments and the reordering below must
    # not alter (or strip keys from) the caller's dict.
    meta = dict(meta)
    # One timestamp for both fields so they cannot disagree around midnight.
    now = datetime.now()

    current_id = meta.get("id")
    if not current_id or current_id == "generated_on_save":
        raw_title = meta.get('title', 'note')
        # Truncation can cut right after a separator; drop dangling hyphens.
        clean_slug = slugify(raw_title)[:50].strip("-") or "note"
        meta["id"] = f"{now.strftime('%Y%m%d')}-{clean_slug}"

    meta["updated"] = now.strftime("%Y-%m-%d")

    prio_keys = ["id", "type", "title", "status", "tags"]
    ordered_meta = {k: meta[k] for k in prio_keys if k in meta}
    ordered_meta.update((k, v) for k, v in meta.items() if k not in ordered_meta)

    try:
        yaml_str = yaml.dump(ordered_meta, default_flow_style=None, sort_keys=False, allow_unicode=True).strip()
    except Exception:
        # Best effort: still return a document, but do not swallow
        # KeyboardInterrupt/SystemExit as a bare ``except`` would.
        yaml_str = "error: generating_yaml"

    return f"---\n{yaml_str}\n---\n\n{body}"
def load_history_from_logs(filepath, limit=10):
    """Return the most recent distinct queries found in a line-based JSON log.

    The file is read as UTF-8 and scanned from the last line to the first.
    Each line is parsed as JSON; from every JSON object the ``query_text``
    value is taken. Empty values and duplicates are skipped.

    Args:
        filepath: Path of the log file (``str`` or path-like object).
        limit: Maximum number of queries to return.

    Returns:
        Up to ``limit`` queries, newest first. An empty list when ``limit``
        is not positive or the file is missing or unreadable.
    """
    if limit <= 0:
        return []

    # EAFP: a missing or unreadable file simply means "no history".
    try:
        with open(filepath, "r", encoding="utf-8") as f:
            lines = f.readlines()
    except (OSError, UnicodeDecodeError):
        return []

    queries = []
    for line in reversed(lines):
        try:
            entry = json.loads(line)
        except ValueError:
            # Blank or corrupt line; json.JSONDecodeError is a ValueError.
            continue
        if not isinstance(entry, dict):
            # Valid JSON, but not an object that could carry "query_text".
            continue
        q = entry.get("query_text")
        if q and q not in queries:
            queries.append(q)
            if len(queries) >= limit:
                break
    return queries