146 lines
4.8 KiB
Python
146 lines
4.8 KiB
Python
"""
|
|
FILE: app/frontend/ui_utils.py
|
|
DESCRIPTION: String utilities. Parsers for Markdown/YAML (LLM healing) and helpers for history loading.
|
|
VERSION: 2.6.0
|
|
STATUS: Active
|
|
DEPENDENCIES: re, yaml, unicodedata, json, datetime
|
|
LAST_ANALYSIS: 2025-12-15
|
|
"""
|
|
|
|
import re
|
|
import yaml
|
|
import unicodedata
|
|
import json
|
|
from datetime import datetime
|
|
|
|
def slugify(value):
    """Convert ``value`` into a lowercase, ASCII-only, hyphen-separated slug.

    German umlauts and ``ß`` are transliterated (``ä`` -> ``ae``, ``ß`` ->
    ``ss``, ...) and ``&`` / ``+`` become ``und``. Any other non-ASCII
    character is reduced via NFKD decomposition or dropped. Characters that
    are not word characters, whitespace or hyphens are removed, and runs of
    whitespace/hyphens collapse into a single hyphen.

    Args:
        value: Any object; it is converted with ``str()``. Falsy values
            (``None``, ``""``, ``0``) yield an empty string.

    Returns:
        The slug without leading or trailing hyphens. May be empty when the
        input contains no usable characters.
    """
    if not value:
        return ""

    # Transliterate first: the ASCII fold below would otherwise turn "ä" into
    # a plain "a" and drop "ß" completely.
    transliteration = str.maketrans(
        {'ä': 'ae', 'ö': 'oe', 'ü': 'ue', 'ß': 'ss', '&': 'und', '+': 'und'}
    )
    text = str(value).lower().translate(transliteration)

    text = unicodedata.normalize('NFKD', text).encode('ascii', 'ignore').decode('ascii')
    text = re.sub(r'[^\w\s-]', '', text).strip()

    # Collapse separator runs, then drop dangling hyphens that remain when the
    # input started or ended with a separator (e.g. "Hello -" -> "hello").
    return re.sub(r'[-\s]+', '-', text).strip('-')
|
|
|
|
def normalize_meta_and_body(meta, body):
    """Reduce front-matter metadata to the allowed keys and fold the rest into the body.

    Processing steps:
      * ``titel`` is renamed to ``title`` when no ``title`` key exists.
      * Tags are collected from ``tags``, ``emotionale_keywords``,
        ``keywords`` and ``schluesselwoerter`` (lists or comma-separated
        strings), stripped of ``#`` and whitespace, and de-duplicated while
        keeping first-seen order.
      * Keys outside the allowed set whose value is a non-empty string are
        turned into ``## Header`` sections that are prepended to the body.
        Other values under unknown keys are discarded.

    Args:
        meta: Mapping with the parsed metadata. It is not modified. ``None``
            is treated as an empty mapping.
        body: Markdown body text.

    Returns:
        Tuple ``(clean_meta, final_body)``.
    """
    allowed_keys = {"title", "type", "status", "tags", "id", "created", "updated", "aliases", "lang"}
    tag_keys = ("tags", "emotionale_keywords", "keywords", "schluesselwoerter")

    # Shallow copy: the key rename below must not leak into the caller's dict.
    meta = dict(meta or {})

    if "titel" in meta and "title" not in meta:
        meta["title"] = meta.pop("titel")

    raw_tags = []
    for key in tag_keys:
        val = meta.get(key)
        if isinstance(val, list):
            raw_tags.extend(val)
        elif isinstance(val, str):
            raw_tags.extend(part.strip() for part in val.split(","))

    clean_meta = {}
    extra_sections = []
    for key, val in meta.items():
        if key in allowed_keys:
            clean_meta[key] = val
        elif key in tag_keys:
            # Alternative tag keys are merged into "tags" below.
            continue
        elif val and isinstance(val, str):
            # str(): YAML allows non-string keys (e.g. ``1: text``).
            header = str(key).replace("_", " ").title()
            extra_sections.append(f"## {header}\n{val}\n")

    if raw_tags:
        cleaned = (str(tag).replace("#", "").strip() for tag in raw_tags)
        # dict.fromkeys de-duplicates with a stable order; a set would make
        # the tag order differ from run to run.
        clean_meta["tags"] = list(dict.fromkeys(tag for tag in cleaned if tag))

    if extra_sections:
        new_section = "\n".join(extra_sections)
        return clean_meta, f"{new_section}\n{body}"

    return clean_meta, body
|
|
|
|
def parse_markdown_draft(full_text):
    """Split an LLM-produced Markdown draft into metadata and body.

    Steps:
      1. If the text contains a fenced block (optionally tagged ``markdown``,
         ``md`` or ``yaml``), only the content of the first such block is
         used.
      2. YAML front matter delimited by two ``---`` lines is extracted. When
         the closing delimiter is missing but the text starts with ``---``,
         everything up to the first line starting with ``#`` is treated as
         front matter.
      3. The front matter is parsed with ``yaml.safe_load``; parse errors are
         printed and result in empty metadata.
      4. A missing title is taken from the first ``# `` heading of the body.
      5. ``type: draft`` is rewritten to ``status: draft`` /
         ``type: experience``.

    Args:
        full_text: Raw text of the draft. ``None`` is treated as ``""``.

    Returns:
        Tuple ``(meta, body)`` as returned by ``normalize_meta_and_body``.
    """
    clean_text = (full_text or "").strip()

    # NOTE(review): this takes the FIRST fenced block anywhere in the text, so
    # a draft that merely contains a code sample is reduced to that sample.
    # Behavior kept as-is; confirm whether that is intended.
    pattern_block = r"```(?:markdown|md|yaml)?\s*(.*?)\s*```"
    match_block = re.search(pattern_block, clean_text, re.DOTALL | re.IGNORECASE)
    if match_block:
        clean_text = match_block.group(1).strip()

    meta = {}
    body = clean_text
    yaml_str = ""

    parts = re.split(r"^---+\s*$", clean_text, maxsplit=2, flags=re.MULTILINE)

    if len(parts) >= 3:
        yaml_str = parts[1]
        body = parts[2]
    elif clean_text.startswith("---"):
        # Unterminated front matter: take everything up to the first heading.
        fallback_match = re.search(r"^---\s*(.*?)(?=\n#)", clean_text, re.DOTALL | re.MULTILINE)
        if fallback_match:
            yaml_str = fallback_match.group(1)
            # Cut the matched span out by position. Rebuilding it as
            # "---" + yaml_str misses the whitespace consumed by ``\s*`` and
            # would leave the front matter inside the body.
            start, end = fallback_match.span()
            body = (clean_text[:start] + clean_text[end:]).strip()

    if yaml_str:
        # Every "#" is removed before parsing — presumably so hashtag-style
        # values are not read as YAML comments.
        yaml_str_clean = yaml_str.replace("#", "")
        try:
            parsed = yaml.safe_load(yaml_str_clean)
            if isinstance(parsed, dict):
                meta = parsed
        except Exception as e:
            # Deliberately broad and best-effort: a broken header must not
            # prevent the body from being returned.
            print(f"YAML Parsing Warning: {e}")

    if not meta.get("title"):
        h1_match = re.search(r"^#\s+(.*)$", body, re.MULTILINE)
        if h1_match:
            meta["title"] = h1_match.group(1).strip()

    if meta.get("type") == "draft":
        meta["status"] = "draft"
        meta["type"] = "experience"

    return normalize_meta_and_body(meta, body)
|
|
|
|
def build_markdown_doc(meta, body):
    """Render metadata and body as a Markdown document with YAML front matter.

    An ``id`` of the form ``YYYYMMDD-<slug of title>`` is generated when the
    metadata has no ``id`` or the placeholder ``"generated_on_save"``; the
    slug is cut to 50 characters and falls back to ``"note"``. ``updated`` is
    always set to today's date. In the front matter the keys ``id``, ``type``,
    ``title``, ``status`` and ``tags`` come first, followed by all remaining
    keys in their original order.

    Args:
        meta: Mapping with the document metadata. It is not modified; the
            function works on a shallow copy. ``None`` is treated as empty.
        body: Markdown body text.

    Returns:
        The complete document as a string. If YAML serialization fails, the
        front matter contains ``error: generating_yaml`` instead.
    """
    # Copy so that neither the generated fields nor the reordering below
    # alter (or remove keys from) the caller's dict.
    meta = dict(meta or {})

    # Sample the clock once so that id and "updated" refer to the same day.
    now = datetime.now()

    if "id" not in meta or meta["id"] == "generated_on_save":
        raw_title = meta.get('title', 'note')
        clean_slug = slugify(raw_title)[:50] or "note"
        meta["id"] = f"{now.strftime('%Y%m%d')}-{clean_slug}"

    meta["updated"] = now.strftime("%Y-%m-%d")

    prio_keys = ("id", "type", "title", "status", "tags")
    ordered_meta = {k: meta[k] for k in prio_keys if k in meta}
    ordered_meta.update((k, v) for k, v in meta.items() if k not in ordered_meta)

    try:
        yaml_str = yaml.dump(
            ordered_meta, default_flow_style=None, sort_keys=False, allow_unicode=True
        ).strip()
    except Exception:
        # Best-effort: still return a document, but let KeyboardInterrupt and
        # SystemExit propagate (a bare ``except`` would swallow them).
        yaml_str = "error: generating_yaml"

    return f"---\n{yaml_str}\n---\n\n{body}"
|
|
|
|
def load_history_from_logs(filepath, limit=10):
    """Return the most recent distinct queries from a JSON-lines log file.

    The file is read completely and scanned from the last line to the first.
    Each line is parsed as JSON; from every JSON object the ``query_text``
    value is taken. Empty values and duplicates are skipped, as are lines
    that are not valid JSON or not JSON objects.

    Args:
        filepath: Path of the log file (``str`` or path-like object).
        limit: Maximum number of queries to return. Values <= 0 yield an
            empty list.

    Returns:
        List of queries, last log line first. Empty when the file is missing,
        unreadable or contains no usable entries.
    """
    queries = []
    if limit <= 0:
        return queries

    try:
        with open(filepath, "r", encoding="utf-8") as f:
            lines = f.readlines()
    except (OSError, UnicodeError):
        # Missing, unreadable or wrongly encoded file: history is optional.
        return queries

    for line in reversed(lines):
        try:
            entry = json.loads(line)
        except ValueError:
            # json.JSONDecodeError is a ValueError: blank or corrupt line.
            continue

        if not isinstance(entry, dict):
            continue

        q = entry.get("query_text")
        if q and q not in queries:
            queries.append(q)
            if len(queries) >= limit:
                break

    return queries