WP15 #9

Merged
Lars merged 54 commits from WP15 into main 2025-12-13 06:39:48 +01:00
Showing only changes of commit 0ba8ae8d1e - Show all commits

View File

@ -5,6 +5,7 @@ import os
import json
import re
import yaml
import unicodedata
from datetime import datetime
from pathlib import Path
from dotenv import load_dotenv
@ -71,15 +72,24 @@ if "user_id" not in st.session_state: st.session_state.user_id = str(uuid.uuid4(
# --- HELPER FUNCTIONS ---
def slugify(value):
    """Build a clean, ASCII-only slug (for file names / IDs) from a title.

    Steps:
      1. Coerce ``value`` to ``str``.
      2. Spell out the German sharp s (``ß`` -> ``ss``). It has no NFKD
         decomposition, so the ASCII step below would otherwise drop the
         letter entirely (``"Straße"`` -> ``"strae"``).
      3. NFKD-normalize and drop every non-ASCII code point, which strips
         diacritics (``"Ü"`` -> ``"U"``).
      4. Remove everything that is not a word character, whitespace or a
         hyphen; trim and lowercase.
      5. Collapse runs of whitespace/hyphens into a single hyphen and strip
         hyphens from both ends so the slug never starts or ends with ``-``.

    Args:
        value: Any object; it is converted with ``str()`` first.

    Returns:
        The slug as a lowercase ASCII string. May be empty when ``value``
        contains no usable characters, so callers should supply a fallback.
    """
    text = str(value)
    # Must happen before the ASCII encode: NFKD leaves the sharp s untouched.
    text = text.replace("ß", "ss").replace("ẞ", "SS")
    text = unicodedata.normalize('NFKD', text).encode('ascii', 'ignore').decode('ascii')
    text = re.sub(r'[^\w\s-]', '', text).strip().lower()
    # strip('-') removes separators left over from input such as "foo -".
    return re.sub(r'[-\s]+', '-', text).strip('-')
def normalize_meta_and_body(meta, body):
"""Sanitizer: Stellt sicher, dass nur erlaubte Felder im Frontmatter bleiben."""
ALLOWED_KEYS = {"title", "type", "status", "tags", "id", "created", "updated", "aliases", "lang"}
clean_meta = {}
extra_content = []
# Title Normalization
if "titel" in meta and "title" not in meta:
meta["title"] = meta.pop("titel")
# Tag Normalization
tag_candidates = ["tags", "emotionale_keywords", "keywords", "schluesselwoerter"]
all_tags = []
for key in tag_candidates:
@ -99,7 +109,12 @@ def normalize_meta_and_body(meta, body):
extra_content.append(f"## {header}\n{val}\n")
if all_tags:
clean_meta["tags"] = list(set(all_tags))
# Bereinige Tags von '#' und Duplikaten
clean_tags = []
for t in all_tags:
t_clean = str(t).replace("#", "").strip()
if t_clean: clean_tags.append(t_clean)
clean_meta["tags"] = list(set(clean_tags))
if extra_content:
new_section = "\n".join(extra_content)
@ -110,7 +125,7 @@ def normalize_meta_and_body(meta, body):
return clean_meta, final_body
def parse_markdown_draft(full_text):
"""Robustes Parsing + Sanitization."""
"""Robustes Parsing + Sanitization (YAML + Fallbacks)."""
clean_text = full_text
# 1. Markdown Code-Blöcke entfernen
@ -129,9 +144,7 @@ def parse_markdown_draft(full_text):
yaml_str = parts[1]
body_candidate = parts[2]
# --- FIX 1: Hashtag-Cleaner für YAML ---
# Entfernt #, wenn sie innerhalb von [] stehen, um YAML-Kommentare zu verhindern.
# Wir entfernen pauschal # im YAML-Block, da Tags dort keine brauchen.
# FIX 1: Hashtag-Cleaner für YAML (gegen Syntaxfehler)
yaml_str_clean = yaml_str.replace("#", "")
try:
@ -141,23 +154,28 @@ def parse_markdown_draft(full_text):
body = body_candidate.strip()
except Exception as e:
print(f"YAML Parsing Warning: {e}")
# Fallback: Body retten, Meta leer lassen
body = body_candidate.strip()
# --- FIX 2: Type/Status Swap Korrektur ---
# Wenn das LLM halluziniert hat: type='draft' -> Das ist eigentlich der Status.
# FIX 3: Titel-Fallback aus H1
if not meta.get("title"):
h1_match = re.search(r"^#\s+(.*)$", body, re.MULTILINE)
if h1_match:
meta["title"] = h1_match.group(1).strip()
# FIX 4: Type/Status Swap Korrektur
if meta.get("type") == "draft":
meta["status"] = "draft"
# Wir raten einen besseren Typ oder setzen default
meta["type"] = "experience" # Da Interviews oft Experiences sind
meta["type"] = "experience"
return normalize_meta_and_body(meta, body)
def build_markdown_doc(meta, body):
"""Baut das finale Dokument zusammen."""
# ID Generation
if "id" not in meta or meta["id"] == "generated_on_save":
safe_title = re.sub(r'[^a-zA-Z0-9]', '-', meta.get('title', 'note')).lower()[:30]
meta["id"] = f"{datetime.now().strftime('%Y%m%d')}-{safe_title}-{uuid.uuid4().hex[:4]}"
# Nutze slugify für ID
clean_slug = slugify(meta.get('title', 'note'))[:40] or "note"
meta["id"] = f"{datetime.now().strftime('%Y%m%d')}-{clean_slug}"
meta["updated"] = datetime.now().strftime("%Y-%m-%d")
@ -206,7 +224,6 @@ def send_chat_message(message: str, top_k: int, explain: bool):
return {"error": str(e)}
def analyze_draft_text(text: str, n_type: str):
"""Ruft den neuen Intelligence-Service (WP-11) auf."""
try:
response = requests.post(
INGEST_ANALYZE_ENDPOINT,
@ -219,7 +236,6 @@ def analyze_draft_text(text: str, n_type: str):
return {"error": str(e)}
def save_draft_to_vault(markdown_content: str, filename: str = None):
"""Ruft den neuen Persistence-Service (WP-11) auf."""
try:
response = requests.post(
INGEST_SAVE_ENDPOINT,
@ -242,7 +258,7 @@ def submit_feedback(query_id, node_id, score, comment=None):
def render_sidebar():
with st.sidebar:
st.title("🧠 mindnet")
st.caption("v2.4 | Async Intelligence")
st.caption("v2.4.2 | Robust UI")
mode = st.radio("Modus", ["💬 Chat", "📝 Manueller Editor"], index=0)
st.divider()
st.subheader("⚙️ Settings")
@ -257,7 +273,6 @@ def render_sidebar():
return mode, top_k, explain
def render_draft_editor(msg):
# Ensure ID Stability
if "query_id" not in msg or not msg["query_id"]:
msg["query_id"] = str(uuid.uuid4())
@ -270,7 +285,7 @@ def render_draft_editor(msg):
widget_body_key = f"{key_base}_widget_body"
data_body_key = f"{key_base}_data_body"
# --- 1. INIT STATE (Nur beim allerersten Laden der Message) ---
# --- 1. INIT STATE ---
if f"{key_base}_init" not in st.session_state:
meta, body = parse_markdown_draft(msg["content"])
if "type" not in meta: meta["type"] = "default"
@ -278,27 +293,24 @@ def render_draft_editor(msg):
tags = meta.get("tags", [])
meta["tags_str"] = ", ".join(tags) if isinstance(tags, list) else str(tags)
# Persistent Data (Source of Truth)
# Persistent Data
st.session_state[data_meta_key] = meta
st.session_state[data_sugg_key] = []
st.session_state[data_body_key] = body.strip()
# WIDGET KEYS INITIALISIEREN (Resurrection Fix)
# Wir setzen die Werte direkt in den Widget-Key, damit Streamlit sie beim ersten Render findet.
# Widgets Init
st.session_state[f"{key_base}_wdg_title"] = meta["title"]
st.session_state[f"{key_base}_wdg_type"] = meta["type"]
st.session_state[f"{key_base}_wdg_tags"] = meta["tags_str"]
st.session_state[f"{key_base}_init"] = True
# --- 2. RESURRECTION FIX (Body) ---
# Wenn wir vom Manuellen Editor zurückkommen, ist das Widget weg, aber die Daten sind noch da.
# --- 2. RESURRECTION ---
if widget_body_key not in st.session_state and data_body_key in st.session_state:
st.session_state[widget_body_key] = st.session_state[data_body_key]
# --- CALLBACKS ---
def _sync_meta():
# Schreibt Widget-Werte zurück in den Meta-Store
meta = st.session_state[data_meta_key]
meta["title"] = st.session_state.get(f"{key_base}_wdg_title", "")
meta["type"] = st.session_state.get(f"{key_base}_wdg_type", "default")
@ -306,11 +318,9 @@ def render_draft_editor(msg):
st.session_state[data_meta_key] = meta
def _sync_body():
# Sync Widget -> Data (Source of Truth)
st.session_state[data_body_key] = st.session_state[widget_body_key]
def _insert_text(text_to_insert):
# Insert in Widget Key und Sync Data
current = st.session_state.get(widget_body_key, "")
new_text = f"{current}\n\n{text_to_insert}"
st.session_state[widget_body_key] = new_text
@ -326,26 +336,19 @@ def render_draft_editor(msg):
st.markdown(f'<div class="draft-box">', unsafe_allow_html=True)
st.markdown("### 📝 Entwurf bearbeiten")
# Metadata Form
meta_ref = st.session_state[data_meta_key]
c1, c2 = st.columns([2, 1])
with c1:
# FIX: Keine 'value=' Angabe, da der Key schon existiert
st.text_input("Titel", key=f"{key_base}_wdg_title", on_change=_sync_meta)
with c2:
known_types = ["concept", "project", "decision", "experience", "journal", "value", "goal", "principle", "risk", "belief"]
# Sicherstellen, dass der aktuelle Typ in der Liste ist
curr_type = st.session_state.get(f"{key_base}_wdg_type", meta_ref["type"])
if curr_type not in known_types: known_types.append(curr_type)
# FIX: Keine 'index=' Angabe, da der Key schon existiert
st.selectbox("Typ", known_types, key=f"{key_base}_wdg_type", on_change=_sync_meta)
# Tags
st.text_input("Tags", key=f"{key_base}_wdg_tags", on_change=_sync_meta)
# Tabs
tab_edit, tab_intel, tab_view = st.tabs(["✏️ Inhalt", "🧠 Intelligence", "👁️ Vorschau"])
# --- TAB 1: EDITOR ---
@ -365,7 +368,6 @@ def render_draft_editor(msg):
if st.button("🔍 Analyse starten", key=f"{key_base}_analyze"):
st.session_state[data_sugg_key] = []
# Lese vom Widget (aktuell) oder Data (Fallback)
text_to_analyze = st.session_state.get(widget_body_key, st.session_state.get(data_body_key, ""))
current_doc_type = st.session_state.get(f"{key_base}_wdg_type", "concept")
@ -382,7 +384,6 @@ def render_draft_editor(msg):
else:
st.success(f"{len(suggestions)} Vorschläge gefunden.")
# Render List
suggestions = st.session_state[data_sugg_key]
if suggestions:
current_text_state = st.session_state.get(widget_body_key, "")
@ -414,11 +415,12 @@ def render_draft_editor(msg):
final_meta = {
"id": "generated_on_save",
"type": st.session_state.get(f"{key_base}_wdg_type", "default"),
"title": st.session_state.get(f"{key_base}_wdg_title", "Untitled"),
# Title mit Fallback (Widget > Meta > Untitled)
"title": st.session_state.get(f"{key_base}_wdg_title", meta_ref.get("title", "Untitled")),
"status": "draft",
"tags": final_tags
}
# Final Doc aus Data
final_body = st.session_state.get(widget_body_key, st.session_state[data_body_key])
final_doc = build_markdown_doc(final_meta, final_body)
@ -433,14 +435,9 @@ def render_draft_editor(msg):
with b1:
if st.button("💾 Speichern & Indizieren", type="primary", key=f"{key_base}_save"):
with st.spinner("Speichere im Vault..."):
# safe_title = re.sub(r'[^a-zA-Z0-9]', '-', final_meta["title"]).lower()[:30] or "draft"
# FIX: .get() verwenden, falls 'title' fehlt
raw_title = final_meta.get("title", "draft")
safe_title = re.sub(r'[^a-zA-Z0-9]', '-', raw_title).lower()[:30] or "draft"
fname = f"{datetime.now().strftime('%Y%m%d')}-{safe_title}.md"
result = save_draft_to_vault(final_doc, filename=fname)
# Slugify für saubere Dateinamen
safe_title = slugify(raw_title)[:40] or "draft"
fname = f"{datetime.now().strftime('%Y%m%d')}-{safe_title}.md"
result = save_draft_to_vault(final_doc, filename=fname)
@ -460,7 +457,6 @@ def render_chat_interface(top_k, explain):
for idx, msg in enumerate(st.session_state.messages):
with st.chat_message(msg["role"]):
if msg["role"] == "assistant":
# Header
intent = msg.get("intent", "UNKNOWN")
src = msg.get("intent_source", "?")
icon = {"EMPATHY":"❤️", "DECISION":"⚖️", "CODING":"💻", "FACT":"📚", "INTERVIEW":"📝"}.get(intent, "🧠")
@ -469,13 +465,11 @@ def render_chat_interface(top_k, explain):
with st.expander("🐞 Debug Raw Payload", expanded=False):
st.json(msg)
# Logic
if intent == "INTERVIEW":
render_draft_editor(msg)
else:
st.markdown(msg["content"])
# Sources
if "sources" in msg and msg["sources"]:
for hit in msg["sources"]:
with st.expander(f"📄 {hit.get('note_id', '?')} ({hit.get('total_score', 0):.2f})"):