WP15 #9

Merged
Lars merged 54 commits from WP15 into main 2025-12-13 06:39:48 +01:00
Showing only changes of commit 0ba8ae8d1e - Show all commits

View File

@ -5,6 +5,7 @@ import os
import json import json
import re import re
import yaml import yaml
import unicodedata
from datetime import datetime from datetime import datetime
from pathlib import Path from pathlib import Path
from dotenv import load_dotenv from dotenv import load_dotenv
@ -71,15 +72,24 @@ if "user_id" not in st.session_state: st.session_state.user_id = str(uuid.uuid4(
# --- HELPER FUNCTIONS --- # --- HELPER FUNCTIONS ---
def slugify(value):
    """Build a clean, ASCII-only slug suitable for file names and IDs.

    The value is converted to ``str``, accents are stripped via NFKD
    normalization, every character that is not a word character, whitespace
    or hyphen is removed, the text is lower-cased, and runs of whitespace
    and hyphens are collapsed into a single ``-``.

    Args:
        value: Any object; it is passed through ``str()`` first.

    Returns:
        The slug as a lower-case ASCII string without leading or trailing
        hyphens. May be empty when nothing usable remains, so callers
        should supply their own fallback (e.g. ``slugify(x) or "note"``).
    """
    text = str(value)
    # The sharp s has no NFKD decomposition, so the ASCII encode below would
    # drop it silently ("Straße" -> "strae"). Transliterate it up front.
    text = text.replace("ß", "ss").replace("ẞ", "SS")
    # Split accented letters into base letter + combining mark, then discard
    # everything outside ASCII (i.e. the combining marks).
    text = unicodedata.normalize("NFKD", text).encode("ascii", "ignore").decode("ascii")
    text = re.sub(r"[^\w\s-]", "", text).strip().lower()
    # Collapse separators and trim hyphens left over at the edges so the
    # slug never starts or ends with "-".
    return re.sub(r"[-\s]+", "-", text).strip("-")
def normalize_meta_and_body(meta, body): def normalize_meta_and_body(meta, body):
"""Sanitizer: Stellt sicher, dass nur erlaubte Felder im Frontmatter bleiben.""" """Sanitizer: Stellt sicher, dass nur erlaubte Felder im Frontmatter bleiben."""
ALLOWED_KEYS = {"title", "type", "status", "tags", "id", "created", "updated", "aliases", "lang"} ALLOWED_KEYS = {"title", "type", "status", "tags", "id", "created", "updated", "aliases", "lang"}
clean_meta = {} clean_meta = {}
extra_content = [] extra_content = []
# Title Normalization
if "titel" in meta and "title" not in meta: if "titel" in meta and "title" not in meta:
meta["title"] = meta.pop("titel") meta["title"] = meta.pop("titel")
# Tag Normalization
tag_candidates = ["tags", "emotionale_keywords", "keywords", "schluesselwoerter"] tag_candidates = ["tags", "emotionale_keywords", "keywords", "schluesselwoerter"]
all_tags = [] all_tags = []
for key in tag_candidates: for key in tag_candidates:
@ -99,7 +109,12 @@ def normalize_meta_and_body(meta, body):
extra_content.append(f"## {header}\n{val}\n") extra_content.append(f"## {header}\n{val}\n")
if all_tags: if all_tags:
clean_meta["tags"] = list(set(all_tags)) # Bereinige Tags von '#' und Duplikaten
clean_tags = []
for t in all_tags:
t_clean = str(t).replace("#", "").strip()
if t_clean: clean_tags.append(t_clean)
clean_meta["tags"] = list(set(clean_tags))
if extra_content: if extra_content:
new_section = "\n".join(extra_content) new_section = "\n".join(extra_content)
@ -110,7 +125,7 @@ def normalize_meta_and_body(meta, body):
return clean_meta, final_body return clean_meta, final_body
def parse_markdown_draft(full_text): def parse_markdown_draft(full_text):
"""Robustes Parsing + Sanitization.""" """Robustes Parsing + Sanitization (YAML + Fallbacks)."""
clean_text = full_text clean_text = full_text
# 1. Markdown Code-Blöcke entfernen # 1. Markdown Code-Blöcke entfernen
@ -129,9 +144,7 @@ def parse_markdown_draft(full_text):
yaml_str = parts[1] yaml_str = parts[1]
body_candidate = parts[2] body_candidate = parts[2]
# --- FIX 1: Hashtag-Cleaner für YAML --- # FIX 1: Hashtag-Cleaner für YAML (gegen Syntaxfehler)
# Entfernt #, wenn sie innerhalb von [] stehen, um YAML-Kommentare zu verhindern.
# Wir entfernen pauschal # im YAML-Block, da Tags dort keine brauchen.
yaml_str_clean = yaml_str.replace("#", "") yaml_str_clean = yaml_str.replace("#", "")
try: try:
@ -141,23 +154,28 @@ def parse_markdown_draft(full_text):
body = body_candidate.strip() body = body_candidate.strip()
except Exception as e: except Exception as e:
print(f"YAML Parsing Warning: {e}") print(f"YAML Parsing Warning: {e}")
# Fallback: Body retten, Meta leer lassen
body = body_candidate.strip() body = body_candidate.strip()
# --- FIX 2: Type/Status Swap Korrektur --- # FIX 3: Titel-Fallback aus H1
# Wenn das LLM halluziniert hat: type='draft' -> Das ist eigentlich der Status. if not meta.get("title"):
h1_match = re.search(r"^#\s+(.*)$", body, re.MULTILINE)
if h1_match:
meta["title"] = h1_match.group(1).strip()
# FIX 4: Type/Status Swap Korrektur
if meta.get("type") == "draft": if meta.get("type") == "draft":
meta["status"] = "draft" meta["status"] = "draft"
# Wir raten einen besseren Typ oder setzen default meta["type"] = "experience"
meta["type"] = "experience" # Da Interviews oft Experiences sind
return normalize_meta_and_body(meta, body) return normalize_meta_and_body(meta, body)
def build_markdown_doc(meta, body): def build_markdown_doc(meta, body):
"""Baut das finale Dokument zusammen.""" """Baut das finale Dokument zusammen."""
# ID Generation
if "id" not in meta or meta["id"] == "generated_on_save": if "id" not in meta or meta["id"] == "generated_on_save":
safe_title = re.sub(r'[^a-zA-Z0-9]', '-', meta.get('title', 'note')).lower()[:30] # Nutze slugify für ID
meta["id"] = f"{datetime.now().strftime('%Y%m%d')}-{safe_title}-{uuid.uuid4().hex[:4]}" clean_slug = slugify(meta.get('title', 'note'))[:40] or "note"
meta["id"] = f"{datetime.now().strftime('%Y%m%d')}-{clean_slug}"
meta["updated"] = datetime.now().strftime("%Y-%m-%d") meta["updated"] = datetime.now().strftime("%Y-%m-%d")
@ -206,7 +224,6 @@ def send_chat_message(message: str, top_k: int, explain: bool):
return {"error": str(e)} return {"error": str(e)}
def analyze_draft_text(text: str, n_type: str): def analyze_draft_text(text: str, n_type: str):
"""Ruft den neuen Intelligence-Service (WP-11) auf."""
try: try:
response = requests.post( response = requests.post(
INGEST_ANALYZE_ENDPOINT, INGEST_ANALYZE_ENDPOINT,
@ -219,7 +236,6 @@ def analyze_draft_text(text: str, n_type: str):
return {"error": str(e)} return {"error": str(e)}
def save_draft_to_vault(markdown_content: str, filename: str = None): def save_draft_to_vault(markdown_content: str, filename: str = None):
"""Ruft den neuen Persistence-Service (WP-11) auf."""
try: try:
response = requests.post( response = requests.post(
INGEST_SAVE_ENDPOINT, INGEST_SAVE_ENDPOINT,
@ -242,7 +258,7 @@ def submit_feedback(query_id, node_id, score, comment=None):
def render_sidebar(): def render_sidebar():
with st.sidebar: with st.sidebar:
st.title("🧠 mindnet") st.title("🧠 mindnet")
st.caption("v2.4 | Async Intelligence") st.caption("v2.4.2 | Robust UI")
mode = st.radio("Modus", ["💬 Chat", "📝 Manueller Editor"], index=0) mode = st.radio("Modus", ["💬 Chat", "📝 Manueller Editor"], index=0)
st.divider() st.divider()
st.subheader("⚙️ Settings") st.subheader("⚙️ Settings")
@ -257,7 +273,6 @@ def render_sidebar():
return mode, top_k, explain return mode, top_k, explain
def render_draft_editor(msg): def render_draft_editor(msg):
# Ensure ID Stability
if "query_id" not in msg or not msg["query_id"]: if "query_id" not in msg or not msg["query_id"]:
msg["query_id"] = str(uuid.uuid4()) msg["query_id"] = str(uuid.uuid4())
@ -270,7 +285,7 @@ def render_draft_editor(msg):
widget_body_key = f"{key_base}_widget_body" widget_body_key = f"{key_base}_widget_body"
data_body_key = f"{key_base}_data_body" data_body_key = f"{key_base}_data_body"
# --- 1. INIT STATE (Nur beim allerersten Laden der Message) --- # --- 1. INIT STATE ---
if f"{key_base}_init" not in st.session_state: if f"{key_base}_init" not in st.session_state:
meta, body = parse_markdown_draft(msg["content"]) meta, body = parse_markdown_draft(msg["content"])
if "type" not in meta: meta["type"] = "default" if "type" not in meta: meta["type"] = "default"
@ -278,27 +293,24 @@ def render_draft_editor(msg):
tags = meta.get("tags", []) tags = meta.get("tags", [])
meta["tags_str"] = ", ".join(tags) if isinstance(tags, list) else str(tags) meta["tags_str"] = ", ".join(tags) if isinstance(tags, list) else str(tags)
# Persistent Data (Source of Truth) # Persistent Data
st.session_state[data_meta_key] = meta st.session_state[data_meta_key] = meta
st.session_state[data_sugg_key] = [] st.session_state[data_sugg_key] = []
st.session_state[data_body_key] = body.strip() st.session_state[data_body_key] = body.strip()
# WIDGET KEYS INITIALISIEREN (Resurrection Fix) # Widgets Init
# Wir setzen die Werte direkt in den Widget-Key, damit Streamlit sie beim ersten Render findet.
st.session_state[f"{key_base}_wdg_title"] = meta["title"] st.session_state[f"{key_base}_wdg_title"] = meta["title"]
st.session_state[f"{key_base}_wdg_type"] = meta["type"] st.session_state[f"{key_base}_wdg_type"] = meta["type"]
st.session_state[f"{key_base}_wdg_tags"] = meta["tags_str"] st.session_state[f"{key_base}_wdg_tags"] = meta["tags_str"]
st.session_state[f"{key_base}_init"] = True st.session_state[f"{key_base}_init"] = True
# --- 2. RESURRECTION FIX (Body) --- # --- 2. RESURRECTION ---
# Wenn wir vom Manuellen Editor zurückkommen, ist das Widget weg, aber die Daten sind noch da.
if widget_body_key not in st.session_state and data_body_key in st.session_state: if widget_body_key not in st.session_state and data_body_key in st.session_state:
st.session_state[widget_body_key] = st.session_state[data_body_key] st.session_state[widget_body_key] = st.session_state[data_body_key]
# --- CALLBACKS --- # --- CALLBACKS ---
def _sync_meta(): def _sync_meta():
# Schreibt Widget-Werte zurück in den Meta-Store
meta = st.session_state[data_meta_key] meta = st.session_state[data_meta_key]
meta["title"] = st.session_state.get(f"{key_base}_wdg_title", "") meta["title"] = st.session_state.get(f"{key_base}_wdg_title", "")
meta["type"] = st.session_state.get(f"{key_base}_wdg_type", "default") meta["type"] = st.session_state.get(f"{key_base}_wdg_type", "default")
@ -306,11 +318,9 @@ def render_draft_editor(msg):
st.session_state[data_meta_key] = meta st.session_state[data_meta_key] = meta
def _sync_body(): def _sync_body():
# Sync Widget -> Data (Source of Truth)
st.session_state[data_body_key] = st.session_state[widget_body_key] st.session_state[data_body_key] = st.session_state[widget_body_key]
def _insert_text(text_to_insert): def _insert_text(text_to_insert):
# Insert in Widget Key und Sync Data
current = st.session_state.get(widget_body_key, "") current = st.session_state.get(widget_body_key, "")
new_text = f"{current}\n\n{text_to_insert}" new_text = f"{current}\n\n{text_to_insert}"
st.session_state[widget_body_key] = new_text st.session_state[widget_body_key] = new_text
@ -326,26 +336,19 @@ def render_draft_editor(msg):
st.markdown(f'<div class="draft-box">', unsafe_allow_html=True) st.markdown(f'<div class="draft-box">', unsafe_allow_html=True)
st.markdown("### 📝 Entwurf bearbeiten") st.markdown("### 📝 Entwurf bearbeiten")
# Metadata Form
meta_ref = st.session_state[data_meta_key] meta_ref = st.session_state[data_meta_key]
c1, c2 = st.columns([2, 1]) c1, c2 = st.columns([2, 1])
with c1: with c1:
# FIX: Keine 'value=' Angabe, da der Key schon existiert
st.text_input("Titel", key=f"{key_base}_wdg_title", on_change=_sync_meta) st.text_input("Titel", key=f"{key_base}_wdg_title", on_change=_sync_meta)
with c2: with c2:
known_types = ["concept", "project", "decision", "experience", "journal", "value", "goal", "principle", "risk", "belief"] known_types = ["concept", "project", "decision", "experience", "journal", "value", "goal", "principle", "risk", "belief"]
# Sicherstellen, dass der aktuelle Typ in der Liste ist
curr_type = st.session_state.get(f"{key_base}_wdg_type", meta_ref["type"]) curr_type = st.session_state.get(f"{key_base}_wdg_type", meta_ref["type"])
if curr_type not in known_types: known_types.append(curr_type) if curr_type not in known_types: known_types.append(curr_type)
# FIX: Keine 'index=' Angabe, da der Key schon existiert
st.selectbox("Typ", known_types, key=f"{key_base}_wdg_type", on_change=_sync_meta) st.selectbox("Typ", known_types, key=f"{key_base}_wdg_type", on_change=_sync_meta)
# Tags
st.text_input("Tags", key=f"{key_base}_wdg_tags", on_change=_sync_meta) st.text_input("Tags", key=f"{key_base}_wdg_tags", on_change=_sync_meta)
# Tabs
tab_edit, tab_intel, tab_view = st.tabs(["✏️ Inhalt", "🧠 Intelligence", "👁️ Vorschau"]) tab_edit, tab_intel, tab_view = st.tabs(["✏️ Inhalt", "🧠 Intelligence", "👁️ Vorschau"])
# --- TAB 1: EDITOR --- # --- TAB 1: EDITOR ---
@ -365,7 +368,6 @@ def render_draft_editor(msg):
if st.button("🔍 Analyse starten", key=f"{key_base}_analyze"): if st.button("🔍 Analyse starten", key=f"{key_base}_analyze"):
st.session_state[data_sugg_key] = [] st.session_state[data_sugg_key] = []
# Lese vom Widget (aktuell) oder Data (Fallback)
text_to_analyze = st.session_state.get(widget_body_key, st.session_state.get(data_body_key, "")) text_to_analyze = st.session_state.get(widget_body_key, st.session_state.get(data_body_key, ""))
current_doc_type = st.session_state.get(f"{key_base}_wdg_type", "concept") current_doc_type = st.session_state.get(f"{key_base}_wdg_type", "concept")
@ -382,7 +384,6 @@ def render_draft_editor(msg):
else: else:
st.success(f"{len(suggestions)} Vorschläge gefunden.") st.success(f"{len(suggestions)} Vorschläge gefunden.")
# Render List
suggestions = st.session_state[data_sugg_key] suggestions = st.session_state[data_sugg_key]
if suggestions: if suggestions:
current_text_state = st.session_state.get(widget_body_key, "") current_text_state = st.session_state.get(widget_body_key, "")
@ -414,11 +415,12 @@ def render_draft_editor(msg):
final_meta = { final_meta = {
"id": "generated_on_save", "id": "generated_on_save",
"type": st.session_state.get(f"{key_base}_wdg_type", "default"), "type": st.session_state.get(f"{key_base}_wdg_type", "default"),
"title": st.session_state.get(f"{key_base}_wdg_title", "Untitled"), # Title mit Fallback (Widget > Meta > Untitled)
"title": st.session_state.get(f"{key_base}_wdg_title", meta_ref.get("title", "Untitled")),
"status": "draft", "status": "draft",
"tags": final_tags "tags": final_tags
} }
# Final Doc aus Data
final_body = st.session_state.get(widget_body_key, st.session_state[data_body_key]) final_body = st.session_state.get(widget_body_key, st.session_state[data_body_key])
final_doc = build_markdown_doc(final_meta, final_body) final_doc = build_markdown_doc(final_meta, final_body)
@ -433,14 +435,9 @@ def render_draft_editor(msg):
with b1: with b1:
if st.button("💾 Speichern & Indizieren", type="primary", key=f"{key_base}_save"): if st.button("💾 Speichern & Indizieren", type="primary", key=f"{key_base}_save"):
with st.spinner("Speichere im Vault..."): with st.spinner("Speichere im Vault..."):
# safe_title = re.sub(r'[^a-zA-Z0-9]', '-', final_meta["title"]).lower()[:30] or "draft" raw_title = final_meta.get("title", "draft")
# FIX: .get() verwenden, falls 'title' fehlt # Slugify für saubere Dateinamen
raw_title = final_meta.get("title", "draft") safe_title = slugify(raw_title)[:40] or "draft"
safe_title = re.sub(r'[^a-zA-Z0-9]', '-', raw_title).lower()[:30] or "draft"
fname = f"{datetime.now().strftime('%Y%m%d')}-{safe_title}.md"
result = save_draft_to_vault(final_doc, filename=fname)
fname = f"{datetime.now().strftime('%Y%m%d')}-{safe_title}.md" fname = f"{datetime.now().strftime('%Y%m%d')}-{safe_title}.md"
result = save_draft_to_vault(final_doc, filename=fname) result = save_draft_to_vault(final_doc, filename=fname)
@ -460,7 +457,6 @@ def render_chat_interface(top_k, explain):
for idx, msg in enumerate(st.session_state.messages): for idx, msg in enumerate(st.session_state.messages):
with st.chat_message(msg["role"]): with st.chat_message(msg["role"]):
if msg["role"] == "assistant": if msg["role"] == "assistant":
# Header
intent = msg.get("intent", "UNKNOWN") intent = msg.get("intent", "UNKNOWN")
src = msg.get("intent_source", "?") src = msg.get("intent_source", "?")
icon = {"EMPATHY":"❤️", "DECISION":"⚖️", "CODING":"💻", "FACT":"📚", "INTERVIEW":"📝"}.get(intent, "🧠") icon = {"EMPATHY":"❤️", "DECISION":"⚖️", "CODING":"💻", "FACT":"📚", "INTERVIEW":"📝"}.get(intent, "🧠")
@ -469,13 +465,11 @@ def render_chat_interface(top_k, explain):
with st.expander("🐞 Debug Raw Payload", expanded=False): with st.expander("🐞 Debug Raw Payload", expanded=False):
st.json(msg) st.json(msg)
# Logic
if intent == "INTERVIEW": if intent == "INTERVIEW":
render_draft_editor(msg) render_draft_editor(msg)
else: else:
st.markdown(msg["content"]) st.markdown(msg["content"])
# Sources
if "sources" in msg and msg["sources"]: if "sources" in msg and msg["sources"]:
for hit in msg["sources"]: for hit in msg["sources"]:
with st.expander(f"📄 {hit.get('note_id', '?')} ({hit.get('total_score', 0):.2f})"): with st.expander(f"📄 {hit.get('note_id', '?')} ({hit.get('total_score', 0):.2f})"):