WP15 #9

Merged
Lars merged 54 commits from WP15 into main 2025-12-13 06:39:48 +01:00
2 changed files with 50 additions and 49 deletions
Showing only changes of commit 16af07dd21 - Show all commits

View File

@ -1,9 +1,3 @@
# app/frontend/ui.py
# ... (Imports und Setup bleiben gleich) ...
# Ersetze die gesamte Datei mit diesem Inhalt:
import streamlit as st
import requests
import uuid
@ -30,7 +24,7 @@ timeout_setting = os.getenv("MINDNET_API_TIMEOUT") or os.getenv("MINDNET_LLM_TIM
API_TIMEOUT = float(timeout_setting) if timeout_setting else 300.0
# --- PAGE SETUP ---
st.set_page_config(page_title="mindnet v2.4", page_icon="🧠", layout="wide")
st.set_page_config(page_title="mindnet v2.5", page_icon="🧠", layout="wide")
# --- CSS STYLING ---
st.markdown("""
@ -70,10 +64,6 @@ if "user_id" not in st.session_state: st.session_state.user_id = str(uuid.uuid4(
# --- HELPER FUNCTIONS ---
def slugify(value):
"""
Erzeugt saubere Dateinamen (German-Aware).
z.B. "Müller & Söhne" -> "mueller-und-soehne"
"""
if not value: return ""
value = str(value).lower()
replacements = {'ä': 'ae', 'ö': 'oe', 'ü': 'ue', 'ß': 'ss', '&': 'und', '+': 'und'}
@ -85,7 +75,6 @@ def slugify(value):
return re.sub(r'[-\s]+', '-', value)
def normalize_meta_and_body(meta, body):
"""Sanitizer: Stellt sicher, dass nur erlaubte Felder im Frontmatter bleiben."""
ALLOWED_KEYS = {"title", "type", "status", "tags", "id", "created", "updated", "aliases", "lang"}
clean_meta = {}
extra_content = []
@ -127,37 +116,49 @@ def normalize_meta_and_body(meta, body):
return clean_meta, final_body
def parse_markdown_draft(full_text):
"""Robustes Parsing + Sanitization (YAML + Fallbacks)."""
"""
HEALING PARSER: Repariert kaputten LLM Output (z.B. fehlendes schließendes '---').
"""
clean_text = full_text.strip()
# 1. Markdown Code-Blöcke entfernen
# 1. Code-Block Wrapper entfernen
pattern_block = r"```(?:markdown|md|yaml)?\s*(.*?)\s*```"
match_block = re.search(pattern_block, clean_text, re.DOTALL | re.IGNORECASE)
if match_block:
clean_text = match_block.group(1).strip()
# 2. Split YAML / Body
parts = re.split(r"^---+\s*$", clean_text, maxsplit=2, flags=re.MULTILINE)
meta = {}
body = clean_text
yaml_str = ""
# 2. Versuch A: Standard Split (Idealfall)
parts = re.split(r"^---+\s*$", clean_text, maxsplit=2, flags=re.MULTILINE)
if len(parts) >= 3:
yaml_str = parts[1]
body_candidate = parts[2]
body = parts[2]
# YAML Cleanup
yaml_str_clean = yaml_str.replace("#", "")
# 3. Versuch B: Healing (Wenn LLM das schließende --- vergessen hat)
elif clean_text.startswith("---"):
# Wir suchen die erste Überschrift '#', da Frontmatter davor sein muss
# Pattern: Suche --- am Anfang, dann nimm alles bis zum ersten # am Zeilenanfang
fallback_match = re.search(r"^---\s*(.*?)(?=\n#)", clean_text, re.DOTALL | re.MULTILINE)
if fallback_match:
yaml_str = fallback_match.group(1)
# Der Body ist alles NACH dem YAML String (inklusive dem #)
body = clean_text.replace(f"---{yaml_str}", "", 1).strip()
# 4. YAML Parsing
if yaml_str:
yaml_str_clean = yaml_str.replace("#", "") # Tags cleanen
try:
parsed = yaml.safe_load(yaml_str_clean)
if isinstance(parsed, dict):
meta = parsed
body = body_candidate.strip()
except Exception as e:
print(f"YAML Parsing Warning: {e}")
body = body_candidate.strip()
# Fallback: Titel aus H1 suchen, wenn nicht im YAML
# Fallback: Titel aus H1
if not meta.get("title"):
h1_match = re.search(r"^#\s+(.*)$", body, re.MULTILINE)
if h1_match:
@ -258,7 +259,7 @@ def submit_feedback(query_id, node_id, score, comment=None):
def render_sidebar():
with st.sidebar:
st.title("🧠 mindnet")
st.caption("v2.4.4 | Smart Filename")
st.caption("v2.5 | Healing Parser")
mode = st.radio("Modus", ["💬 Chat", "📝 Manueller Editor"], index=0)
st.divider()
st.subheader("⚙️ Settings")
@ -289,7 +290,7 @@ def render_draft_editor(msg):
if f"{key_base}_init" not in st.session_state:
meta, body = parse_markdown_draft(msg["content"])
if "type" not in meta: meta["type"] = "default"
if "title" not in meta: meta["title"] = "" # Kann leer sein
if "title" not in meta: meta["title"] = ""
tags = meta.get("tags", [])
meta["tags_str"] = ", ".join(tags) if isinstance(tags, list) else str(tags)
@ -412,7 +413,6 @@ def render_draft_editor(msg):
final_tags_str = st.session_state.get(f"{key_base}_wdg_tags", "")
final_tags = [t.strip() for t in final_tags_str.split(",") if t.strip()]
# Live Daten aus Widget (dies ist die Wahrheit!)
final_meta = {
"id": "generated_on_save",
"type": st.session_state.get(f"{key_base}_wdg_type", "default"),
@ -423,14 +423,11 @@ def render_draft_editor(msg):
final_body = st.session_state.get(widget_body_key, st.session_state[data_body_key])
# 1. Update Title in Meta (damit es im YAML landet)
if not final_meta["title"]:
# Fallback auf H1 im Text
h1_match = re.search(r"^#\s+(.*)$", final_body, re.MULTILINE)
if h1_match:
final_meta["title"] = h1_match.group(1).strip()
# 2. Build Doc
final_doc = build_markdown_doc(final_meta, final_body)
with tab_view:
@ -445,20 +442,13 @@ def render_draft_editor(msg):
if st.button("💾 Speichern & Indizieren", type="primary", key=f"{key_base}_save"):
with st.spinner("Speichere im Vault..."):
# --- DATEINAMEN INTELLIGENZ ---
# Prio 1: Meta Titel
title_for_slug = final_meta.get("title", "")
# Prio 2: Body Snippet (wenn Titel immer noch leer)
if not title_for_slug:
raw_title = final_meta.get("title", "")
if not raw_title:
clean_body = re.sub(r"[#*_\[\]()]", "", final_body).strip()
title_for_slug = clean_body[:40] if clean_body else "draft"
safe_title = slugify(title_for_slug)[:60]
if not safe_title: safe_title = "draft"
raw_title = clean_body[:40] if clean_body else "draft"
safe_title = slugify(raw_title)[:60] or "draft"
fname = f"{datetime.now().strftime('%Y%m%d')}-{safe_title}.md"
# -----------------------------
result = save_draft_to_vault(final_doc, filename=fname)
if "error" in result:

View File

@ -1,6 +1,6 @@
"""
app/services/llm_service.py LLM Client
Version: 2.7.0 (Clean Architecture: Explicit Priority Queues)
Version: 2.8.0 (Configurable Concurrency Limit)
"""
import httpx
@ -19,17 +19,28 @@ class Settings:
LLM_MODEL = os.getenv("MINDNET_LLM_MODEL", "phi3:mini")
PROMPTS_PATH = os.getenv("MINDNET_PROMPTS_PATH", "./config/prompts.yaml")
# NEU: Konfigurierbares Limit für Hintergrund-Last
# Default auf 2 (konservativ), kann in .env erhöht werden.
BACKGROUND_LIMIT = int(os.getenv("MINDNET_LLM_BACKGROUND_LIMIT", "2"))
def get_settings():
return Settings()
class LLMService:
# GLOBALER SEMAPHOR (Drosselung für Hintergrund-Prozesse)
_background_semaphore = asyncio.Semaphore(2)
# GLOBALER SEMAPHOR (Lazy Initialization)
# Wir initialisieren ihn erst, wenn wir die Settings kennen.
_background_semaphore = None
def __init__(self):
self.settings = get_settings()
self.prompts = self._load_prompts()
# Initialisiere Semaphore einmalig auf Klassen-Ebene basierend auf Config
if LLMService._background_semaphore is None:
limit = self.settings.BACKGROUND_LIMIT
logger.info(f"🚦 LLMService: Initializing Background Semaphore with limit: {limit}")
LLMService._background_semaphore = asyncio.Semaphore(limit)
self.timeout = httpx.Timeout(self.settings.LLM_TIMEOUT, connect=10.0)
self.client = httpx.AsyncClient(
@ -53,7 +64,7 @@ class LLMService:
force_json: bool = False,
max_retries: int = 0,
base_delay: float = 2.0,
priority: Literal["realtime", "background"] = "realtime" # <--- NEU & EXPLIZIT
priority: Literal["realtime", "background"] = "realtime"
) -> str:
"""
Führt einen LLM Call aus.
@ -61,13 +72,13 @@ class LLMService:
priority="background": Import/Analyse (Gedrosselt durch Semaphore).
"""
# Entscheidung basierend auf explizitem Parameter, nicht Format!
use_semaphore = (priority == "background")
if use_semaphore:
if use_semaphore and LLMService._background_semaphore:
async with LLMService._background_semaphore:
return await self._execute_request(prompt, system, force_json, max_retries, base_delay)
else:
# Realtime oder Fallback (falls Semaphore Init fehlschlug)
return await self._execute_request(prompt, system, force_json, max_retries, base_delay)
async def _execute_request(self, prompt, system, force_json, max_retries, base_delay):
@ -123,7 +134,7 @@ class LLMService:
system=system_prompt,
max_retries=0,
force_json=False,
priority="realtime" # <--- Standard
priority="realtime"
)
async def close(self):