Anpassung an variable Kontextgrenzen für Ollama
All checks were successful
Deploy mindnet to llm-node / deploy (push) Successful in 5s
All checks were successful
Deploy mindnet to llm-node / deploy (push) Successful in 5s
This commit is contained in:
parent
6ac1f318d0
commit
f686ecf947
|
|
@ -34,6 +34,8 @@ class Settings:
|
||||||
# --- WP-20 Hybrid LLM Provider ---
|
# --- WP-20 Hybrid LLM Provider ---
|
||||||
# Erlaubt: "ollama" | "gemini" | "openrouter"
|
# Erlaubt: "ollama" | "gemini" | "openrouter"
|
||||||
MINDNET_LLM_PROVIDER: str = os.getenv("MINDNET_LLM_PROVIDER", "openrouter").lower()
|
MINDNET_LLM_PROVIDER: str = os.getenv("MINDNET_LLM_PROVIDER", "openrouter").lower()
|
||||||
|
# Standardwert 10000, falls nichts in der .env steht
|
||||||
|
MAX_OLLAMA_CHARS: int = int(os.getenv("MAX_OLLAMA_CHARS", 10000))
|
||||||
|
|
||||||
# Google AI Studio (2025er Lite-Modell für höhere Kapazität)
|
# Google AI Studio (2025er Lite-Modell für höhere Kapazität)
|
||||||
GOOGLE_API_KEY: str | None = os.getenv("GOOGLE_API_KEY")
|
GOOGLE_API_KEY: str | None = os.getenv("GOOGLE_API_KEY")
|
||||||
|
|
|
||||||
|
|
@ -310,10 +310,13 @@ async def chat_endpoint(
|
||||||
|
|
||||||
# --- STABILITY FIX: OLLAMA CONTEXT THROTTLE ---
|
# --- STABILITY FIX: OLLAMA CONTEXT THROTTLE ---
|
||||||
# Begrenzt den Text, um den "decode: cannot decode batches" Fehler zu vermeiden.
|
# Begrenzt den Text, um den "decode: cannot decode batches" Fehler zu vermeiden.
|
||||||
MAX_OLLAMA_CHARS = 10000
|
# MAX_OLLAMA_CHARS = 10000
|
||||||
if preferred_provider == "ollama" and len(context_str) > MAX_OLLAMA_CHARS:
|
|
||||||
logger.warning(f"⚠️ [{query_id}] Context zu groß für Ollama ({len(context_str)} chars). Kürze auf {MAX_OLLAMA_CHARS}.")
|
settings = get_settings() # Falls noch nicht im Scope vorhanden
|
||||||
context_str = context_str[:MAX_OLLAMA_CHARS] + "\n[...gekürzt zur Stabilität...]"
|
max_chars = getattr(settings, "MAX_OLLAMA_CHARS", 10000)
|
||||||
|
if preferred_provider == "ollama" and len(context_str) > max_chars:
|
||||||
|
logger.warning(f"⚠️ [{query_id}] Context zu groß für Ollama ({len(context_str)} chars). Kürze auf {max_chars}.")
|
||||||
|
context_str = context_str[:max_chars] + "\n[...gekürzt zur Stabilität...]"
|
||||||
|
|
||||||
template = llm.get_prompt(prompt_key) or "{context_str}\n\n{query}"
|
template = llm.get_prompt(prompt_key) or "{context_str}\n\n{query}"
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user