This commit is contained in:
Lars 2025-12-23 22:02:32 +01:00
parent a908853c30
commit 5278c75ac1
2 changed files with 20 additions and 9 deletions

View File

@ -3,8 +3,8 @@ FILE: app/core/ingestion.py
DESCRIPTION: Haupt-Ingestion-Logik. Transformiert Markdown in den Graphen.
WP-20: Smart Edge Allocation via Hybrid LLM (OpenRouter/Gemini).
WP-22: Content Lifecycle, Edge Registry Validation & Multi-Hash.
FIX: Bulletproof JSON Extraction & Prompt Formatting Safety.
VERSION: 2.11.6
FIX: Behebung des AttributeError und Härtung des Prompt-Formattings.
VERSION: 2.11.7
STATUS: Active
"""
import os
@ -56,7 +56,7 @@ def extract_json_from_response(text: str) -> Any:
try:
return json.loads(clean_text.strip())
except json.JSONDecodeError:
# Letzter Versuch: Alles vor der ersten [ und nach der letzten ] entfernen
# Versuch: Alles vor der ersten [ und nach der letzten ] entfernen
start = clean_text.find('[')
end = clean_text.rfind(']') + 1
if start != -1 and end != 0:
@ -120,6 +120,16 @@ class IngestionService:
except Exception as e:
logger.warning(f"DB init warning: {e}")
def _get_chunk_config_by_profile(self, profile_name: str, note_type: str) -> Dict[str, Any]:
"""Holt die Chunker-Parameter für ein spezifisches Profil."""
profiles = self.registry.get("chunking_profiles", {})
if profile_name in profiles:
cfg = profiles[profile_name].copy()
if "overlap" in cfg and isinstance(cfg["overlap"], list):
cfg["overlap"] = tuple(cfg["overlap"])
return cfg
return get_chunk_config(note_type)
async def _perform_smart_edge_allocation(self, text: str, note_id: str) -> List[Dict]:
"""Nutzt das Hybrid LLM für die semantische Kanten-Extraktion."""
provider = "openrouter" if self.settings.OPENROUTER_API_KEY else self.settings.MINDNET_LLM_PROVIDER
@ -133,7 +143,7 @@ class IngestionService:
template = self.llm.get_prompt("edge_extraction", provider)
try:
# FIX: Format-Safety Block
# FIX: Format-Safety Block gegen KeyError: '"to"'
try:
prompt = template.format(
text=text[:6000],
@ -141,7 +151,7 @@ class IngestionService:
valid_types=valid_types_str
)
except KeyError as ke:
logger.error(f"❌ [Ingestion] Prompt-Template Fehler (Fehlende Maskierung in YAML?): {ke}")
logger.error(f"❌ [Ingestion] Prompt-Template Fehler (Variable {ke} fehlt). Prüfe prompts.yaml Maskierung.")
return []
response_json = await self.llm.generate_raw_response(
@ -149,7 +159,6 @@ class IngestionService:
provider=provider, model_override=model
)
# FIX: Robustes JSON-Parsing
raw_data = extract_json_from_response(response_json)
if isinstance(raw_data, dict):
@ -162,7 +171,7 @@ class IngestionService:
processed = []
for item in raw_data:
# FIX: Typ-Check zur Vermeidung von 'str' object assignment errors
# FIX: Schutz vor 'str' object does not support item assignment
if isinstance(item, dict) and "to" in item:
item["provenance"] = "semantic_ai"
item["line"] = f"ai-{provider}"
@ -186,6 +195,7 @@ class IngestionService:
force_replace: bool = False, apply: bool = False, purge_before: bool = False,
note_scope_refs: bool = False, hash_source: str = "parsed", hash_normalize: str = "canonical"
) -> Dict[str, Any]:
"""Verarbeitet eine Markdown-Datei und schreibt sie in den Graphen."""
result = {"path": file_path, "status": "skipped", "changed": False, "error": None}
try:
@ -228,6 +238,7 @@ class IngestionService:
body_text = getattr(parsed, "body", "") or ""
if hasattr(edge_registry, "ensure_latest"): edge_registry.ensure_latest()
# FIX: Behebung des AttributeError durch korrekten Aufruf der Klassenmethode
chunk_config = self._get_chunk_config_by_profile(effective_profile, note_type)
chunks = await assemble_chunks(fm["id"], body_text, fm["type"], config=chunk_config)
chunk_pls = make_chunk_payloads(fm, note_pl["path"], chunks, note_text=body_text)

View File

@ -1,6 +1,6 @@
# config/prompts.yaml — Final V2.5.1 (Hybrid & Multi-Provider Support)
# config/prompts.yaml — Final V2.5.2 (Strict Hybrid Support)
# WP-20: Optimierte Cloud-Templates.
# FIX: Technische Maskierung (Doppel-Klammern) in Cloud-Sektionen zur Vermeidung von KeyError.
# FIX: Technische Maskierung (Doppel-Klammern) zur Vermeidung von KeyError: '"to"'.
# OLLAMA: Unverändert laut Benutzeranweisung.
system_prompt: |