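Bug fix: resolve AttributeError in the ingestion chunk-config lookup and harden prompt formatting (v2.11.7)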
This commit is contained in:
parent
a908853c30
commit
5278c75ac1
|
|
@ -3,8 +3,8 @@ FILE: app/core/ingestion.py
|
||||||
DESCRIPTION: Haupt-Ingestion-Logik. Transformiert Markdown in den Graphen.
|
DESCRIPTION: Haupt-Ingestion-Logik. Transformiert Markdown in den Graphen.
|
||||||
WP-20: Smart Edge Allocation via Hybrid LLM (OpenRouter/Gemini).
|
WP-20: Smart Edge Allocation via Hybrid LLM (OpenRouter/Gemini).
|
||||||
WP-22: Content Lifecycle, Edge Registry Validation & Multi-Hash.
|
WP-22: Content Lifecycle, Edge Registry Validation & Multi-Hash.
|
||||||
FIX: Bulletproof JSON Extraction & Prompt Formatting Safety.
|
FIX: Behebung des AttributeError und Härtung des Prompt-Formattings.
|
||||||
VERSION: 2.11.6
|
VERSION: 2.11.7
|
||||||
STATUS: Active
|
STATUS: Active
|
||||||
"""
|
"""
|
||||||
import os
|
import os
|
||||||
|
|
@ -56,7 +56,7 @@ def extract_json_from_response(text: str) -> Any:
|
||||||
try:
|
try:
|
||||||
return json.loads(clean_text.strip())
|
return json.loads(clean_text.strip())
|
||||||
except json.JSONDecodeError:
|
except json.JSONDecodeError:
|
||||||
# Letzter Versuch: Alles vor der ersten [ und nach der letzten ] entfernen
|
# Versuch: Alles vor der ersten [ und nach der letzten ] entfernen
|
||||||
start = clean_text.find('[')
|
start = clean_text.find('[')
|
||||||
end = clean_text.rfind(']') + 1
|
end = clean_text.rfind(']') + 1
|
||||||
if start != -1 and end != 0:
|
if start != -1 and end != 0:
|
||||||
|
|
@ -120,6 +120,16 @@ class IngestionService:
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning(f"DB init warning: {e}")
|
logger.warning(f"DB init warning: {e}")
|
||||||
|
|
||||||
|
def _get_chunk_config_by_profile(self, profile_name: str, note_type: str) -> Dict[str, Any]:
|
||||||
|
"""Holt die Chunker-Parameter für ein spezifisches Profil."""
|
||||||
|
profiles = self.registry.get("chunking_profiles", {})
|
||||||
|
if profile_name in profiles:
|
||||||
|
cfg = profiles[profile_name].copy()
|
||||||
|
if "overlap" in cfg and isinstance(cfg["overlap"], list):
|
||||||
|
cfg["overlap"] = tuple(cfg["overlap"])
|
||||||
|
return cfg
|
||||||
|
return get_chunk_config(note_type)
|
||||||
|
|
||||||
async def _perform_smart_edge_allocation(self, text: str, note_id: str) -> List[Dict]:
|
async def _perform_smart_edge_allocation(self, text: str, note_id: str) -> List[Dict]:
|
||||||
"""Nutzt das Hybrid LLM für die semantische Kanten-Extraktion."""
|
"""Nutzt das Hybrid LLM für die semantische Kanten-Extraktion."""
|
||||||
provider = "openrouter" if self.settings.OPENROUTER_API_KEY else self.settings.MINDNET_LLM_PROVIDER
|
provider = "openrouter" if self.settings.OPENROUTER_API_KEY else self.settings.MINDNET_LLM_PROVIDER
|
||||||
|
|
@ -133,7 +143,7 @@ class IngestionService:
|
||||||
template = self.llm.get_prompt("edge_extraction", provider)
|
template = self.llm.get_prompt("edge_extraction", provider)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# FIX: Format-Safety Block
|
# FIX: Format-Safety Block gegen KeyError: '"to"'
|
||||||
try:
|
try:
|
||||||
prompt = template.format(
|
prompt = template.format(
|
||||||
text=text[:6000],
|
text=text[:6000],
|
||||||
|
|
@ -141,7 +151,7 @@ class IngestionService:
|
||||||
valid_types=valid_types_str
|
valid_types=valid_types_str
|
||||||
)
|
)
|
||||||
except KeyError as ke:
|
except KeyError as ke:
|
||||||
logger.error(f"❌ [Ingestion] Prompt-Template Fehler (Fehlende Maskierung in YAML?): {ke}")
|
logger.error(f"❌ [Ingestion] Prompt-Template Fehler (Variable {ke} fehlt). Prüfe prompts.yaml Maskierung.")
|
||||||
return []
|
return []
|
||||||
|
|
||||||
response_json = await self.llm.generate_raw_response(
|
response_json = await self.llm.generate_raw_response(
|
||||||
|
|
@ -149,7 +159,6 @@ class IngestionService:
|
||||||
provider=provider, model_override=model
|
provider=provider, model_override=model
|
||||||
)
|
)
|
||||||
|
|
||||||
# FIX: Robustes JSON-Parsing
|
|
||||||
raw_data = extract_json_from_response(response_json)
|
raw_data = extract_json_from_response(response_json)
|
||||||
|
|
||||||
if isinstance(raw_data, dict):
|
if isinstance(raw_data, dict):
|
||||||
|
|
@ -162,7 +171,7 @@ class IngestionService:
|
||||||
|
|
||||||
processed = []
|
processed = []
|
||||||
for item in raw_data:
|
for item in raw_data:
|
||||||
# FIX: Typ-Check zur Vermeidung von 'str' object assignment errors
|
# FIX: Schutz vor 'str' object does not support item assignment
|
||||||
if isinstance(item, dict) and "to" in item:
|
if isinstance(item, dict) and "to" in item:
|
||||||
item["provenance"] = "semantic_ai"
|
item["provenance"] = "semantic_ai"
|
||||||
item["line"] = f"ai-{provider}"
|
item["line"] = f"ai-{provider}"
|
||||||
|
|
@ -186,6 +195,7 @@ class IngestionService:
|
||||||
force_replace: bool = False, apply: bool = False, purge_before: bool = False,
|
force_replace: bool = False, apply: bool = False, purge_before: bool = False,
|
||||||
note_scope_refs: bool = False, hash_source: str = "parsed", hash_normalize: str = "canonical"
|
note_scope_refs: bool = False, hash_source: str = "parsed", hash_normalize: str = "canonical"
|
||||||
) -> Dict[str, Any]:
|
) -> Dict[str, Any]:
|
||||||
|
"""Verarbeitet eine Markdown-Datei und schreibt sie in den Graphen."""
|
||||||
result = {"path": file_path, "status": "skipped", "changed": False, "error": None}
|
result = {"path": file_path, "status": "skipped", "changed": False, "error": None}
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
|
@ -228,6 +238,7 @@ class IngestionService:
|
||||||
body_text = getattr(parsed, "body", "") or ""
|
body_text = getattr(parsed, "body", "") or ""
|
||||||
if hasattr(edge_registry, "ensure_latest"): edge_registry.ensure_latest()
|
if hasattr(edge_registry, "ensure_latest"): edge_registry.ensure_latest()
|
||||||
|
|
||||||
|
# FIX: Behebung des AttributeError durch korrekten Aufruf der Klassenmethode
|
||||||
chunk_config = self._get_chunk_config_by_profile(effective_profile, note_type)
|
chunk_config = self._get_chunk_config_by_profile(effective_profile, note_type)
|
||||||
chunks = await assemble_chunks(fm["id"], body_text, fm["type"], config=chunk_config)
|
chunks = await assemble_chunks(fm["id"], body_text, fm["type"], config=chunk_config)
|
||||||
chunk_pls = make_chunk_payloads(fm, note_pl["path"], chunks, note_text=body_text)
|
chunk_pls = make_chunk_payloads(fm, note_pl["path"], chunks, note_text=body_text)
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
# config/prompts.yaml — Final V2.5.1 (Hybrid & Multi-Provider Support)
|
# config/prompts.yaml — Final V2.5.2 (Strict Hybrid Support)
|
||||||
# WP-20: Optimierte Cloud-Templates.
|
# WP-20: Optimierte Cloud-Templates.
|
||||||
# FIX: Technische Maskierung (Doppel-Klammern) in Cloud-Sektionen zur Vermeidung von KeyError.
|
# FIX: Technische Maskierung (Doppel-Klammern) zur Vermeidung von KeyError: '"to"'.
|
||||||
# OLLAMA: Unverändert laut Benutzeranweisung.
|
# OLLAMA: Unverändert laut Benutzeranweisung.
|
||||||
|
|
||||||
system_prompt: |
|
system_prompt: |
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user