modified: app/services/semantic_analyzer.py
This commit is contained in:
parent
37c0f526ec
commit
3629bc3fb9
|
|
@ -48,7 +48,7 @@ class SemanticAnalyzer:
|
||||||
user_prompt = f"Dokument-Typ: {source_type}\n\nTEXT:\n{text}"
|
user_prompt = f"Dokument-Typ: {source_type}\n\nTEXT:\n{text}"
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# WICHTIG: Erzwingt Ollama JSON Mode
|
# 2. LLM Call (Async)
|
||||||
response_json = await self.llm.generate_raw_response(
|
response_json = await self.llm.generate_raw_response(
|
||||||
user_prompt,
|
user_prompt,
|
||||||
system=system_prompt,
|
system=system_prompt,
|
||||||
|
|
@ -59,8 +59,18 @@ class SemanticAnalyzer:
|
||||||
clean_json = response_json.replace("```json", "").replace("```", "").strip()
|
clean_json = response_json.replace("```json", "").replace("```", "").strip()
|
||||||
data = json.loads(clean_json)
|
data = json.loads(clean_json)
|
||||||
|
|
||||||
|
# 3a. Typsicherheit des äußeren Arrays
|
||||||
|
if not isinstance(data, list):
|
||||||
|
logger.error("SemanticAnalyzer: JSON root ist kein Array. Fehlerhafte LLM-Antwort.")
|
||||||
|
raise ValueError("Root element is not a list.")
|
||||||
|
|
||||||
results = []
|
results = []
|
||||||
for item in data:
|
for item in data:
|
||||||
|
# FIX: Typsicherheit auf Item-Ebene
|
||||||
|
if not isinstance(item, dict):
|
||||||
|
logger.warning(f"SemanticAnalyzer: Ungültiges Chunk-Element ignoriert: {item}")
|
||||||
|
continue
|
||||||
|
|
||||||
content = item.get("content", "").strip()
|
content = item.get("content", "").strip()
|
||||||
if not content: continue
|
if not content: continue
|
||||||
|
|
||||||
|
|
@ -68,7 +78,7 @@ class SemanticAnalyzer:
|
||||||
refined_edges = []
|
refined_edges = []
|
||||||
|
|
||||||
for rel in raw_rels:
|
for rel in raw_rels:
|
||||||
# FIX: Sicherstellen, dass rel ein Dictionary ist, um 'str' object has no attribute 'get' zu verhindern
|
# Typsicherheit auf Relation-Ebene
|
||||||
if not isinstance(rel, dict):
|
if not isinstance(rel, dict):
|
||||||
logger.warning(f"SemanticAnalyzer: Ignoriere ungültige Relation: {rel}")
|
logger.warning(f"SemanticAnalyzer: Ignoriere ungültige Relation: {rel}")
|
||||||
continue
|
continue
|
||||||
|
|
@ -77,7 +87,7 @@ class SemanticAnalyzer:
|
||||||
raw_type = rel.get("type", "related_to")
|
raw_type = rel.get("type", "related_to")
|
||||||
|
|
||||||
if target:
|
if target:
|
||||||
# 1. Annahme: Hole den Typ der ZIEL-Entität aus dem Index (für Matrix-Logik)
|
# 1. Typ-Auflösung (für Matrix)
|
||||||
target_entity_type = self._get_target_type_from_title(target)
|
target_entity_type = self._get_target_type_from_title(target)
|
||||||
|
|
||||||
# 2. Matrix-Logik anwenden:
|
# 2. Matrix-Logik anwenden:
|
||||||
|
|
@ -87,7 +97,6 @@ class SemanticAnalyzer:
|
||||||
if final_kind not in ["related_to", "references"] and target_entity_type != "concept":
|
if final_kind not in ["related_to", "references"] and target_entity_type != "concept":
|
||||||
edge_str = f"{final_kind}:{target}"
|
edge_str = f"{final_kind}:{target}"
|
||||||
else:
|
else:
|
||||||
# Wenn Matrix oder LLM generisch war, nutzen wir den generischen Output des LLM.
|
|
||||||
edge_str = f"{raw_type}:{target}"
|
edge_str = f"{raw_type}:{target}"
|
||||||
|
|
||||||
refined_edges.append(edge_str)
|
refined_edges.append(edge_str)
|
||||||
|
|
@ -100,7 +109,7 @@ class SemanticAnalyzer:
|
||||||
logger.error("SemanticAnalyzer: LLM lieferte KEIN valides JSON. Fallback auf Raw Text.")
|
logger.error("SemanticAnalyzer: LLM lieferte KEIN valides JSON. Fallback auf Raw Text.")
|
||||||
return [SemanticChunkResult(content=text, suggested_edges=[])]
|
return [SemanticChunkResult(content=text, suggested_edges=[])]
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
# Wichtig: Fehler im Loggen, damit wir wissen, warum es crashte (z.B. Timeout/Ressource)
|
# Wichtig: Wir fangen alle anderen Fehler, um den Prozess nicht abzubrechen.
|
||||||
logger.error(f"SemanticAnalyzer Unbehandelter Fehler: {e}")
|
logger.error(f"SemanticAnalyzer Unbehandelter Fehler: {e}")
|
||||||
return [SemanticChunkResult(content=text, suggested_edges=[])]
|
return [SemanticChunkResult(content=text, suggested_edges=[])]
|
||||||
|
|
||||||
|
|
@ -119,7 +128,6 @@ class SemanticAnalyzer:
|
||||||
if "leitbild-rituale-system" in title_lower:
|
if "leitbild-rituale-system" in title_lower:
|
||||||
return "concept"
|
return "concept"
|
||||||
|
|
||||||
# Fallback
|
|
||||||
return "concept"
|
return "concept"
|
||||||
|
|
||||||
async def close(self):
|
async def close(self):
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user