logging für import_markdown
This commit is contained in:
parent
1e3fcc1633
commit
d25d623b9c
|
|
@ -1,11 +1,11 @@
|
||||||
"""
|
"""
|
||||||
app/services/semantic_analyzer.py — Edge Validation & Filtering
|
app/services/semantic_analyzer.py — Edge Validation & Filtering
|
||||||
Version: 1.1 (Robust JSON Parsing)
|
Version: 1.2 (Extended Observability & Debugging)
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
from typing import List, Optional
|
from typing import List, Optional, Any
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
|
|
||||||
# Importe
|
# Importe
|
||||||
|
|
@ -21,6 +21,7 @@ class SemanticAnalyzer:
|
||||||
"""
|
"""
|
||||||
Sendet einen Chunk und eine Liste potenzieller Kanten an das LLM.
|
Sendet einen Chunk und eine Liste potenzieller Kanten an das LLM.
|
||||||
Das LLM filtert heraus, welche Kanten für diesen Chunk relevant sind.
|
Das LLM filtert heraus, welche Kanten für diesen Chunk relevant sind.
|
||||||
|
Enthält erweitertes Logging für Debugging.
|
||||||
"""
|
"""
|
||||||
if not all_edges:
|
if not all_edges:
|
||||||
return []
|
return []
|
||||||
|
|
@ -28,8 +29,8 @@ class SemanticAnalyzer:
|
||||||
# 1. Prompt laden
|
# 1. Prompt laden
|
||||||
prompt_template = self.llm.prompts.get("edge_allocation_template")
|
prompt_template = self.llm.prompts.get("edge_allocation_template")
|
||||||
|
|
||||||
# Fallback, falls Prompt nicht in YAML definiert ist (für Tests ohne volle Config)
|
|
||||||
if not prompt_template:
|
if not prompt_template:
|
||||||
|
logger.warning("⚠️ Prompt 'edge_allocation_template' fehlt. Nutze Fallback-Prompt.")
|
||||||
prompt_template = (
|
prompt_template = (
|
||||||
"TASK: Wähle aus den Kandidaten die relevanten Kanten für den Text.\n"
|
"TASK: Wähle aus den Kandidaten die relevanten Kanten für den Text.\n"
|
||||||
"TEXT: {chunk_text}\n"
|
"TEXT: {chunk_text}\n"
|
||||||
|
|
@ -39,6 +40,9 @@ class SemanticAnalyzer:
|
||||||
|
|
||||||
# 2. Kandidaten-Liste formatieren
|
# 2. Kandidaten-Liste formatieren
|
||||||
edges_str = "\n".join([f"- {e}" for e in all_edges])
|
edges_str = "\n".join([f"- {e}" for e in all_edges])
|
||||||
|
|
||||||
|
# LOG: Request Info
|
||||||
|
logger.debug(f"🔍 [SemanticAnalyzer] Request: {len(chunk_text)} chars Text, {len(all_edges)} Candidates.")
|
||||||
|
|
||||||
# 3. Prompt füllen
|
# 3. Prompt füllen
|
||||||
final_prompt = prompt_template.format(
|
final_prompt = prompt_template.format(
|
||||||
|
|
@ -53,11 +57,26 @@ class SemanticAnalyzer:
|
||||||
force_json=True
|
force_json=True
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# LOG: Raw Response (nur die ersten 200 Zeichen, um Log nicht zu fluten, außer bei Fehler)
|
||||||
|
logger.debug(f"📥 [SemanticAnalyzer] Raw Response (Preview): {response_json[:200]}...")
|
||||||
|
|
||||||
# 5. Parsing & Cleaning
|
# 5. Parsing & Cleaning
|
||||||
clean_json = response_json.replace("```json", "").replace("```", "").strip()
|
clean_json = response_json.replace("```json", "").replace("```", "").strip()
|
||||||
if not clean_json: return []
|
|
||||||
|
if not clean_json:
|
||||||
|
logger.warning("⚠️ [SemanticAnalyzer] Leere Antwort vom LLM erhalten. Trigger Fallback.")
|
||||||
|
return []
|
||||||
|
|
||||||
|
try:
|
||||||
|
data = json.loads(clean_json)
|
||||||
|
except json.JSONDecodeError as json_err:
|
||||||
|
# LOG: Detaillierter Fehlerbericht für den User
|
||||||
|
logger.error(f"❌ [SemanticAnalyzer] JSON Decode Error.")
|
||||||
|
logger.error(f" Grund: {json_err}")
|
||||||
|
logger.error(f" Empfangener String: {clean_json}")
|
||||||
|
logger.info(" -> Workaround: Fallback auf 'Alle Kanten' (durch Chunker).")
|
||||||
|
return []
|
||||||
|
|
||||||
data = json.loads(clean_json)
|
|
||||||
valid_edges = []
|
valid_edges = []
|
||||||
|
|
||||||
# 6. Robuste Validierung (List vs Dict)
|
# 6. Robuste Validierung (List vs Dict)
|
||||||
|
|
@ -67,14 +86,15 @@ class SemanticAnalyzer:
|
||||||
|
|
||||||
elif isinstance(data, dict):
|
elif isinstance(data, dict):
|
||||||
# Abweichende Formate behandeln
|
# Abweichende Formate behandeln
|
||||||
|
logger.info(f"ℹ️ [SemanticAnalyzer] LLM lieferte Dict statt Liste. Versuche Reparatur. Keys: {list(data.keys())}")
|
||||||
|
|
||||||
for key, val in data.items():
|
for key, val in data.items():
|
||||||
# Fall A: {"edges": ["kind:target"]}
|
# Fall A: {"edges": ["kind:target"]}
|
||||||
if key.lower() in ["edges", "results", "kanten"] and isinstance(val, list):
|
if key.lower() in ["edges", "results", "kanten", "matches"] and isinstance(val, list):
|
||||||
valid_edges.extend([str(e) for e in val if isinstance(e, str) and ":" in e])
|
valid_edges.extend([str(e) for e in val if isinstance(e, str) and ":" in e])
|
||||||
|
|
||||||
# Fall B: {"kind": "target"} (Das beobachtete Format im Log)
|
# Fall B: {"kind": "target"} (Das beobachtete Format im Log)
|
||||||
elif isinstance(val, str):
|
elif isinstance(val, str):
|
||||||
# Wir rekonstruieren "kind:target"
|
|
||||||
valid_edges.append(f"{key}:{val}")
|
valid_edges.append(f"{key}:{val}")
|
||||||
|
|
||||||
# Fall C: {"kind": ["target1", "target2"]}
|
# Fall C: {"kind": ["target1", "target2"]}
|
||||||
|
|
@ -84,13 +104,18 @@ class SemanticAnalyzer:
|
||||||
valid_edges.append(f"{key}:{target}")
|
valid_edges.append(f"{key}:{target}")
|
||||||
|
|
||||||
# Safety: Filtere nur Kanten, die halbwegs valide aussehen
|
# Safety: Filtere nur Kanten, die halbwegs valide aussehen
|
||||||
return [e for e in valid_edges if ":" in e]
|
final_result = [e for e in valid_edges if ":" in e]
|
||||||
|
|
||||||
|
# LOG: Ergebnis
|
||||||
|
if final_result:
|
||||||
|
logger.info(f"✅ [SemanticAnalyzer] Success. {len(final_result)} Kanten zugewiesen.")
|
||||||
|
else:
|
||||||
|
logger.debug(" [SemanticAnalyzer] Keine spezifischen Kanten erkannt (Empty Result).")
|
||||||
|
|
||||||
|
return final_result
|
||||||
|
|
||||||
except json.JSONDecodeError:
|
|
||||||
logger.warning("SemanticAnalyzer: LLM lieferte kein valides JSON. Ignoriere Zuweisung.")
|
|
||||||
return []
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"SemanticAnalyzer Error: {e}")
|
logger.error(f"💥 [SemanticAnalyzer] Kritischer Fehler: {e}", exc_info=True)
|
||||||
return []
|
return []
|
||||||
|
|
||||||
async def close(self):
|
async def close(self):
|
||||||
|
|
|
||||||
|
|
@ -11,6 +11,13 @@ import logging
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
|
|
||||||
|
import logging
|
||||||
|
# Setzt das Level global auf INFO, damit Sie den Fortschritt sehen
|
||||||
|
logging.basicConfig(level=logging.INFO, format='%(asctime)s [%(levelname)s] %(message)s')
|
||||||
|
|
||||||
|
# Wenn Sie TIEFE Einblicke wollen, setzen Sie den SemanticAnalyzer spezifisch auf DEBUG:
|
||||||
|
logging.getLogger("app.services.semantic_analyzer").setLevel(logging.DEBUG)
|
||||||
|
|
||||||
# Importiere den neuen Async Service
|
# Importiere den neuen Async Service
|
||||||
# Stellen wir sicher, dass der Pfad stimmt (Pythonpath)
|
# Stellen wir sicher, dass der Pfad stimmt (Pythonpath)
|
||||||
import sys
|
import sys
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user