Enhance logging capabilities across multiple modules for version 4.5.0: Introduce detailed debug statements in decision_engine.py, retriever_scoring.py, retriever.py, and logging_setup.py to improve traceability during retrieval processes. Implement dynamic log level configuration based on environment variables, allowing for more flexible debugging and monitoring of application behavior.
This commit is contained in:
parent
2d87f9d816
commit
3e27c72b80
|
|
@ -2,36 +2,52 @@ import logging
|
||||||
import os
|
import os
|
||||||
from logging.handlers import RotatingFileHandler
|
from logging.handlers import RotatingFileHandler
|
||||||
|
|
||||||
def setup_logging():
|
def setup_logging(log_level: int = None):
|
||||||
# 1. Log-Verzeichnis erstellen (falls nicht vorhanden)
|
"""
|
||||||
|
Konfiguriert das Logging-System mit File- und Console-Handler.
|
||||||
|
WP-24c v4.4.0-DEBUG: Unterstützt DEBUG-Level für End-to-End Tracing.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
log_level: Optionales Log-Level (logging.DEBUG, logging.INFO, etc.)
|
||||||
|
Falls nicht gesetzt, wird aus DEBUG Umgebungsvariable gelesen.
|
||||||
|
"""
|
||||||
|
# 1. Log-Level bestimmen
|
||||||
|
if log_level is None:
|
||||||
|
# WP-24c v4.4.0-DEBUG: Unterstützung für DEBUG-Level via Umgebungsvariable
|
||||||
|
debug_mode = os.getenv("DEBUG", "false").lower() == "true"
|
||||||
|
log_level = logging.DEBUG if debug_mode else logging.INFO
|
||||||
|
|
||||||
|
# 2. Log-Verzeichnis erstellen (falls nicht vorhanden)
|
||||||
log_dir = "logs"
|
log_dir = "logs"
|
||||||
if not os.path.exists(log_dir):
|
if not os.path.exists(log_dir):
|
||||||
os.makedirs(log_dir)
|
os.makedirs(log_dir)
|
||||||
|
|
||||||
log_file = os.path.join(log_dir, "mindnet.log")
|
log_file = os.path.join(log_dir, "mindnet.log")
|
||||||
|
|
||||||
# 2. Formatter definieren (Zeitstempel | Level | Modul | Nachricht)
|
# 3. Formatter definieren (Zeitstempel | Level | Modul | Nachricht)
|
||||||
formatter = logging.Formatter(
|
formatter = logging.Formatter(
|
||||||
'%(asctime)s | %(levelname)-8s | %(name)s | %(message)s',
|
'%(asctime)s | %(levelname)-8s | %(name)s | %(message)s',
|
||||||
datefmt='%Y-%m-%d %H:%M:%S'
|
datefmt='%Y-%m-%d %H:%M:%S'
|
||||||
)
|
)
|
||||||
|
|
||||||
# 3. File Handler: Schreibt in Datei (max. 5MB pro Datei, behält 5 Backups)
|
# 4. File Handler: Schreibt in Datei (max. 5MB pro Datei, behält 5 Backups)
|
||||||
file_handler = RotatingFileHandler(
|
file_handler = RotatingFileHandler(
|
||||||
log_file, maxBytes=5*1024*1024, backupCount=5, encoding='utf-8'
|
log_file, maxBytes=5*1024*1024, backupCount=5, encoding='utf-8'
|
||||||
)
|
)
|
||||||
file_handler.setFormatter(formatter)
|
file_handler.setFormatter(formatter)
|
||||||
file_handler.setLevel(logging.INFO)
|
file_handler.setLevel(log_level) # WP-24c v4.4.0-DEBUG: Respektiert log_level
|
||||||
|
|
||||||
# 4. Stream Handler: Schreibt weiterhin auf die Konsole
|
# 5. Stream Handler: Schreibt weiterhin auf die Konsole
|
||||||
console_handler = logging.StreamHandler()
|
console_handler = logging.StreamHandler()
|
||||||
console_handler.setFormatter(formatter)
|
console_handler.setFormatter(formatter)
|
||||||
console_handler.setLevel(logging.INFO)
|
console_handler.setLevel(log_level) # WP-24c v4.4.0-DEBUG: Respektiert log_level
|
||||||
|
|
||||||
# 5. Root Logger konfigurieren
|
# 6. Root Logger konfigurieren
|
||||||
logging.basicConfig(
|
logging.basicConfig(
|
||||||
level=logging.INFO,
|
level=log_level,
|
||||||
handlers=[file_handler, console_handler]
|
handlers=[file_handler, console_handler],
|
||||||
|
force=True # Überschreibt bestehende Konfiguration
|
||||||
)
|
)
|
||||||
|
|
||||||
logging.info(f"📝 Logging initialized. Writing to {log_file}")
|
level_name = "DEBUG" if log_level == logging.DEBUG else "INFO"
|
||||||
|
logging.info(f"📝 Logging initialized (Level: {level_name}). Writing to {log_file}")
|
||||||
|
|
@ -211,7 +211,23 @@ class DecisionEngine:
|
||||||
explain=True
|
explain=True
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# WP-24c v4.5.0-DEBUG: Retrieval-Tracer - Protokollierung vor der Suche
|
||||||
|
logger.info(f"🔍 [RETRIEVAL] Starte Stream: '{name}'")
|
||||||
|
logger.info(f" -> Transformierte Query: '{transformed_query}'")
|
||||||
|
logger.debug(f" ⚙️ [FILTER] Angewandte Metadaten-Filter: {request.filters}")
|
||||||
|
logger.debug(f" ⚙️ [FILTER] Top-K: {request.top_k}, Expand-Depth: {request.expand.get('depth') if request.expand else None}")
|
||||||
|
|
||||||
response = await self.retriever.search(request)
|
response = await self.retriever.search(request)
|
||||||
|
|
||||||
|
# WP-24c v4.5.0-DEBUG: Retrieval-Tracer - Protokollierung nach der Suche
|
||||||
|
if not response.results:
|
||||||
|
logger.warning(f"⚠️ [EMPTY] Stream '{name}' lieferte 0 Ergebnisse.")
|
||||||
|
else:
|
||||||
|
logger.info(f"✨ [SUCCESS] Stream '{name}' lieferte {len(response.results)} Treffer.")
|
||||||
|
# Top 3 Treffer im DEBUG-Level loggen
|
||||||
|
for i, hit in enumerate(response.results[:3]):
|
||||||
|
logger.debug(f" [{i+1}] Chunk: {hit.chunk_id} | Score: {hit.score:.4f} | Path: {hit.source.get('path', 'N/A')}")
|
||||||
|
|
||||||
for hit in response.results:
|
for hit in response.results:
|
||||||
hit.stream_origin = name
|
hit.stream_origin = name
|
||||||
return response
|
return response
|
||||||
|
|
|
||||||
|
|
@ -126,6 +126,20 @@ def _semantic_hits(
|
||||||
filters = {"section": target_section}
|
filters = {"section": target_section}
|
||||||
|
|
||||||
raw_hits = qp.search_chunks_by_vector(client, prefix, vector, top=top_k, filters=filters)
|
raw_hits = qp.search_chunks_by_vector(client, prefix, vector, top=top_k, filters=filters)
|
||||||
|
|
||||||
|
# WP-24c v4.5.0-DEBUG: Retrieval-Tracer - Protokollierung der rohen Qdrant-Antwort
|
||||||
|
logger.debug(f"📊 [RAW-HITS] Qdrant lieferte {len(raw_hits)} Roh-Treffer (Top-K: {top_k})")
|
||||||
|
if filters:
|
||||||
|
logger.debug(f" ⚙️ [FILTER] Angewandte Filter: {filters}")
|
||||||
|
|
||||||
|
# Logge die Top 3 Roh-Scores für Diagnose
|
||||||
|
for i, hit in enumerate(raw_hits[:3]):
|
||||||
|
hit_id = str(hit[0]) if hit else "N/A"
|
||||||
|
hit_score = float(hit[1]) if hit and len(hit) > 1 else 0.0
|
||||||
|
hit_payload = dict(hit[2] or {}) if hit and len(hit) > 2 else {}
|
||||||
|
hit_path = hit_payload.get('path', 'N/A')
|
||||||
|
logger.debug(f" [{i+1}] ID: {hit_id} | Raw-Score: {hit_score:.4f} | Path: {hit_path}")
|
||||||
|
|
||||||
# Strikte Typkonvertierung für Stabilität
|
# Strikte Typkonvertierung für Stabilität
|
||||||
return [(str(hit[0]), float(hit[1]), dict(hit[2] or {})) for hit in raw_hits]
|
return [(str(hit[0]), float(hit[1]), dict(hit[2] or {})) for hit in raw_hits]
|
||||||
|
|
||||||
|
|
@ -335,14 +349,30 @@ def _build_hits_from_semantic(
|
||||||
source_chunk_id=source_chunk_id # WP-24c v4.1.0: RAG-Kontext
|
source_chunk_id=source_chunk_id # WP-24c v4.1.0: RAG-Kontext
|
||||||
))
|
))
|
||||||
|
|
||||||
return QueryResponse(results=results, used_mode=used_mode, latency_ms=int((time.time() - t0) * 1000))
|
# WP-24c v4.5.0-DEBUG: Retrieval-Tracer - Finale Ergebnisse
|
||||||
|
latency_ms = int((time.time() - t0) * 1000)
|
||||||
|
if not results:
|
||||||
|
logger.warning(f"⚠️ [EMPTY] Hybride Suche lieferte 0 Ergebnisse (Latency: {latency_ms}ms)")
|
||||||
|
else:
|
||||||
|
logger.info(f"✨ [SUCCESS] Hybride Suche lieferte {len(results)} Treffer (Latency: {latency_ms}ms)")
|
||||||
|
# Top 3 finale Scores loggen
|
||||||
|
for i, hit in enumerate(results[:3]):
|
||||||
|
logger.debug(f" [{i+1}] Final: Chunk={hit.chunk_id} | Total-Score={hit.total_score:.4f} | Semantic={hit.semantic_score:.4f} | Edge={hit.edge_bonus:.4f}")
|
||||||
|
|
||||||
|
return QueryResponse(results=results, used_mode=used_mode, latency_ms=latency_ms)
|
||||||
|
|
||||||
|
|
||||||
def hybrid_retrieve(req: QueryRequest) -> QueryResponse:
|
def hybrid_retrieve(req: QueryRequest) -> QueryResponse:
|
||||||
"""
|
"""
|
||||||
Die Haupt-Einstiegsfunktion für die hybride Suche.
|
Die Haupt-Einstiegsfunktion für die hybride Suche.
|
||||||
WP-15c: Implementiert Edge-Aggregation (Super-Kanten).
|
WP-15c: Implementiert Edge-Aggregation (Super-Kanten).
|
||||||
|
WP-24c v4.5.0-DEBUG: Retrieval-Tracer für Diagnose.
|
||||||
"""
|
"""
|
||||||
|
# WP-24c v4.5.0-DEBUG: Retrieval-Tracer - Start der hybriden Suche
|
||||||
|
logger.info(f"🔍 [RETRIEVAL] Starte hybride Suche")
|
||||||
|
logger.info(f" -> Query: '{req.query[:100]}...' (Länge: {len(req.query)})")
|
||||||
|
logger.debug(f" ⚙️ [FILTER] Request-Filter: {req.filters}")
|
||||||
|
logger.debug(f" ⚙️ [FILTER] Top-K: {req.top_k}, Expand: {req.expand}, Target-Section: {req.target_section}")
|
||||||
client, prefix = _get_client_and_prefix()
|
client, prefix = _get_client_and_prefix()
|
||||||
vector = list(req.query_vector) if req.query_vector else _get_query_vector(req)
|
vector = list(req.query_vector) if req.query_vector else _get_query_vector(req)
|
||||||
top_k = req.top_k or 10
|
top_k = req.top_k or 10
|
||||||
|
|
@ -350,8 +380,15 @@ def hybrid_retrieve(req: QueryRequest) -> QueryResponse:
|
||||||
# 1. Semantische Seed-Suche (Wir laden etwas mehr für das Pooling)
|
# 1. Semantische Seed-Suche (Wir laden etwas mehr für das Pooling)
|
||||||
# WP-24c v4.1.0: Section-Filtering unterstützen
|
# WP-24c v4.1.0: Section-Filtering unterstützen
|
||||||
target_section = getattr(req, "target_section", None)
|
target_section = getattr(req, "target_section", None)
|
||||||
|
|
||||||
|
# WP-24c v4.5.0-DEBUG: Retrieval-Tracer - Vor semantischer Suche
|
||||||
|
logger.debug(f"🔍 [RETRIEVAL] Starte semantische Seed-Suche (Top-K: {top_k * 3}, Target-Section: {target_section})")
|
||||||
|
|
||||||
hits = _semantic_hits(client, prefix, vector, top_k=top_k * 3, filters=req.filters, target_section=target_section)
|
hits = _semantic_hits(client, prefix, vector, top_k=top_k * 3, filters=req.filters, target_section=target_section)
|
||||||
|
|
||||||
|
# WP-24c v4.5.0-DEBUG: Retrieval-Tracer - Nach semantischer Suche
|
||||||
|
logger.debug(f"📊 [SEED-HITS] Semantische Suche lieferte {len(hits)} Seed-Treffer")
|
||||||
|
|
||||||
# 2. Graph Expansion Konfiguration
|
# 2. Graph Expansion Konfiguration
|
||||||
expand_cfg = req.expand if isinstance(req.expand, dict) else {}
|
expand_cfg = req.expand if isinstance(req.expand, dict) else {}
|
||||||
depth = int(expand_cfg.get("depth", 1))
|
depth = int(expand_cfg.get("depth", 1))
|
||||||
|
|
@ -470,7 +507,21 @@ def hybrid_retrieve(req: QueryRequest) -> QueryResponse:
|
||||||
|
|
||||||
# 3. Scoring & Explanation Generierung
|
# 3. Scoring & Explanation Generierung
|
||||||
# top_k wird erst hier final angewandt
|
# top_k wird erst hier final angewandt
|
||||||
return _build_hits_from_semantic(hits, top_k, "hybrid", subgraph, req.explain, boost_edges)
|
# WP-24c v4.5.0-DEBUG: Retrieval-Tracer - Vor finaler Hit-Erstellung
|
||||||
|
if subgraph:
|
||||||
|
logger.debug(f"📊 [GRAPH] Subgraph enthält {len(subgraph.edges)} Kanten für {len(seed_ids)} Seed-Notizen")
|
||||||
|
else:
|
||||||
|
logger.debug(f"📊 [GRAPH] Kein Subgraph (depth=0 oder keine Seed-IDs)")
|
||||||
|
|
||||||
|
result = _build_hits_from_semantic(hits, top_k, "hybrid", subgraph, req.explain, boost_edges)
|
||||||
|
|
||||||
|
# WP-24c v4.5.0-DEBUG: Retrieval-Tracer - Nach finaler Hit-Erstellung
|
||||||
|
if not result.results:
|
||||||
|
logger.warning(f"⚠️ [EMPTY] Hybride Suche lieferte nach Scoring 0 finale Ergebnisse")
|
||||||
|
else:
|
||||||
|
logger.info(f"✨ [SUCCESS] Hybride Suche lieferte {len(result.results)} finale Treffer (Mode: {result.used_mode})")
|
||||||
|
|
||||||
|
return result
|
||||||
|
|
||||||
|
|
||||||
def semantic_retrieve(req: QueryRequest) -> QueryResponse:
|
def semantic_retrieve(req: QueryRequest) -> QueryResponse:
|
||||||
|
|
|
||||||
|
|
@ -110,6 +110,11 @@ def compute_wp22_score(
|
||||||
# Sicherstellen, dass der Score niemals 0 oder negativ ist (Floor)
|
# Sicherstellen, dass der Score niemals 0 oder negativ ist (Floor)
|
||||||
final_score = max(0.0001, float(total))
|
final_score = max(0.0001, float(total))
|
||||||
|
|
||||||
|
# WP-24c v4.5.0-DEBUG: Retrieval-Tracer - Protokollierung der Score-Berechnung
|
||||||
|
chunk_id = payload.get("chunk_id", payload.get("id", "unknown"))
|
||||||
|
logger.debug(f"📈 [SCORE-TRACE] Chunk: {chunk_id} | Base: {base_val:.4f} | Multiplier: {total_boost:.2f} | Final: {final_score:.4f}")
|
||||||
|
logger.debug(f" -> Details: StatusMult={status_mult:.2f}, TypeImpact={type_impact:.2f}, EdgeImpact={edge_impact_final:.4f}, CentImpact={cent_impact_final:.4f}")
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"total": final_score,
|
"total": final_score,
|
||||||
"edge_bonus": float(edge_bonus_raw),
|
"edge_bonus": float(edge_bonus_raw),
|
||||||
|
|
|
||||||
|
|
@ -71,11 +71,17 @@ from pathlib import Path
|
||||||
from typing import List, Dict, Any
|
from typing import List, Dict, Any
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
|
|
||||||
# Root Logger Setup: INFO-Level für volle Transparenz der fachlichen Prozesse
|
# Root Logger Setup: Nutzt zentrale setup_logging() Funktion
|
||||||
logging.basicConfig(
|
# WP-24c v4.4.0-DEBUG: Aktiviert DEBUG-Level für End-to-End Tracing
|
||||||
level=logging.INFO,
|
# Kann auch über Umgebungsvariable DEBUG=true gesteuert werden
|
||||||
format='%(asctime)s [%(levelname)s] %(message)s'
|
from app.core.logging_setup import setup_logging
|
||||||
)
|
|
||||||
|
# Bestimme Log-Level basierend auf DEBUG Umgebungsvariable
|
||||||
|
debug_mode = os.getenv("DEBUG", "false").lower() == "true"
|
||||||
|
log_level = logging.DEBUG if debug_mode else logging.INFO
|
||||||
|
|
||||||
|
# Nutze zentrale Logging-Konfiguration (File + Console)
|
||||||
|
setup_logging(log_level=log_level)
|
||||||
|
|
||||||
# Sicherstellung, dass das Root-Verzeichnis im Python-Pfad liegt
|
# Sicherstellung, dass das Root-Verzeichnis im Python-Pfad liegt
|
||||||
sys.path.append(os.getcwd())
|
sys.path.append(os.getcwd())
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user