Enhance logging capabilities across multiple modules for version 4.5.0: Introduce detailed debug statements in decision_engine.py, retriever_scoring.py, retriever.py, and logging_setup.py to improve traceability during retrieval processes. Implement dynamic log level configuration based on environment variables, allowing for more flexible debugging and monitoring of application behavior.

This commit is contained in:
Lars 2026-01-11 17:30:34 +01:00
parent 2d87f9d816
commit 3e27c72b80
5 changed files with 112 additions and 18 deletions

View File

@ -2,36 +2,52 @@ import logging
import os
from logging.handlers import RotatingFileHandler
def setup_logging():
# 1. Log-Verzeichnis erstellen (falls nicht vorhanden)
def setup_logging(log_level: int = None):
"""
Konfiguriert das Logging-System mit File- und Console-Handler.
WP-24c v4.4.0-DEBUG: Unterstützt DEBUG-Level für End-to-End Tracing.
Args:
log_level: Optionales Log-Level (logging.DEBUG, logging.INFO, etc.)
Falls nicht gesetzt, wird aus DEBUG Umgebungsvariable gelesen.
"""
# 1. Log-Level bestimmen
if log_level is None:
# WP-24c v4.4.0-DEBUG: Unterstützung für DEBUG-Level via Umgebungsvariable
debug_mode = os.getenv("DEBUG", "false").lower() == "true"
log_level = logging.DEBUG if debug_mode else logging.INFO
# 2. Log-Verzeichnis erstellen (falls nicht vorhanden)
log_dir = "logs"
if not os.path.exists(log_dir):
os.makedirs(log_dir)
log_file = os.path.join(log_dir, "mindnet.log")
# 2. Formatter definieren (Zeitstempel | Level | Modul | Nachricht)
# 3. Formatter definieren (Zeitstempel | Level | Modul | Nachricht)
formatter = logging.Formatter(
'%(asctime)s | %(levelname)-8s | %(name)s | %(message)s',
datefmt='%Y-%m-%d %H:%M:%S'
)
# 3. File Handler: Schreibt in Datei (max. 5MB pro Datei, behält 5 Backups)
# 4. File Handler: Schreibt in Datei (max. 5MB pro Datei, behält 5 Backups)
file_handler = RotatingFileHandler(
log_file, maxBytes=5*1024*1024, backupCount=5, encoding='utf-8'
)
file_handler.setFormatter(formatter)
file_handler.setLevel(logging.INFO)
file_handler.setLevel(log_level) # WP-24c v4.4.0-DEBUG: Respektiert log_level
# 4. Stream Handler: Schreibt weiterhin auf die Konsole
# 5. Stream Handler: Schreibt weiterhin auf die Konsole
console_handler = logging.StreamHandler()
console_handler.setFormatter(formatter)
console_handler.setLevel(logging.INFO)
console_handler.setLevel(log_level) # WP-24c v4.4.0-DEBUG: Respektiert log_level
# 5. Root Logger konfigurieren
# 6. Root Logger konfigurieren
logging.basicConfig(
level=logging.INFO,
handlers=[file_handler, console_handler]
level=log_level,
handlers=[file_handler, console_handler],
force=True # Überschreibt bestehende Konfiguration
)
logging.info(f"📝 Logging initialized. Writing to {log_file}")
level_name = "DEBUG" if log_level == logging.DEBUG else "INFO"
logging.info(f"📝 Logging initialized (Level: {level_name}). Writing to {log_file}")

View File

@ -211,7 +211,23 @@ class DecisionEngine:
explain=True
)
# WP-24c v4.5.0-DEBUG: Retrieval-Tracer - Protokollierung vor der Suche
logger.info(f"🔍 [RETRIEVAL] Starte Stream: '{name}'")
logger.info(f" -> Transformierte Query: '{transformed_query}'")
logger.debug(f" ⚙️ [FILTER] Angewandte Metadaten-Filter: {request.filters}")
logger.debug(f" ⚙️ [FILTER] Top-K: {request.top_k}, Expand-Depth: {request.expand.get('depth') if request.expand else None}")
response = await self.retriever.search(request)
# WP-24c v4.5.0-DEBUG: Retrieval-Tracer - Protokollierung nach der Suche
if not response.results:
logger.warning(f"⚠️ [EMPTY] Stream '{name}' lieferte 0 Ergebnisse.")
else:
logger.info(f"✨ [SUCCESS] Stream '{name}' lieferte {len(response.results)} Treffer.")
# Top 3 Treffer im DEBUG-Level loggen
for i, hit in enumerate(response.results[:3]):
logger.debug(f" [{i+1}] Chunk: {hit.chunk_id} | Score: {hit.score:.4f} | Path: {hit.source.get('path', 'N/A')}")
for hit in response.results:
hit.stream_origin = name
return response

View File

@ -126,6 +126,20 @@ def _semantic_hits(
filters = {"section": target_section}
raw_hits = qp.search_chunks_by_vector(client, prefix, vector, top=top_k, filters=filters)
# WP-24c v4.5.0-DEBUG: Retrieval-Tracer - Protokollierung der rohen Qdrant-Antwort
logger.debug(f"📊 [RAW-HITS] Qdrant lieferte {len(raw_hits)} Roh-Treffer (Top-K: {top_k})")
if filters:
logger.debug(f" ⚙️ [FILTER] Angewandte Filter: {filters}")
# Logge die Top 3 Roh-Scores für Diagnose
for i, hit in enumerate(raw_hits[:3]):
hit_id = str(hit[0]) if hit else "N/A"
hit_score = float(hit[1]) if hit and len(hit) > 1 else 0.0
hit_payload = dict(hit[2] or {}) if hit and len(hit) > 2 else {}
hit_path = hit_payload.get('path', 'N/A')
logger.debug(f" [{i+1}] ID: {hit_id} | Raw-Score: {hit_score:.4f} | Path: {hit_path}")
# Strikte Typkonvertierung für Stabilität
return [(str(hit[0]), float(hit[1]), dict(hit[2] or {})) for hit in raw_hits]
@ -335,14 +349,30 @@ def _build_hits_from_semantic(
source_chunk_id=source_chunk_id # WP-24c v4.1.0: RAG-Kontext
))
return QueryResponse(results=results, used_mode=used_mode, latency_ms=int((time.time() - t0) * 1000))
# WP-24c v4.5.0-DEBUG: Retrieval-Tracer - Finale Ergebnisse
latency_ms = int((time.time() - t0) * 1000)
if not results:
logger.warning(f"⚠️ [EMPTY] Hybride Suche lieferte 0 Ergebnisse (Latency: {latency_ms}ms)")
else:
logger.info(f"✨ [SUCCESS] Hybride Suche lieferte {len(results)} Treffer (Latency: {latency_ms}ms)")
# Top 3 finale Scores loggen
for i, hit in enumerate(results[:3]):
logger.debug(f" [{i+1}] Final: Chunk={hit.chunk_id} | Total-Score={hit.total_score:.4f} | Semantic={hit.semantic_score:.4f} | Edge={hit.edge_bonus:.4f}")
return QueryResponse(results=results, used_mode=used_mode, latency_ms=latency_ms)
def hybrid_retrieve(req: QueryRequest) -> QueryResponse:
"""
Die Haupt-Einstiegsfunktion für die hybride Suche.
WP-15c: Implementiert Edge-Aggregation (Super-Kanten).
WP-24c v4.5.0-DEBUG: Retrieval-Tracer für Diagnose.
"""
# WP-24c v4.5.0-DEBUG: Retrieval-Tracer - Start der hybriden Suche
logger.info(f"🔍 [RETRIEVAL] Starte hybride Suche")
logger.info(f" -> Query: '{req.query[:100]}...' (Länge: {len(req.query)})")
logger.debug(f" ⚙️ [FILTER] Request-Filter: {req.filters}")
logger.debug(f" ⚙️ [FILTER] Top-K: {req.top_k}, Expand: {req.expand}, Target-Section: {req.target_section}")
client, prefix = _get_client_and_prefix()
vector = list(req.query_vector) if req.query_vector else _get_query_vector(req)
top_k = req.top_k or 10
@ -350,8 +380,15 @@ def hybrid_retrieve(req: QueryRequest) -> QueryResponse:
# 1. Semantische Seed-Suche (Wir laden etwas mehr für das Pooling)
# WP-24c v4.1.0: Section-Filtering unterstützen
target_section = getattr(req, "target_section", None)
# WP-24c v4.5.0-DEBUG: Retrieval-Tracer - Vor semantischer Suche
logger.debug(f"🔍 [RETRIEVAL] Starte semantische Seed-Suche (Top-K: {top_k * 3}, Target-Section: {target_section})")
hits = _semantic_hits(client, prefix, vector, top_k=top_k * 3, filters=req.filters, target_section=target_section)
# WP-24c v4.5.0-DEBUG: Retrieval-Tracer - Nach semantischer Suche
logger.debug(f"📊 [SEED-HITS] Semantische Suche lieferte {len(hits)} Seed-Treffer")
# 2. Graph Expansion Konfiguration
expand_cfg = req.expand if isinstance(req.expand, dict) else {}
depth = int(expand_cfg.get("depth", 1))
@ -470,7 +507,21 @@ def hybrid_retrieve(req: QueryRequest) -> QueryResponse:
# 3. Scoring & Explanation Generierung
# top_k wird erst hier final angewandt
return _build_hits_from_semantic(hits, top_k, "hybrid", subgraph, req.explain, boost_edges)
# WP-24c v4.5.0-DEBUG: Retrieval-Tracer - Vor finaler Hit-Erstellung
if subgraph:
logger.debug(f"📊 [GRAPH] Subgraph enthält {len(subgraph.edges)} Kanten für {len(seed_ids)} Seed-Notizen")
else:
logger.debug(f"📊 [GRAPH] Kein Subgraph (depth=0 oder keine Seed-IDs)")
result = _build_hits_from_semantic(hits, top_k, "hybrid", subgraph, req.explain, boost_edges)
# WP-24c v4.5.0-DEBUG: Retrieval-Tracer - Nach finaler Hit-Erstellung
if not result.results:
logger.warning(f"⚠️ [EMPTY] Hybride Suche lieferte nach Scoring 0 finale Ergebnisse")
else:
logger.info(f"✨ [SUCCESS] Hybride Suche lieferte {len(result.results)} finale Treffer (Mode: {result.used_mode})")
return result
def semantic_retrieve(req: QueryRequest) -> QueryResponse:

View File

@ -110,6 +110,11 @@ def compute_wp22_score(
# Sicherstellen, dass der Score niemals 0 oder negativ ist (Floor)
final_score = max(0.0001, float(total))
# WP-24c v4.5.0-DEBUG: Retrieval-Tracer - Protokollierung der Score-Berechnung
chunk_id = payload.get("chunk_id", payload.get("id", "unknown"))
logger.debug(f"📈 [SCORE-TRACE] Chunk: {chunk_id} | Base: {base_val:.4f} | Multiplier: {total_boost:.2f} | Final: {final_score:.4f}")
logger.debug(f" -> Details: StatusMult={status_mult:.2f}, TypeImpact={type_impact:.2f}, EdgeImpact={edge_impact_final:.4f}, CentImpact={cent_impact_final:.4f}")
return {
"total": final_score,
"edge_bonus": float(edge_bonus_raw),

View File

@ -71,11 +71,17 @@ from pathlib import Path
from typing import List, Dict, Any
from dotenv import load_dotenv
# Root Logger Setup: INFO-Level für volle Transparenz der fachlichen Prozesse
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s [%(levelname)s] %(message)s'
)
# Root Logger Setup: Nutzt zentrale setup_logging() Funktion
# WP-24c v4.4.0-DEBUG: Aktiviert DEBUG-Level für End-to-End Tracing
# Kann auch über Umgebungsvariable DEBUG=true gesteuert werden
from app.core.logging_setup import setup_logging
# Bestimme Log-Level basierend auf DEBUG Umgebungsvariable
debug_mode = os.getenv("DEBUG", "false").lower() == "true"
log_level = logging.DEBUG if debug_mode else logging.INFO
# Nutze zentrale Logging-Konfiguration (File + Console)
setup_logging(log_level=log_level)
# Sicherstellung, dass das Root-Verzeichnis im Python-Pfad liegt
sys.path.append(os.getcwd())