diff --git a/app/core/logging_setup.py b/app/core/logging_setup.py index f2f4db9..c6f23b2 100644 --- a/app/core/logging_setup.py +++ b/app/core/logging_setup.py @@ -2,36 +2,52 @@ import logging import os from logging.handlers import RotatingFileHandler -def setup_logging(): - # 1. Log-Verzeichnis erstellen (falls nicht vorhanden) +def setup_logging(log_level: int = None): + """ + Konfiguriert das Logging-System mit File- und Console-Handler. + WP-24c v4.4.0-DEBUG: Unterstützt DEBUG-Level für End-to-End Tracing. + + Args: + log_level: Optionales Log-Level (logging.DEBUG, logging.INFO, etc.) + Falls nicht gesetzt, wird aus DEBUG Umgebungsvariable gelesen. + """ + # 1. Log-Level bestimmen + if log_level is None: + # WP-24c v4.4.0-DEBUG: Unterstützung für DEBUG-Level via Umgebungsvariable + debug_mode = os.getenv("DEBUG", "false").lower() == "true" + log_level = logging.DEBUG if debug_mode else logging.INFO + + # 2. Log-Verzeichnis erstellen (falls nicht vorhanden) log_dir = "logs" if not os.path.exists(log_dir): os.makedirs(log_dir) log_file = os.path.join(log_dir, "mindnet.log") - # 2. Formatter definieren (Zeitstempel | Level | Modul | Nachricht) + # 3. Formatter definieren (Zeitstempel | Level | Modul | Nachricht) formatter = logging.Formatter( '%(asctime)s | %(levelname)-8s | %(name)s | %(message)s', datefmt='%Y-%m-%d %H:%M:%S' ) - # 3. File Handler: Schreibt in Datei (max. 5MB pro Datei, behält 5 Backups) + # 4. File Handler: Schreibt in Datei (max. 5MB pro Datei, behält 5 Backups) file_handler = RotatingFileHandler( log_file, maxBytes=5*1024*1024, backupCount=5, encoding='utf-8' ) file_handler.setFormatter(formatter) - file_handler.setLevel(logging.INFO) + file_handler.setLevel(log_level) # WP-24c v4.4.0-DEBUG: Respektiert log_level - # 4. Stream Handler: Schreibt weiterhin auf die Konsole + # 5. Stream Handler: Schreibt weiterhin auf die Konsole console_handler = logging.StreamHandler() console_handler.setFormatter(formatter) - console_handler.setLevel(logging.INFO) + console_handler.setLevel(log_level) # WP-24c v4.4.0-DEBUG: Respektiert log_level - # 5. Root Logger konfigurieren + # 6. Root Logger konfigurieren logging.basicConfig( - level=logging.INFO, - handlers=[file_handler, console_handler] + level=log_level, + handlers=[file_handler, console_handler], + force=True # Überschreibt bestehende Konfiguration ) - logging.info(f"📝 Logging initialized. Writing to {log_file}") \ No newline at end of file + level_name = "DEBUG" if log_level == logging.DEBUG else "INFO" + logging.info(f"📝 Logging initialized (Level: {level_name}). Writing to {log_file}") \ No newline at end of file diff --git a/app/core/retrieval/decision_engine.py b/app/core/retrieval/decision_engine.py index e74e60a..ed90d21 100644 --- a/app/core/retrieval/decision_engine.py +++ b/app/core/retrieval/decision_engine.py @@ -211,7 +211,23 @@ class DecisionEngine: explain=True ) + # WP-24c v4.5.0-DEBUG: Retrieval-Tracer - Protokollierung vor der Suche + logger.info(f"🔍 [RETRIEVAL] Starte Stream: '{name}'") + logger.info(f" -> Transformierte Query: '{transformed_query}'") + logger.debug(f" ⚙️ [FILTER] Angewandte Metadaten-Filter: {request.filters}") + logger.debug(f" ⚙️ [FILTER] Top-K: {request.top_k}, Expand-Depth: {request.expand.get('depth') if request.expand else None}") + response = await self.retriever.search(request) + + # WP-24c v4.5.0-DEBUG: Retrieval-Tracer - Protokollierung nach der Suche + if not response.results: + logger.warning(f"⚠️ [EMPTY] Stream '{name}' lieferte 0 Ergebnisse.") + else: + logger.info(f"✨ [SUCCESS] Stream '{name}' lieferte {len(response.results)} Treffer.") + # Top 3 Treffer im DEBUG-Level loggen + for i, hit in enumerate(response.results[:3]): + logger.debug(f" [{i+1}] Chunk: {hit.chunk_id} | Score: {hit.score:.4f} | Path: {hit.source.get('path', 'N/A')}") + for hit in response.results: hit.stream_origin = name return response diff --git a/app/core/retrieval/retriever.py b/app/core/retrieval/retriever.py index 9423ba3..1771b68 100644 --- a/app/core/retrieval/retriever.py +++ b/app/core/retrieval/retriever.py @@ -126,6 +126,20 @@ def _semantic_hits( filters = {"section": target_section} raw_hits = qp.search_chunks_by_vector(client, prefix, vector, top=top_k, filters=filters) + + # WP-24c v4.5.0-DEBUG: Retrieval-Tracer - Protokollierung der rohen Qdrant-Antwort + logger.debug(f"📊 [RAW-HITS] Qdrant lieferte {len(raw_hits)} Roh-Treffer (Top-K: {top_k})") + if filters: + logger.debug(f" ⚙️ [FILTER] Angewandte Filter: {filters}") + + # Logge die Top 3 Roh-Scores für Diagnose + for i, hit in enumerate(raw_hits[:3]): + hit_id = str(hit[0]) if hit else "N/A" + hit_score = float(hit[1]) if hit and len(hit) > 1 else 0.0 + hit_payload = dict(hit[2] or {}) if hit and len(hit) > 2 else {} + hit_path = hit_payload.get('path', 'N/A') + logger.debug(f" [{i+1}] ID: {hit_id} | Raw-Score: {hit_score:.4f} | Path: {hit_path}") + # Strikte Typkonvertierung für Stabilität return [(str(hit[0]), float(hit[1]), dict(hit[2] or {})) for hit in raw_hits] @@ -335,14 +349,30 @@ def _build_hits_from_semantic( source_chunk_id=source_chunk_id # WP-24c v4.1.0: RAG-Kontext )) - return QueryResponse(results=results, used_mode=used_mode, latency_ms=int((time.time() - t0) * 1000)) + # WP-24c v4.5.0-DEBUG: Retrieval-Tracer - Finale Ergebnisse + latency_ms = int((time.time() - t0) * 1000) + if not results: + logger.warning(f"⚠️ [EMPTY] Hybride Suche lieferte 0 Ergebnisse (Latency: {latency_ms}ms)") + else: + logger.info(f"✨ [SUCCESS] Hybride Suche lieferte {len(results)} Treffer (Latency: {latency_ms}ms)") + # Top 3 finale Scores loggen + for i, hit in enumerate(results[:3]): + logger.debug(f" [{i+1}] Final: Chunk={hit.chunk_id} | Total-Score={hit.total_score:.4f} | Semantic={hit.semantic_score:.4f} | Edge={hit.edge_bonus:.4f}") + + return QueryResponse(results=results, used_mode=used_mode, latency_ms=latency_ms) def hybrid_retrieve(req: QueryRequest) -> QueryResponse: """ Die Haupt-Einstiegsfunktion für die hybride Suche. WP-15c: Implementiert Edge-Aggregation (Super-Kanten). + WP-24c v4.5.0-DEBUG: Retrieval-Tracer für Diagnose. """ + # WP-24c v4.5.0-DEBUG: Retrieval-Tracer - Start der hybriden Suche + logger.info(f"🔍 [RETRIEVAL] Starte hybride Suche") + logger.info(f" -> Query: '{req.query[:100]}...' (Länge: {len(req.query)})") + logger.debug(f" ⚙️ [FILTER] Request-Filter: {req.filters}") + logger.debug(f" ⚙️ [FILTER] Top-K: {req.top_k}, Expand: {req.expand}, Target-Section: {req.target_section}") client, prefix = _get_client_and_prefix() vector = list(req.query_vector) if req.query_vector else _get_query_vector(req) top_k = req.top_k or 10 @@ -350,7 +380,14 @@ def hybrid_retrieve(req: QueryRequest) -> QueryResponse: # 1. Semantische Seed-Suche (Wir laden etwas mehr für das Pooling) # WP-24c v4.1.0: Section-Filtering unterstützen target_section = getattr(req, "target_section", None) + + # WP-24c v4.5.0-DEBUG: Retrieval-Tracer - Vor semantischer Suche + logger.debug(f"🔍 [RETRIEVAL] Starte semantische Seed-Suche (Top-K: {top_k * 3}, Target-Section: {target_section})") + hits = _semantic_hits(client, prefix, vector, top_k=top_k * 3, filters=req.filters, target_section=target_section) + + # WP-24c v4.5.0-DEBUG: Retrieval-Tracer - Nach semantischer Suche + logger.debug(f"📊 [SEED-HITS] Semantische Suche lieferte {len(hits)} Seed-Treffer") # 2. Graph Expansion Konfiguration expand_cfg = req.expand if isinstance(req.expand, dict) else {} @@ -470,7 +507,21 @@ def hybrid_retrieve(req: QueryRequest) -> QueryResponse: # 3. Scoring & Explanation Generierung # top_k wird erst hier final angewandt - return _build_hits_from_semantic(hits, top_k, "hybrid", subgraph, req.explain, boost_edges) + # WP-24c v4.5.0-DEBUG: Retrieval-Tracer - Vor finaler Hit-Erstellung + if subgraph: + logger.debug(f"📊 [GRAPH] Subgraph enthält {len(subgraph.edges)} Kanten für {len(seed_ids)} Seed-Notizen") + else: + logger.debug(f"📊 [GRAPH] Kein Subgraph (depth=0 oder keine Seed-IDs)") + + result = _build_hits_from_semantic(hits, top_k, "hybrid", subgraph, req.explain, boost_edges) + + # WP-24c v4.5.0-DEBUG: Retrieval-Tracer - Nach finaler Hit-Erstellung + if not result.results: + logger.warning(f"⚠️ [EMPTY] Hybride Suche lieferte nach Scoring 0 finale Ergebnisse") + else: + logger.info(f"✨ [SUCCESS] Hybride Suche lieferte {len(result.results)} finale Treffer (Mode: {result.used_mode})") + + return result def semantic_retrieve(req: QueryRequest) -> QueryResponse: diff --git a/app/core/retrieval/retriever_scoring.py b/app/core/retrieval/retriever_scoring.py index ce913cb..b8be453 100644 --- a/app/core/retrieval/retriever_scoring.py +++ b/app/core/retrieval/retriever_scoring.py @@ -110,6 +110,11 @@ def compute_wp22_score( # Sicherstellen, dass der Score niemals 0 oder negativ ist (Floor) final_score = max(0.0001, float(total)) + # WP-24c v4.5.0-DEBUG: Retrieval-Tracer - Protokollierung der Score-Berechnung + chunk_id = payload.get("chunk_id", payload.get("id", "unknown")) + logger.debug(f"📈 [SCORE-TRACE] Chunk: {chunk_id} | Base: {base_val:.4f} | Multiplier: {total_boost:.2f} | Final: {final_score:.4f}") + logger.debug(f" -> Details: StatusMult={status_mult:.2f}, TypeImpact={type_impact:.2f}, EdgeImpact={edge_impact_final:.4f}, CentImpact={cent_impact_final:.4f}") + return { "total": final_score, "edge_bonus": float(edge_bonus_raw), diff --git a/scripts/import_markdown.py b/scripts/import_markdown.py index f50ff44..6a75f66 100644 --- a/scripts/import_markdown.py +++ b/scripts/import_markdown.py @@ -71,11 +71,17 @@ from pathlib import Path from typing import List, Dict, Any from dotenv import load_dotenv -# Root Logger Setup: INFO-Level für volle Transparenz der fachlichen Prozesse -logging.basicConfig( - level=logging.INFO, - format='%(asctime)s [%(levelname)s] %(message)s' -) +# Root Logger Setup: Nutzt zentrale setup_logging() Funktion +# WP-24c v4.4.0-DEBUG: Aktiviert DEBUG-Level für End-to-End Tracing +# Kann auch über Umgebungsvariable DEBUG=true gesteuert werden +from app.core.logging_setup import setup_logging + +# Bestimme Log-Level basierend auf DEBUG Umgebungsvariable +debug_mode = os.getenv("DEBUG", "false").lower() == "true" +log_level = logging.DEBUG if debug_mode else logging.INFO + +# Nutze zentrale Logging-Konfiguration (File + Console) +setup_logging(log_level=log_level) # Sicherstellung, dass das Root-Verzeichnis im Python-Pfad liegt sys.path.append(os.getcwd())