Enhance logging in ingestion_processor.py for detailed change detection diagnostics

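Add diagnostic logging to the change detection process for the case where the new and old hashes differ. The hash input is now logged (first 200 characters plus total length), the body lengths of the new and old payloads are compared, and the frontmatter keys and a fixed set of relevant frontmatter fields are checked for differences. This makes it easier to trace why a note is reported as changed and to debug discrepancies between new and old payloads during ingestion workflows.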
This commit is contained in:
Lars 2026-01-12 08:16:03 +01:00
parent de5db09b51
commit c613d81846

View File

@ -307,6 +307,31 @@ class IngestionService:
logger.info(f" -> Hash-Unterschied: Erste unterschiedliche Position: {diff_pos}") logger.info(f" -> Hash-Unterschied: Erste unterschiedliche Position: {diff_pos}")
else: else:
logger.info(f" -> Hash-Unterschied: Längen unterschiedlich (new={len(new_h)}, old={len(old_h)})") logger.info(f" -> Hash-Unterschied: Längen unterschiedlich (new={len(new_h)}, old={len(old_h)})")
# WP-24c v4.5.9-DEBUG: Logge Hash-Input für Diagnose
from app.core.ingestion.ingestion_note_payload import _get_hash_source_content
hash_mode = self.active_hash_mode or 'full'
hash_input = _get_hash_source_content(note_pl, hash_mode)
logger.info(f" -> Hash-Input (erste 200 Zeichen): {hash_input[:200]}...")
logger.info(f" -> Hash-Input Länge: {len(hash_input)}")
# WP-24c v4.5.9-DEBUG: Vergleiche auch Body-Länge und Frontmatter
new_body = str(note_pl.get("body", "")).strip()
old_body = str(old_payload.get("body", "")).strip() if old_payload else ""
logger.info(f" -> Body-Länge: new={len(new_body)}, old={len(old_body)}")
if len(new_body) != len(old_body):
logger.warning(f" -> ⚠️ Body-Länge unterschiedlich! Mögliche Ursache: Parsing-Unterschiede")
new_fm = note_pl.get("frontmatter", {})
old_fm = old_payload.get("frontmatter", {}) if old_payload else {}
logger.info(f" -> Frontmatter-Keys: new={sorted(new_fm.keys())}, old={sorted(old_fm.keys())}")
# Prüfe relevante Frontmatter-Felder
relevant_keys = ["title", "type", "status", "tags", "chunking_profile", "chunk_profile", "retriever_weight", "split_level", "strict_heading_split"]
for key in relevant_keys:
new_val = new_fm.get(key)
old_val = old_fm.get(key)
if new_val != old_val:
logger.warning(f" -> ⚠️ Frontmatter '{key}' unterschiedlich: new={new_val}, old={old_val}")
else: else:
# WP-24c v4.5.9: Wenn Hash fehlt, als geändert behandeln (Sicherheit) # WP-24c v4.5.9: Wenn Hash fehlt, als geändert behandeln (Sicherheit)
logger.warning(f"⚠️ [CHANGE-DETECTION] Hash fehlt für '{note_id}': new_h={bool(new_h)}, old_h={bool(old_h)}") logger.warning(f"⚠️ [CHANGE-DETECTION] Hash fehlt für '{note_id}': new_h={bool(new_h)}, old_h={bool(old_h)}")