From c613d81846ae41d046daabb0c59764df16970168 Mon Sep 17 00:00:00 2001 From: Lars Date: Mon, 12 Jan 2026 08:16:03 +0100 Subject: [PATCH] Enhance logging in ingestion_processor.py for detailed change detection diagnostics Add comprehensive logging for hash input, body length comparisons, and frontmatter key checks in the change detection process. This update aims to improve traceability and facilitate debugging by providing insights into potential discrepancies between new and old payloads during ingestion workflows. --- app/core/ingestion/ingestion_processor.py | 25 +++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/app/core/ingestion/ingestion_processor.py b/app/core/ingestion/ingestion_processor.py index 7a98a30..4f164e7 100644 --- a/app/core/ingestion/ingestion_processor.py +++ b/app/core/ingestion/ingestion_processor.py @@ -307,6 +307,31 @@ class IngestionService: logger.info(f" -> Hash-Unterschied: Erste unterschiedliche Position: {diff_pos}") else: logger.info(f" -> Hash-Unterschied: Längen unterschiedlich (new={len(new_h)}, old={len(old_h)})") + + # WP-24c v4.5.9-DEBUG: Logge Hash-Input für Diagnose + from app.core.ingestion.ingestion_note_payload import _get_hash_source_content + hash_mode = self.active_hash_mode or 'full' + hash_input = _get_hash_source_content(note_pl, hash_mode) + logger.info(f" -> Hash-Input (erste 200 Zeichen): {hash_input[:200]}...") + logger.info(f" -> Hash-Input Länge: {len(hash_input)}") + + # WP-24c v4.5.9-DEBUG: Vergleiche auch Body-Länge und Frontmatter + new_body = str(note_pl.get("body", "")).strip() + old_body = str(old_payload.get("body", "")).strip() if old_payload else "" + logger.info(f" -> Body-Länge: new={len(new_body)}, old={len(old_body)}") + if len(new_body) != len(old_body): + logger.warning(f" -> ⚠️ Body-Länge unterschiedlich! Mögliche Ursache: Parsing-Unterschiede") + + new_fm = note_pl.get("frontmatter", {}) + old_fm = old_payload.get("frontmatter", {}) if old_payload else {} + logger.info(f" -> Frontmatter-Keys: new={sorted(new_fm.keys())}, old={sorted(old_fm.keys())}") + # Prüfe relevante Frontmatter-Felder + relevant_keys = ["title", "type", "status", "tags", "chunking_profile", "chunk_profile", "retriever_weight", "split_level", "strict_heading_split"] + for key in relevant_keys: + new_val = new_fm.get(key) + old_val = old_fm.get(key) + if new_val != old_val: + logger.warning(f" -> ⚠️ Frontmatter '{key}' unterschiedlich: new={new_val}, old={old_val}") else: # WP-24c v4.5.9: Wenn Hash fehlt, als geändert behandeln (Sicherheit) logger.warning(f"⚠️ [CHANGE-DETECTION] Hash fehlt für '{note_id}': new_h={bool(new_h)}, old_h={bool(old_h)}")