diff --git a/app/core/ingestion/ingestion_processor.py b/app/core/ingestion/ingestion_processor.py index 7a98a30..4f164e7 100644 --- a/app/core/ingestion/ingestion_processor.py +++ b/app/core/ingestion/ingestion_processor.py @@ -307,6 +307,31 @@ class IngestionService: logger.info(f" -> Hash-Unterschied: Erste unterschiedliche Position: {diff_pos}") else: logger.info(f" -> Hash-Unterschied: Längen unterschiedlich (new={len(new_h)}, old={len(old_h)})") + + # WP-24c v4.5.9-DEBUG: Logge Hash-Input für Diagnose + from app.core.ingestion.ingestion_note_payload import _get_hash_source_content + hash_mode = self.active_hash_mode or 'full' + hash_input = _get_hash_source_content(note_pl, hash_mode) + logger.info(f" -> Hash-Input (erste 200 Zeichen): {hash_input[:200]}...") + logger.info(f" -> Hash-Input Länge: {len(hash_input)}") + + # WP-24c v4.5.9-DEBUG: Vergleiche auch Body-Länge und Frontmatter + new_body = str(note_pl.get("body", "")).strip() + old_body = str(old_payload.get("body", "")).strip() if old_payload else "" + logger.info(f" -> Body-Länge: new={len(new_body)}, old={len(old_body)}") + if len(new_body) != len(old_body): + logger.warning(f" -> ⚠️ Body-Länge unterschiedlich! Mögliche Ursache: Parsing-Unterschiede") + + new_fm = note_pl.get("frontmatter", {}) + old_fm = old_payload.get("frontmatter", {}) if old_payload else {} + logger.info(f" -> Frontmatter-Keys: new={sorted(new_fm.keys())}, old={sorted(old_fm.keys())}") + # Prüfe relevante Frontmatter-Felder + relevant_keys = ["title", "type", "status", "tags", "chunking_profile", "chunk_profile", "retriever_weight", "split_level", "strict_heading_split"] + for key in relevant_keys: + new_val = new_fm.get(key) + old_val = old_fm.get(key) + if new_val != old_val: + logger.warning(f" -> ⚠️ Frontmatter '{key}' unterschiedlich: new={new_val}, old={old_val}") else: # WP-24c v4.5.9: Wenn Hash fehlt, als geändert behandeln (Sicherheit) logger.warning(f"⚠️ [CHANGE-DETECTION] Hash fehlt für '{note_id}': new_h={bool(new_h)}, old_h={bool(old_h)}")