From e52eed40ca357dab3f9d0043fbb388a24c4bcdc6 Mon Sep 17 00:00:00 2001
From: Lars
Date: Mon, 12 Jan 2026 08:21:21 +0100
Subject: [PATCH] Refactor hash input handling in ingestion_processor.py to use dictionary format

Update the ingestion process to convert the parsed object to a dictionary before passing it to the hash input function. This change ensures compatibility with the updated function requirements and improves the accuracy of hash comparisons during ingestion workflows.
---
 app/core/ingestion/ingestion_processor.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/app/core/ingestion/ingestion_processor.py b/app/core/ingestion/ingestion_processor.py
index 84999cb..5d3ed9d 100644
--- a/app/core/ingestion/ingestion_processor.py
+++ b/app/core/ingestion/ingestion_processor.py
@@ -309,11 +309,12 @@ class IngestionService:
 logger.info(f" -> Hash-Unterschied: Längen unterschiedlich (new={len(new_h)}, old={len(old_h)})") # WP-24c v4.5.9-DEBUG: Logge Hash-Input für Diagnose - # WICHTIG: _get_hash_source_content benötigt das ursprüngliche parsed-Objekt, nicht note_pl! - from app.core.ingestion.ingestion_note_payload import _get_hash_source_content + # WICHTIG: _get_hash_source_content benötigt ein Dictionary, nicht das ParsedNote-Objekt! 
+ from app.core.ingestion.ingestion_note_payload import _get_hash_source_content, _as_dict hash_mode = self.active_hash_mode or 'full' - # Verwende parsed statt note_pl, da note_pl keinen body/frontmatter enthält - hash_input = _get_hash_source_content(parsed, hash_mode) + # Konvertiere parsed zu Dictionary für _get_hash_source_content + parsed_dict = _as_dict(parsed) + hash_input = _get_hash_source_content(parsed_dict, hash_mode) logger.info(f" -> Hash-Input (erste 200 Zeichen): {hash_input[:200]}...") logger.info(f" -> Hash-Input Länge: {len(hash_input)}") @@ -332,8 +333,8 @@ class IngestionService: # Prüfe relevante Frontmatter-Felder relevant_keys = ["title", "type", "status", "tags", "chunking_profile", "chunk_profile", "retriever_weight", "split_level", "strict_heading_split"] for key in relevant_keys: - new_val = new_fm.get(key) - old_val = old_fm.get(key) + new_val = new_fm.get(key) if isinstance(new_fm, dict) else getattr(new_fm, key, None) + old_val = old_fm.get(key) if isinstance(old_fm, dict) else None if new_val != old_val: logger.warning(f" -> ⚠️ Frontmatter '{key}' unterschiedlich: new={new_val}, old={old_val}") else: