Refactor hash input handling in ingestion_processor.py to use dictionary format

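Update the ingestion process to convert the parsed object to a dictionary (via `_as_dict`) before passing it to `_get_hash_source_content`, which expects a dictionary rather than the ParsedNote object. This change keeps the hash-input diagnostics compatible with the updated function requirements and improves the accuracy of hash comparisons during ingestion workflows. The frontmatter comparison is also guarded with `isinstance` checks so that `.get` is only called on dictionaries.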
2026-01-12 08:21:21 +01:00 · 2026-01-12 08:21:21 +01:00 · e52eed40ca
commit e52eed40ca
parent 43641441ef
1 changed file with 7 additions and 6 deletions
--- a/app/core/ingestion/ingestion_processor.py
+++ b/app/core/ingestion/ingestion_processor.py
@@ -309,11 +309,12 @@ class IngestionService:
                            logger.info(f"   -> Hash-Unterschied: Längen unterschiedlich (new={len(new_h)}, old={len(old_h)})")
                        # WP-24c v4.5.9-DEBUG: Logge Hash-Input für Diagnose
-                        # WICHTIG: _get_hash_source_content benötigt das ursprüngliche parsed-Objekt, nicht note_pl!
+                        # WICHTIG: _get_hash_source_content benötigt ein Dictionary, nicht das ParsedNote-Objekt!
-                        from app.core.ingestion.ingestion_note_payload import _get_hash_source_content
+                        from app.core.ingestion.ingestion_note_payload import _get_hash_source_content, _as_dict
                        hash_mode = self.active_hash_mode or 'full'
-                        # Verwende parsed statt note_pl, da note_pl keinen body/frontmatter enthält
+                        # Konvertiere parsed zu Dictionary für _get_hash_source_content
-                        hash_input = _get_hash_source_content(parsed, hash_mode)
+                        parsed_dict = _as_dict(parsed)
+                        hash_input = _get_hash_source_content(parsed_dict, hash_mode)
                        logger.info(f"   -> Hash-Input (erste 200 Zeichen): {hash_input[:200]}...")
                        logger.info(f"   -> Hash-Input Länge: {len(hash_input)}")
@@ -332,8 +333,8 @@ class IngestionService:
                        # Prüfe relevante Frontmatter-Felder
                        relevant_keys = ["title", "type", "status", "tags", "chunking_profile", "chunk_profile", "retriever_weight", "split_level", "strict_heading_split"]
                        for key in relevant_keys:
-                            new_val = new_fm.get(key)
+                            new_val = new_fm.get(key) if isinstance(new_fm, dict) else getattr(new_fm, key, None)
-                            old_val = old_fm.get(key)
+                            old_val = old_fm.get(key) if isinstance(old_fm, dict) else None
                            if new_val != old_val:
                                logger.warning(f"   -> ⚠️ Frontmatter '{key}' unterschiedlich: new={new_val}, old={old_val}")
                else: