From 43641441ef5ebb7bd53731123d582f7795552b3f Mon Sep 17 00:00:00 2001 From: Lars Date: Mon, 12 Jan 2026 08:19:43 +0100 Subject: [PATCH] Refactor hash input and body/frontmatter handling in ingestion_processor.py for improved accuracy. Update the ingestion process to utilize the parsed object instead of note_pl for hash input, body, and frontmatter extraction. This change ensures that the correct content is used for comparisons, enhancing the reliability of change detection diagnostics and improving overall ingestion accuracy. --- app/core/ingestion/ingestion_processor.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/app/core/ingestion/ingestion_processor.py b/app/core/ingestion/ingestion_processor.py index 4f164e7..84999cb 100644 --- a/app/core/ingestion/ingestion_processor.py +++ b/app/core/ingestion/ingestion_processor.py @@ -309,20 +309,24 @@ class IngestionService: logger.info(f" -> Hash-Unterschied: Längen unterschiedlich (new={len(new_h)}, old={len(old_h)})") # WP-24c v4.5.9-DEBUG: Logge Hash-Input für Diagnose + # WICHTIG: _get_hash_source_content benötigt das ursprüngliche parsed-Objekt, nicht note_pl! 
from app.core.ingestion.ingestion_note_payload import _get_hash_source_content hash_mode = self.active_hash_mode or 'full' - hash_input = _get_hash_source_content(note_pl, hash_mode) + # Verwende parsed statt note_pl, da note_pl keinen body/frontmatter enthält + hash_input = _get_hash_source_content(parsed, hash_mode) logger.info(f" -> Hash-Input (erste 200 Zeichen): {hash_input[:200]}...") logger.info(f" -> Hash-Input Länge: {len(hash_input)}") # WP-24c v4.5.9-DEBUG: Vergleiche auch Body-Länge und Frontmatter - new_body = str(note_pl.get("body", "")).strip() + # Verwende parsed.body statt note_pl.get("body") + new_body = str(getattr(parsed, "body", "") or "").strip() old_body = str(old_payload.get("body", "")).strip() if old_payload else "" logger.info(f" -> Body-Länge: new={len(new_body)}, old={len(old_body)}") if len(new_body) != len(old_body): logger.warning(f" -> ⚠️ Body-Länge unterschiedlich! Mögliche Ursache: Parsing-Unterschiede") - new_fm = note_pl.get("frontmatter", {}) + # Verwende parsed.frontmatter statt note_pl.get("frontmatter") + new_fm = getattr(parsed, "frontmatter", {}) or {} old_fm = old_payload.get("frontmatter", {}) if old_payload else {} logger.info(f" -> Frontmatter-Keys: new={sorted(new_fm.keys())}, old={sorted(old_fm.keys())}") # Prüfe relevante Frontmatter-Felder