Refactor hash input and body/frontmatter handling in ingestion_processor.py for improved accuracy
Update the ingestion diagnostics to use the parsed object instead of note_pl for hash input, body, and frontmatter extraction. note_pl does not contain the body or frontmatter, so the previous diagnostic comparisons fell back to empty defaults; reading these values from parsed ensures the correct content is compared, which makes the change-detection diagnostics reliable.
This commit is contained in:
parent
c613d81846
commit
43641441ef
|
|
@@ -309,20 +309,24 @@ class IngestionService:
|
||||||
logger.info(f" -> Hash-Unterschied: Längen unterschiedlich (new={len(new_h)}, old={len(old_h)})")
|
logger.info(f" -> Hash-Unterschied: Längen unterschiedlich (new={len(new_h)}, old={len(old_h)})")
|
||||||
|
|
||||||
# WP-24c v4.5.9-DEBUG: Logge Hash-Input für Diagnose
|
# WP-24c v4.5.9-DEBUG: Logge Hash-Input für Diagnose
|
||||||
|
# WICHTIG: _get_hash_source_content benötigt das ursprüngliche parsed-Objekt, nicht note_pl!
|
||||||
from app.core.ingestion.ingestion_note_payload import _get_hash_source_content
|
from app.core.ingestion.ingestion_note_payload import _get_hash_source_content
|
||||||
hash_mode = self.active_hash_mode or 'full'
|
hash_mode = self.active_hash_mode or 'full'
|
||||||
hash_input = _get_hash_source_content(note_pl, hash_mode)
|
# Verwende parsed statt note_pl, da note_pl keinen body/frontmatter enthält
|
||||||
|
hash_input = _get_hash_source_content(parsed, hash_mode)
|
||||||
logger.info(f" -> Hash-Input (erste 200 Zeichen): {hash_input[:200]}...")
|
logger.info(f" -> Hash-Input (erste 200 Zeichen): {hash_input[:200]}...")
|
||||||
logger.info(f" -> Hash-Input Länge: {len(hash_input)}")
|
logger.info(f" -> Hash-Input Länge: {len(hash_input)}")
|
||||||
|
|
||||||
# WP-24c v4.5.9-DEBUG: Vergleiche auch Body-Länge und Frontmatter
|
# WP-24c v4.5.9-DEBUG: Vergleiche auch Body-Länge und Frontmatter
|
||||||
new_body = str(note_pl.get("body", "")).strip()
|
# Verwende parsed.body statt note_pl.get("body")
|
||||||
|
new_body = str(getattr(parsed, "body", "") or "").strip()
|
||||||
old_body = str(old_payload.get("body", "")).strip() if old_payload else ""
|
old_body = str(old_payload.get("body", "")).strip() if old_payload else ""
|
||||||
logger.info(f" -> Body-Länge: new={len(new_body)}, old={len(old_body)}")
|
logger.info(f" -> Body-Länge: new={len(new_body)}, old={len(old_body)}")
|
||||||
if len(new_body) != len(old_body):
|
if len(new_body) != len(old_body):
|
||||||
logger.warning(f" -> ⚠️ Body-Länge unterschiedlich! Mögliche Ursache: Parsing-Unterschiede")
|
logger.warning(f" -> ⚠️ Body-Länge unterschiedlich! Mögliche Ursache: Parsing-Unterschiede")
|
||||||
|
|
||||||
new_fm = note_pl.get("frontmatter", {})
|
# Verwende parsed.frontmatter statt note_pl.get("frontmatter")
|
||||||
|
new_fm = getattr(parsed, "frontmatter", {}) or {}
|
||||||
old_fm = old_payload.get("frontmatter", {}) if old_payload else {}
|
old_fm = old_payload.get("frontmatter", {}) if old_payload else {}
|
||||||
logger.info(f" -> Frontmatter-Keys: new={sorted(new_fm.keys())}, old={sorted(old_fm.keys())}")
|
logger.info(f" -> Frontmatter-Keys: new={sorted(new_fm.keys())}, old={sorted(old_fm.keys())}")
|
||||||
# Prüfe relevante Frontmatter-Felder
|
# Prüfe relevante Frontmatter-Felder
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user