Refactor hash input handling in ingestion_processor.py to use dictionary format
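Update the ingestion process to convert the parsed object to a dictionary (via `_as_dict`) before passing it to the hash input function `_get_hash_source_content`, which requires a dictionary rather than the ParsedNote object. The frontmatter field comparison is also made tolerant of non-dictionary values: `new_fm` falls back to attribute access via `getattr`, and a non-dictionary `old_fm` yields `None`. This change ensures compatibility with the updated function requirements and improves the accuracy of hash comparisons during ingestion workflows.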
This commit is contained in:
parent
43641441ef
commit
e52eed40ca
|
|
@ -309,11 +309,12 @@ class IngestionService:
|
||||||
logger.info(f" -> Hash-Unterschied: Längen unterschiedlich (new={len(new_h)}, old={len(old_h)})")
|
logger.info(f" -> Hash-Unterschied: Längen unterschiedlich (new={len(new_h)}, old={len(old_h)})")
|
||||||
|
|
||||||
# WP-24c v4.5.9-DEBUG: Logge Hash-Input für Diagnose
|
# WP-24c v4.5.9-DEBUG: Logge Hash-Input für Diagnose
|
||||||
# WICHTIG: _get_hash_source_content benötigt das ursprüngliche parsed-Objekt, nicht note_pl!
|
# WICHTIG: _get_hash_source_content benötigt ein Dictionary, nicht das ParsedNote-Objekt!
|
||||||
from app.core.ingestion.ingestion_note_payload import _get_hash_source_content
|
from app.core.ingestion.ingestion_note_payload import _get_hash_source_content, _as_dict
|
||||||
hash_mode = self.active_hash_mode or 'full'
|
hash_mode = self.active_hash_mode or 'full'
|
||||||
# Verwende parsed statt note_pl, da note_pl keinen body/frontmatter enthält
|
# Konvertiere parsed zu Dictionary für _get_hash_source_content
|
||||||
hash_input = _get_hash_source_content(parsed, hash_mode)
|
parsed_dict = _as_dict(parsed)
|
||||||
|
hash_input = _get_hash_source_content(parsed_dict, hash_mode)
|
||||||
logger.info(f" -> Hash-Input (erste 200 Zeichen): {hash_input[:200]}...")
|
logger.info(f" -> Hash-Input (erste 200 Zeichen): {hash_input[:200]}...")
|
||||||
logger.info(f" -> Hash-Input Länge: {len(hash_input)}")
|
logger.info(f" -> Hash-Input Länge: {len(hash_input)}")
|
||||||
|
|
||||||
|
|
@ -332,8 +333,8 @@ class IngestionService:
|
||||||
# Prüfe relevante Frontmatter-Felder
|
# Prüfe relevante Frontmatter-Felder
|
||||||
relevant_keys = ["title", "type", "status", "tags", "chunking_profile", "chunk_profile", "retriever_weight", "split_level", "strict_heading_split"]
|
relevant_keys = ["title", "type", "status", "tags", "chunking_profile", "chunk_profile", "retriever_weight", "split_level", "strict_heading_split"]
|
||||||
for key in relevant_keys:
|
for key in relevant_keys:
|
||||||
new_val = new_fm.get(key)
|
new_val = new_fm.get(key) if isinstance(new_fm, dict) else getattr(new_fm, key, None)
|
||||||
old_val = old_fm.get(key)
|
old_val = old_fm.get(key) if isinstance(old_fm, dict) else None
|
||||||
if new_val != old_val:
|
if new_val != old_val:
|
||||||
logger.warning(f" -> ⚠️ Frontmatter '{key}' unterschiedlich: new={new_val}, old={old_val}")
|
logger.warning(f" -> ⚠️ Frontmatter '{key}' unterschiedlich: new={new_val}, old={old_val}")
|
||||||
else:
|
else:
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user