WP24c - Agentic Edge Validation & Chunk-Aware Multigraph-System (v4.5.8) #22
|
|
@ -248,12 +248,16 @@ class IngestionService:
|
|||
if ".trash" in file_path or any(part.startswith('.') for part in file_path.split(os.sep)):
|
||||
return {**result, "status": "skipped", "reason": "ignored_folder"}
|
||||
|
||||
parsed = read_markdown(file_path)
|
||||
# WP-24c v4.5.9: Path-Normalization für konsistente Hash-Prüfung
|
||||
# Normalisiere file_path zu absolutem Pfad für konsistente Verarbeitung
|
||||
normalized_file_path = os.path.abspath(file_path) if not os.path.isabs(file_path) else file_path
|
||||
|
||||
parsed = read_markdown(normalized_file_path)
|
||||
if not parsed: return {**result, "error": "Empty file"}
|
||||
fm = normalize_frontmatter(parsed.frontmatter)
|
||||
validate_required_frontmatter(fm)
|
||||
|
||||
note_pl = make_note_payload(parsed, vault_root=vault_root, file_path=file_path, types_cfg=self.registry)
|
||||
note_pl = make_note_payload(parsed, vault_root=vault_root, file_path=normalized_file_path, types_cfg=self.registry)
|
||||
note_id = note_pl.get("note_id")
|
||||
|
||||
if not note_id:
|
||||
|
|
@ -261,22 +265,36 @@ class IngestionService:
|
|||
|
||||
logger.info(f"📄 Bearbeite: '{note_id}'")
|
||||
|
||||
# Change Detection (WP-24c v4.2.4: Hash-basierte Inhaltsprüfung)
|
||||
# WP-24c v4.5.9: Strikte Change Detection (Hash-basierte Inhaltsprüfung)
|
||||
# Prüft Hash VOR der Verarbeitung, um redundante Ingestion zu vermeiden
|
||||
old_payload = None if force_replace else fetch_note_payload(self.client, self.prefix, note_id)
|
||||
c_miss, e_miss = artifacts_missing(self.client, self.prefix, note_id)
|
||||
|
||||
content_changed = True
|
||||
hash_match = False
|
||||
if old_payload and not force_replace:
|
||||
# Nutzt die über MINDNET_CHANGE_DETECTION_MODE gesteuerte Genauigkeit
|
||||
# Mapping: 'full' -> 'full:parsed:canonical', 'body' -> 'body:parsed:canonical'
|
||||
h_key = f"{self.active_hash_mode or 'full'}:parsed:canonical"
|
||||
new_h = note_pl.get("hashes", {}).get(h_key)
|
||||
old_h = old_payload.get("hashes", {}).get(h_key)
|
||||
if new_h and old_h and new_h == old_h:
|
||||
content_changed = False
|
||||
|
||||
if new_h and old_h:
|
||||
hash_match = (new_h == old_h)
|
||||
if hash_match:
|
||||
content_changed = False
|
||||
logger.debug(f"🔍 [CHANGE-DETECTION] Hash identisch für '{note_id}': {h_key} = {new_h[:16]}...")
|
||||
else:
|
||||
logger.debug(f"🔍 [CHANGE-DETECTION] Hash geändert für '{note_id}': alt={old_h[:16]}..., neu={new_h[:16]}...")
|
||||
else:
|
||||
# WP-24c v4.5.9: Wenn Hash fehlt, als geändert behandeln (Sicherheit)
|
||||
logger.debug(f"🔍 [CHANGE-DETECTION] Hash fehlt für '{note_id}': new_h={bool(new_h)}, old_h={bool(old_h)}")
|
||||
|
||||
if not (force_replace or content_changed or not old_payload or c_miss or e_miss):
|
||||
return {**result, "status": "unchanged", "note_id": note_id}
|
||||
# WP-24c v4.5.9: Strikte Logik - überspringe komplett wenn Hash identisch UND keine Artefakte fehlen
|
||||
# Dies verhindert redundante Embedding-Generierung und Chunk-Verarbeitung
|
||||
if not force_replace and hash_match and old_payload and not c_miss and not e_miss:
|
||||
logger.info(f"⏭️ [SKIP] '{note_id}' unverändert (Hash identisch, alle Artefakte vorhanden)")
|
||||
return {**result, "status": "unchanged", "note_id": note_id, "reason": "hash_identical"}
|
||||
|
||||
if not apply:
|
||||
return {**result, "status": "dry-run", "changed": True, "note_id": note_id}
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user