WP24c - Agentic Edge Validation & Chunk-Aware Multigraph-System (v4.5.8) #22

Merged
Lars merged 71 commits from WP24c into main 2026-01-12 10:53:20 +01:00
Showing only changes of commit 78fbc9b31b - Show all commits

View File

@ -248,12 +248,16 @@ class IngestionService:
if ".trash" in file_path or any(part.startswith('.') for part in file_path.split(os.sep)): if ".trash" in file_path or any(part.startswith('.') for part in file_path.split(os.sep)):
return {**result, "status": "skipped", "reason": "ignored_folder"} return {**result, "status": "skipped", "reason": "ignored_folder"}
parsed = read_markdown(file_path)
# WP-24c v4.5.9: Path-Normalization für konsistente Hash-Prüfung
# Normalisiere file_path zu absolutem Pfad für konsistente Verarbeitung
normalized_file_path = os.path.abspath(file_path) if not os.path.isabs(file_path) else file_path
parsed = read_markdown(normalized_file_path)
if not parsed: return {**result, "error": "Empty file"} if not parsed: return {**result, "error": "Empty file"}
fm = normalize_frontmatter(parsed.frontmatter) fm = normalize_frontmatter(parsed.frontmatter)
validate_required_frontmatter(fm) validate_required_frontmatter(fm)
note_pl = make_note_payload(parsed, vault_root=vault_root, file_path=file_path, types_cfg=self.registry) note_pl = make_note_payload(parsed, vault_root=vault_root, file_path=normalized_file_path, types_cfg=self.registry)
note_id = note_pl.get("note_id") note_id = note_pl.get("note_id")
if not note_id: if not note_id:
@ -261,22 +265,36 @@ class IngestionService:
logger.info(f"📄 Bearbeite: '{note_id}'") logger.info(f"📄 Bearbeite: '{note_id}'")
# Change Detection (WP-24c v4.2.4: Hash-basierte Inhaltsprüfung)
# WP-24c v4.5.9: Strikte Change Detection (Hash-basierte Inhaltsprüfung)
# Prüft Hash VOR der Verarbeitung, um redundante Ingestion zu vermeiden
old_payload = None if force_replace else fetch_note_payload(self.client, self.prefix, note_id) old_payload = None if force_replace else fetch_note_payload(self.client, self.prefix, note_id)
c_miss, e_miss = artifacts_missing(self.client, self.prefix, note_id) c_miss, e_miss = artifacts_missing(self.client, self.prefix, note_id)
content_changed = True content_changed = True
hash_match = False
if old_payload and not force_replace: if old_payload and not force_replace:
# Nutzt die über MINDNET_CHANGE_DETECTION_MODE gesteuerte Genauigkeit # Nutzt die über MINDNET_CHANGE_DETECTION_MODE gesteuerte Genauigkeit
# Mapping: 'full' -> 'full:parsed:canonical', 'body' -> 'body:parsed:canonical' # Mapping: 'full' -> 'full:parsed:canonical', 'body' -> 'body:parsed:canonical'
h_key = f"{self.active_hash_mode or 'full'}:parsed:canonical" h_key = f"{self.active_hash_mode or 'full'}:parsed:canonical"
new_h = note_pl.get("hashes", {}).get(h_key) new_h = note_pl.get("hashes", {}).get(h_key)
old_h = old_payload.get("hashes", {}).get(h_key) old_h = old_payload.get("hashes", {}).get(h_key)
if new_h and old_h and new_h == old_h:
content_changed = False
if new_h and old_h:
hash_match = (new_h == old_h)
if hash_match:
content_changed = False
logger.debug(f"🔍 [CHANGE-DETECTION] Hash identisch für '{note_id}': {h_key} = {new_h[:16]}...")
else:
logger.debug(f"🔍 [CHANGE-DETECTION] Hash geändert für '{note_id}': alt={old_h[:16]}..., neu={new_h[:16]}...")
else:
# WP-24c v4.5.9: Wenn Hash fehlt, als geändert behandeln (Sicherheit)
logger.debug(f"🔍 [CHANGE-DETECTION] Hash fehlt für '{note_id}': new_h={bool(new_h)}, old_h={bool(old_h)}")
if not (force_replace or content_changed or not old_payload or c_miss or e_miss):
return {**result, "status": "unchanged", "note_id": note_id}
# WP-24c v4.5.9: Strikte Logik - überspringe komplett wenn Hash identisch UND keine Artefakte fehlen
# Dies verhindert redundante Embedding-Generierung und Chunk-Verarbeitung
if not force_replace and hash_match and old_payload and not c_miss and not e_miss:
logger.info(f"⏭️ [SKIP] '{note_id}' unverändert (Hash identisch, alle Artefakte vorhanden)")
return {**result, "status": "unchanged", "note_id": note_id, "reason": "hash_identical"}
if not apply: if not apply:
return {**result, "status": "dry-run", "changed": True, "note_id": note_id} return {**result, "status": "dry-run", "changed": True, "note_id": note_id}