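Update ingestion_processor.py to version 3.1.4: add a semantic cross-note redundancy check to the symmetric (inverse) edge generation. Redundancy validation now distinguishes local redundancy (the current note already defines the inverse edge itself) from cross-note redundancy (the target note already links back to this note with the same resolved relation type), so that no duplicate inverse edge is created and the bidirectional graph stays consistent. Version number and header documentation are adjusted accordingly.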
This commit is contained in:
parent
a392dc2786
commit
61a319a049
|
|
@@ -5,8 +5,8 @@ DESCRIPTION: Der zentrale IngestionService (Orchestrator).
|
|||
WP-25a: Integration der Mixture of Experts (MoE) Architektur.
|
||||
WP-15b: Two-Pass Workflow mit globalem Kontext-Cache.
|
||||
WP-20/22: Cloud-Resilienz und Content-Lifecycle integriert.
|
||||
AUDIT v3.1.2: Redundanz-Check, ID-Resolution & Origin-Tracking.
|
||||
VERSION: 3.1.2 (WP-24c: Redundancy-Aware Symmetric Ingestion)
|
||||
AUDIT v3.1.4: Semantischer Cross-Note Redundanz-Check (Typ-spezifisch).
|
||||
VERSION: 3.1.4 (WP-24c: Semantic Cross-Note Redundancy Fix)
|
||||
STATUS: Active
|
||||
"""
|
||||
import logging
|
||||
|
|
@@ -175,10 +175,10 @@ class IngestionService:
|
|||
# WP-25a: Profilgesteuerte binäre Validierung
|
||||
if cand.get("provenance") == "global_pool" and enable_smart:
|
||||
is_valid = await validate_edge_candidate(
|
||||
chunk_text=ch.text,
|
||||
edge=cand,
|
||||
batch_cache=self.batch_cache,
|
||||
llm_service=self.llm,
|
||||
ch.text,
|
||||
cand,
|
||||
self.batch_cache,
|
||||
self.llm,
|
||||
profile_name="ingest_validator"
|
||||
)
|
||||
if is_valid:
|
||||
|
|
@@ -229,15 +229,32 @@ class IngestionService:
|
|||
# Validierung für Symmetrie-Erzeugung (Kein Self-Loop, Existenz der Inversen)
|
||||
if (inverse_kind and target_canonical_id and target_canonical_id != note_id):
|
||||
|
||||
# REDUNDANZ-CHECK: Existiert bereits eine explizite Gegenrichtung?
|
||||
is_redundant = any(
|
||||
# A. Lokale Redundanz: Hat der User in DIESER Note schon die Gegenrichtung definiert?
|
||||
is_local_redundant = any(
|
||||
ex.get("target_id") == target_canonical_id and
|
||||
edge_registry.resolve(ex.get("kind")) == inverse_kind
|
||||
for ex in raw_edges
|
||||
)
|
||||
|
||||
# B. Cross-Note Redundanz Fix (v3.1.4): Prüfe auf identischen semantischen Beziehungstyp in der Ziel-Note
|
||||
is_cross_redundant = False
|
||||
if target_ctx and hasattr(target_ctx, 'links'):
|
||||
for link in target_ctx.links:
|
||||
link_to = link.get("to")
|
||||
# Auflösung des Link-Ziels der anderen Note
|
||||
link_to_ctx = self.batch_cache.get(link_to)
|
||||
link_to_id = link_to_ctx.note_id if link_to_ctx else link_to
|
||||
|
||||
if link_to_id == note_id:
|
||||
# Wir prüfen nun, ob der Beziehungstyp in der Ziel-Note semantisch identisch
|
||||
# mit der geplanten Symmetrie-Kante ist.
|
||||
planned_kind_in_target = edge_registry.resolve(link.get("kind", "related_to"))
|
||||
if planned_kind_in_target == inverse_kind:
|
||||
is_cross_redundant = True
|
||||
break
|
||||
|
||||
# Nur anlegen, wenn nicht redundant und kein simpler related_to Loop
|
||||
if not is_redundant and (inverse_kind != resolved_kind or resolved_kind not in ["related_to", "references"]):
|
||||
# Nur anlegen, wenn keine semantische Redundanz vorliegt und kein simpler Loop
|
||||
if not is_local_redundant and not is_cross_redundant and (inverse_kind != resolved_kind or resolved_kind not in ["related_to", "references"]):
|
||||
inv_edge = e.copy()
|
||||
|
||||
# Richtungs-Umkehr
|
||||
|
|
@@ -260,7 +277,6 @@ class IngestionService:
|
|||
|
||||
# 4. DB Upsert via modularisierter Points-Logik
|
||||
if purge_before and old_payload:
|
||||
# Hinweis: purge_artifacts wird im nächsten Schritt auf origin_note_id umgestellt
|
||||
purge_artifacts(self.client, self.prefix, note_id)
|
||||
|
||||
# Speichern der Haupt-Note
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user