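Update ingestion_processor.py to version 3.1.4: implement a semantic cross-note redundancy check for symmetric edge generation. The redundancy validation now distinguishes local redundancy (the inverse edge is already defined in the current note) from cross-note redundancy (the target note already links back with the same resolved edge kind); the inverse edge is created only when neither applies, which improves bidirectional graph integrity. Version header and documentation are adjusted accordingly.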
This commit is contained in:
parent
a392dc2786
commit
61a319a049
|
|
@ -5,8 +5,8 @@ DESCRIPTION: Der zentrale IngestionService (Orchestrator).
|
||||||
WP-25a: Integration der Mixture of Experts (MoE) Architektur.
|
WP-25a: Integration der Mixture of Experts (MoE) Architektur.
|
||||||
WP-15b: Two-Pass Workflow mit globalem Kontext-Cache.
|
WP-15b: Two-Pass Workflow mit globalem Kontext-Cache.
|
||||||
WP-20/22: Cloud-Resilienz und Content-Lifecycle integriert.
|
WP-20/22: Cloud-Resilienz und Content-Lifecycle integriert.
|
||||||
AUDIT v3.1.2: Redundanz-Check, ID-Resolution & Origin-Tracking.
|
AUDIT v3.1.4: Semantischer Cross-Note Redundanz-Check (Typ-spezifisch).
|
||||||
VERSION: 3.1.2 (WP-24c: Redundancy-Aware Symmetric Ingestion)
|
VERSION: 3.1.4 (WP-24c: Semantic Cross-Note Redundancy Fix)
|
||||||
STATUS: Active
|
STATUS: Active
|
||||||
"""
|
"""
|
||||||
import logging
|
import logging
|
||||||
|
|
@ -175,10 +175,10 @@ class IngestionService:
|
||||||
# WP-25a: Profilgesteuerte binäre Validierung
|
# WP-25a: Profilgesteuerte binäre Validierung
|
||||||
if cand.get("provenance") == "global_pool" and enable_smart:
|
if cand.get("provenance") == "global_pool" and enable_smart:
|
||||||
is_valid = await validate_edge_candidate(
|
is_valid = await validate_edge_candidate(
|
||||||
chunk_text=ch.text,
|
ch.text,
|
||||||
edge=cand,
|
cand,
|
||||||
batch_cache=self.batch_cache,
|
self.batch_cache,
|
||||||
llm_service=self.llm,
|
self.llm,
|
||||||
profile_name="ingest_validator"
|
profile_name="ingest_validator"
|
||||||
)
|
)
|
||||||
if is_valid:
|
if is_valid:
|
||||||
|
|
@ -229,15 +229,32 @@ class IngestionService:
|
||||||
# Validierung für Symmetrie-Erzeugung (Kein Self-Loop, Existenz der Inversen)
|
# Validierung für Symmetrie-Erzeugung (Kein Self-Loop, Existenz der Inversen)
|
||||||
if (inverse_kind and target_canonical_id and target_canonical_id != note_id):
|
if (inverse_kind and target_canonical_id and target_canonical_id != note_id):
|
||||||
|
|
||||||
# REDUNDANZ-CHECK: Existiert bereits eine explizite Gegenrichtung?
|
# A. Lokale Redundanz: Hat der User in DIESER Note schon die Gegenrichtung definiert?
|
||||||
is_redundant = any(
|
is_local_redundant = any(
|
||||||
ex.get("target_id") == target_canonical_id and
|
ex.get("target_id") == target_canonical_id and
|
||||||
edge_registry.resolve(ex.get("kind")) == inverse_kind
|
edge_registry.resolve(ex.get("kind")) == inverse_kind
|
||||||
for ex in raw_edges
|
for ex in raw_edges
|
||||||
)
|
)
|
||||||
|
|
||||||
# Nur anlegen, wenn nicht redundant und kein simpler related_to Loop
|
# B. Cross-Note Redundanz Fix (v3.1.4): Prüfe auf identischen semantischen Beziehungstyp in der Ziel-Note
|
||||||
if not is_redundant and (inverse_kind != resolved_kind or resolved_kind not in ["related_to", "references"]):
|
is_cross_redundant = False
|
||||||
|
if target_ctx and hasattr(target_ctx, 'links'):
|
||||||
|
for link in target_ctx.links:
|
||||||
|
link_to = link.get("to")
|
||||||
|
# Auflösung des Link-Ziels der anderen Note
|
||||||
|
link_to_ctx = self.batch_cache.get(link_to)
|
||||||
|
link_to_id = link_to_ctx.note_id if link_to_ctx else link_to
|
||||||
|
|
||||||
|
if link_to_id == note_id:
|
||||||
|
# Wir prüfen nun, ob der Beziehungstyp in der Ziel-Note semantisch identisch
|
||||||
|
# mit der geplanten Symmetrie-Kante ist.
|
||||||
|
planned_kind_in_target = edge_registry.resolve(link.get("kind", "related_to"))
|
||||||
|
if planned_kind_in_target == inverse_kind:
|
||||||
|
is_cross_redundant = True
|
||||||
|
break
|
||||||
|
|
||||||
|
# Nur anlegen, wenn keine semantische Redundanz vorliegt und kein simpler Loop
|
||||||
|
if not is_local_redundant and not is_cross_redundant and (inverse_kind != resolved_kind or resolved_kind not in ["related_to", "references"]):
|
||||||
inv_edge = e.copy()
|
inv_edge = e.copy()
|
||||||
|
|
||||||
# Richtungs-Umkehr
|
# Richtungs-Umkehr
|
||||||
|
|
@ -260,7 +277,6 @@ class IngestionService:
|
||||||
|
|
||||||
# 4. DB Upsert via modularisierter Points-Logik
|
# 4. DB Upsert via modularisierter Points-Logik
|
||||||
if purge_before and old_payload:
|
if purge_before and old_payload:
|
||||||
# Hinweis: purge_artifacts wird im nächsten Schritt auf origin_note_id umgestellt
|
|
||||||
purge_artifacts(self.client, self.prefix, note_id)
|
purge_artifacts(self.client, self.prefix, note_id)
|
||||||
|
|
||||||
# Speichern der Haupt-Note
|
# Speichern der Haupt-Note
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user