WP24c - Agentic Edge Validation & Chunk-Aware Multigraph-System (v4.5.8) #22
|
|
@ -1,10 +1,7 @@
|
||||||
"""
|
"""
|
||||||
FILE: app/core/ingestion/ingestion_db.py
|
FILE: app/core/ingestion/ingestion_db.py
|
||||||
DESCRIPTION: Datenbank-Schnittstelle für Note-Metadaten und Artefakt-Prüfung.
|
DESCRIPTION: Datenbank-Schnittstelle für Note-Metadaten und Artefakt-Prüfung.
|
||||||
WP-14: Umstellung auf zentrale database-Infrastruktur.
|
|
||||||
WP-24c: Implementierung der herkunftsbasierten Lösch-Logik (Origin-Purge).
|
WP-24c: Implementierung der herkunftsbasierten Lösch-Logik (Origin-Purge).
|
||||||
Verhindert das versehentliche Löschen von inversen Kanten beim Re-Import.
|
|
||||||
VERSION v2.2.0: Integration der Authority-Prüfung für Point-IDs.
|
|
||||||
VERSION: 2.2.0 (WP-24c: Protected Purge & Authority Lookup)
|
VERSION: 2.2.0 (WP-24c: Protected Purge & Authority Lookup)
|
||||||
STATUS: Active
|
STATUS: Active
|
||||||
"""
|
"""
|
||||||
|
|
@ -12,14 +9,12 @@ import logging
|
||||||
from typing import Optional, Tuple, List
|
from typing import Optional, Tuple, List
|
||||||
from qdrant_client import QdrantClient
|
from qdrant_client import QdrantClient
|
||||||
from qdrant_client.http import models as rest
|
from qdrant_client.http import models as rest
|
||||||
|
|
||||||
# Import der modularisierten Namen-Logik zur Sicherstellung der Konsistenz
|
|
||||||
from app.core.database import collection_names
|
from app.core.database import collection_names
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
def fetch_note_payload(client: QdrantClient, prefix: str, note_id: str) -> Optional[dict]:
|
def fetch_note_payload(client: QdrantClient, prefix: str, note_id: str) -> Optional[dict]:
|
||||||
"""Holt die Metadaten einer Note aus Qdrant via Scroll."""
|
"""Holt die Metadaten einer Note aus Qdrant."""
|
||||||
notes_col, _, _ = collection_names(prefix)
|
notes_col, _, _ = collection_names(prefix)
|
||||||
try:
|
try:
|
||||||
f = rest.Filter(must=[rest.FieldCondition(key="note_id", match=rest.MatchValue(value=note_id))])
|
f = rest.Filter(must=[rest.FieldCondition(key="note_id", match=rest.MatchValue(value=note_id))])
|
||||||
|
|
@ -30,10 +25,9 @@ def fetch_note_payload(client: QdrantClient, prefix: str, note_id: str) -> Optio
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def artifacts_missing(client: QdrantClient, prefix: str, note_id: str) -> Tuple[bool, bool]:
|
def artifacts_missing(client: QdrantClient, prefix: str, note_id: str) -> Tuple[bool, bool]:
|
||||||
"""Prüft Qdrant aktiv auf vorhandene Chunks und Edges für eine Note."""
|
"""Prüft auf vorhandene Chunks und Edges."""
|
||||||
_, chunks_col, edges_col = collection_names(prefix)
|
_, chunks_col, edges_col = collection_names(prefix)
|
||||||
try:
|
try:
|
||||||
# Filter für die Existenz-Prüfung (Klassisch via note_id)
|
|
||||||
f = rest.Filter(must=[rest.FieldCondition(key="note_id", match=rest.MatchValue(value=note_id))])
|
f = rest.Filter(must=[rest.FieldCondition(key="note_id", match=rest.MatchValue(value=note_id))])
|
||||||
c_pts, _ = client.scroll(collection_name=chunks_col, scroll_filter=f, limit=1)
|
c_pts, _ = client.scroll(collection_name=chunks_col, scroll_filter=f, limit=1)
|
||||||
e_pts, _ = client.scroll(collection_name=edges_col, scroll_filter=f, limit=1)
|
e_pts, _ = client.scroll(collection_name=edges_col, scroll_filter=f, limit=1)
|
||||||
|
|
@ -45,17 +39,12 @@ def artifacts_missing(client: QdrantClient, prefix: str, note_id: str) -> Tuple[
|
||||||
def is_explicit_edge_present(client: QdrantClient, prefix: str, edge_id: str) -> bool:
|
def is_explicit_edge_present(client: QdrantClient, prefix: str, edge_id: str) -> bool:
|
||||||
"""
|
"""
|
||||||
WP-24c: Prüft, ob eine Kante mit der gegebenen ID bereits als 'explizit' existiert.
|
WP-24c: Prüft, ob eine Kante mit der gegebenen ID bereits als 'explizit' existiert.
|
||||||
Wird vom IngestionProcessor genutzt, um das Überschreiben von manuellem Wissen
|
Verhindert das Überschreiben von manuellem Wissen durch Symmetrie-Kanten.
|
||||||
durch virtuelle Symmetrie-Kanten zu verhindern.
|
|
||||||
"""
|
"""
|
||||||
_, _, edges_col = collection_names(prefix)
|
_, _, edges_col = collection_names(prefix)
|
||||||
try:
|
try:
|
||||||
# retrieve erwartet eine Liste von IDs
|
# retrieve ist der schnellste Weg, um einen Punkt via ID zu laden
|
||||||
res = client.retrieve(
|
res = client.retrieve(collection_name=edges_col, ids=[edge_id], with_payload=True)
|
||||||
collection_name=edges_col,
|
|
||||||
ids=[edge_id],
|
|
||||||
with_payload=True
|
|
||||||
)
|
|
||||||
if res and not res[0].payload.get("virtual", False):
|
if res and not res[0].payload.get("virtual", False):
|
||||||
return True
|
return True
|
||||||
return False
|
return False
|
||||||
|
|
@ -63,37 +52,15 @@ def is_explicit_edge_present(client: QdrantClient, prefix: str, edge_id: str) ->
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def purge_artifacts(client: QdrantClient, prefix: str, note_id: str):
|
def purge_artifacts(client: QdrantClient, prefix: str, note_id: str):
|
||||||
"""
|
"""Löscht Artefakte basierend auf ihrer Herkunft (Origin)."""
|
||||||
WP-24c: Selektives Löschen von Artefakten vor einem Re-Import.
|
|
||||||
Implementiert das Origin-Purge-Prinzip zur Sicherung der bidirektionalen Graph-Integrität.
|
|
||||||
"""
|
|
||||||
_, chunks_col, edges_col = collection_names(prefix)
|
_, chunks_col, edges_col = collection_names(prefix)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# 1. Chunks löschen (immer fest an die note_id gebunden)
|
chunks_filter = rest.Filter(must=[rest.FieldCondition(key="note_id", match=rest.MatchValue(value=note_id))])
|
||||||
chunks_filter = rest.Filter(must=[
|
client.delete(collection_name=chunks_col, points_selector=rest.FilterSelector(filter=chunks_filter))
|
||||||
rest.FieldCondition(key="note_id", match=rest.MatchValue(value=note_id))
|
|
||||||
])
|
|
||||||
client.delete(
|
|
||||||
collection_name=chunks_col,
|
|
||||||
points_selector=rest.FilterSelector(filter=chunks_filter)
|
|
||||||
)
|
|
||||||
|
|
||||||
# 2. WP-24c: Kanten löschen (HERKUNFTS-BASIERT)
|
|
||||||
# Wir löschen alle Kanten, die von DIESER Note erzeugt wurden (origin_note_id).
|
|
||||||
# Dies umfasst:
|
|
||||||
# - Alle ausgehenden Kanten (A -> B)
|
|
||||||
# - Alle inversen Kanten, die diese Note in anderen Notizen "deponiert" hat (B -> A)
|
|
||||||
# Fremde inverse Kanten (C -> A), die von anderen Notizen stammen, bleiben erhalten.
|
|
||||||
edges_filter = rest.Filter(must=[
|
|
||||||
rest.FieldCondition(key="origin_note_id", match=rest.MatchValue(value=note_id))
|
|
||||||
])
|
|
||||||
client.delete(
|
|
||||||
collection_name=edges_col,
|
|
||||||
points_selector=rest.FilterSelector(filter=edges_filter)
|
|
||||||
)
|
|
||||||
|
|
||||||
|
# Origin-basiertes Löschen schützt fremde inverse Kanten
|
||||||
|
edges_filter = rest.Filter(must=[rest.FieldCondition(key="origin_note_id", match=rest.MatchValue(value=note_id))])
|
||||||
|
client.delete(collection_name=edges_col, points_selector=rest.FilterSelector(filter=edges_filter))
|
||||||
logger.info(f"🧹 [PURGE] Global artifacts owned by '{note_id}' cleared.")
|
logger.info(f"🧹 [PURGE] Global artifacts owned by '{note_id}' cleared.")
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"❌ [PURGE ERROR] Failed to clear artifacts for {note_id}: {e}")
|
logger.error(f"❌ [PURGE ERROR] Failed to clear artifacts for {note_id}: {e}")
|
||||||
|
|
@ -22,7 +22,7 @@ from app.core.parser import (
|
||||||
validate_required_frontmatter, NoteContext
|
validate_required_frontmatter, NoteContext
|
||||||
)
|
)
|
||||||
from app.core.chunking import assemble_chunks
|
from app.core.chunking import assemble_chunks
|
||||||
# WP-24c: Import für die deterministische UUID-Vorabberechnung
|
# WP-24c: Import für die deterministische ID-Vorabberechnung
|
||||||
from app.core.graph.graph_utils import _mk_edge_id
|
from app.core.graph.graph_utils import _mk_edge_id
|
||||||
|
|
||||||
# Datenbank-Ebene (Modularisierte database-Infrastruktur)
|
# Datenbank-Ebene (Modularisierte database-Infrastruktur)
|
||||||
|
|
@ -83,7 +83,7 @@ class IngestionService:
|
||||||
# WP-15b: Kontext-Gedächtnis für ID-Auflösung
|
# WP-15b: Kontext-Gedächtnis für ID-Auflösung
|
||||||
self.batch_cache: Dict[str, NoteContext] = {}
|
self.batch_cache: Dict[str, NoteContext] = {}
|
||||||
|
|
||||||
# WP-24c: Puffer für Phase 2 (Symmetrie-Injektion)
|
# WP-24c: Puffer für Phase 2 (Symmetrie-Injektion nach Persistierung)
|
||||||
self.symmetry_buffer: List[Dict[str, Any]] = []
|
self.symmetry_buffer: List[Dict[str, Any]] = []
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
|
@ -143,7 +143,7 @@ class IngestionService:
|
||||||
success_count += 1
|
success_count += 1
|
||||||
|
|
||||||
# 3. Schritt: SYMMETRY INJECTION (PHASE 2)
|
# 3. Schritt: SYMMETRY INJECTION (PHASE 2)
|
||||||
# Erst jetzt, wo alle manuellen Kanten in Qdrant liegen, prüfen wir die Symmetrien.
|
# Erst jetzt, wo alle manuellen Kanten in Qdrant liegen, schreiben wir die Symmetrien.
|
||||||
if self.symmetry_buffer:
|
if self.symmetry_buffer:
|
||||||
logger.info(f"🔄 PHASE 2: Validiere {len(self.symmetry_buffer)} Symmetrie-Kanten gegen Live-DB...")
|
logger.info(f"🔄 PHASE 2: Validiere {len(self.symmetry_buffer)} Symmetrie-Kanten gegen Live-DB...")
|
||||||
final_virtuals = []
|
final_virtuals = []
|
||||||
|
|
@ -244,7 +244,6 @@ class IngestionService:
|
||||||
is_valid = await validate_edge_candidate(
|
is_valid = await validate_edge_candidate(
|
||||||
ch.text, cand, self.batch_cache, self.llm, profile_name="ingest_validator"
|
ch.text, cand, self.batch_cache, self.llm, profile_name="ingest_validator"
|
||||||
)
|
)
|
||||||
# Fix (v3.3.2): Sicherer Zugriff via .get() verhindert Crash
|
|
||||||
t_id = cand.get('target_id') or cand.get('note_id') or "Unknown"
|
t_id = cand.get('target_id') or cand.get('note_id') or "Unknown"
|
||||||
logger.info(f" 🧠 [SMART EDGE] {t_id} -> {'✅ OK' if is_valid else '❌ SKIP'}")
|
logger.info(f" 🧠 [SMART EDGE] {t_id} -> {'✅ OK' if is_valid else '❌ SKIP'}")
|
||||||
if is_valid: new_pool.append(cand)
|
if is_valid: new_pool.append(cand)
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user