mindnet/app/core/ingestion/ingestion_validation.py

53 lines
1.8 KiB
Python
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
FILE: app/core/ingestion/ingestion_validation.py
DESCRIPTION: WP-15b semantische Validierung von Kanten gegen den LocalBatchCache.
"""
import logging
from typing import Dict, Any
from app.core.parser import NoteContext
logger = logging.getLogger(__name__)
async def validate_edge_candidate(
chunk_text: str,
edge: Dict,
batch_cache: Dict[str, NoteContext],
llm_service: Any,
provider: str
) -> bool:
"""WP-15b: Validiert einen Kandidaten semantisch gegen das Ziel im Cache."""
target_id = edge.get("to")
target_ctx = batch_cache.get(target_id)
# Robust Lookup Fix (v2.12.2): Support für Anker
if not target_ctx and "#" in target_id:
base_id = target_id.split("#")[0]
target_ctx = batch_cache.get(base_id)
# Sicherheits-Fallback (Hard-Link Integrity)
if not target_ctx:
logger.info(f" [VALIDATION SKIP] No context for '{target_id}' - allowing link.")
return True
template = llm_service.get_prompt("edge_validation", provider)
try:
logger.info(f"⚖️ [VALIDATING] Relation '{edge.get('kind')}' -> '{target_id}'...")
prompt = template.format(
chunk_text=chunk_text[:1500],
target_title=target_ctx.title,
target_summary=target_ctx.summary,
edge_kind=edge.get("kind", "related_to")
)
response = await llm_service.generate_raw_response(prompt, priority="background")
is_valid = "YES" in response.upper()
if is_valid:
logger.info(f"✅ [VALIDATED] Relation to '{target_id}' confirmed.")
else:
logger.info(f"🚫 [REJECTED] Relation to '{target_id}' irrelevant for this chunk.")
return is_valid
except Exception as e:
logger.warning(f"⚠️ Validation error for {target_id}: {e}")
return True