# Changelog:
# - Introduced a new function `load_graph_schema_full` to parse and cache both typical and prohibited edge types from the graph schema.
# - Updated `load_graph_schema` to utilize the full schema for improved edge type extraction.
# - Added `get_topology_info` to retrieve typical and prohibited edges for source/target pairs.
# - Implemented `validate_intra_note_edge` and `validate_edge_against_schema` for schema validation of intra-note edges.
# - Enhanced logging for schema validation outcomes and edge handling.
# - Updated documentation to reflect new validation features and testing procedures.
332 lines
12 KiB
Python
332 lines
12 KiB
Python
"""
FILE: tests/test_wp26_phase3_validation.py
DESCRIPTION: Unit tests for WP-26 Phase 3: schema validation for intra-note edges
 - FA-12: Validation against the effective chunk type
 - get_topology_info() function
 - validate_intra_note_edge() function
VERSION: 1.0.0
"""
|
|
import pytest
|
|
from typing import Dict, List, Optional
|
|
|
|
|
|
class TestLoadGraphSchemaFull:
    """UT-25: Extended schema loading that also covers prohibited edges."""

    def test_load_graph_schema_full_returns_dict(self):
        """The full schema is loaded and handed back as a dict."""
        from app.core.graph.graph_utils import clear_graph_schema_cache, load_graph_schema_full

        # Reset the cache first so the load below does not reuse an earlier result.
        clear_graph_schema_cache()

        assert isinstance(load_graph_schema_full(), dict)

    def test_schema_contains_typical_and_prohibited(self):
        """Every schema entry carries both a typical and a prohibited list."""
        from app.core.graph.graph_utils import clear_graph_schema_cache, load_graph_schema_full

        clear_graph_schema_cache()
        full_schema = load_graph_schema_full()

        # An empty schema has no entries to inspect.
        if not full_schema:
            return

        expected_keys = ("typical", "prohibited")
        for targets in full_schema.values():
            for edge_info in targets.values():
                # Presence of both keys is checked before their types.
                for key in expected_keys:
                    assert key in edge_info
                for key in expected_keys:
                    assert isinstance(edge_info[key], list)
|
|
|
|
|
|
class TestGetTopologyInfo:
    """UT-26: Behaviour of the get_topology_info() function."""

    def test_get_topology_info_returns_dict(self):
        """get_topology_info() returns a dict with typical and prohibited keys."""
        from app.core.graph.graph_utils import clear_graph_schema_cache, get_topology_info

        clear_graph_schema_cache()
        info = get_topology_info("experience", "insight")

        assert isinstance(info, dict)
        for key in ("typical", "prohibited"):
            assert key in info

    def test_get_topology_info_fallback(self):
        """Unknown type pairs still yield list-valued defaults."""
        from app.core.graph.graph_utils import clear_graph_schema_cache, get_topology_info

        clear_graph_schema_cache()
        info = get_topology_info("unknown_type_xyz", "another_unknown")

        # The fallback is expected to offer at least related_to or references;
        # this test only pins that both entries are lists.
        for key in ("typical", "prohibited"):
            assert isinstance(info[key], list)

    def test_get_topology_info_experience_to_insight(self):
        """Typical edge lookup for the experience -> insight pair."""
        from app.core.graph.graph_utils import clear_graph_schema_cache, get_topology_info

        clear_graph_schema_cache()
        info = get_topology_info("experience", "insight")

        # Based on graph_schema.md: either typical edges exist for the pair,
        # or nothing is prohibited.
        has_typical = len(info["typical"]) > 0
        assert has_typical or len(info["prohibited"]) == 0
|
|
|
|
|
|
class TestValidateIntraNoteEdge:
    """UT-27: Behaviour of the validate_intra_note_edge() function."""

    def test_validate_typical_edge_returns_true(self):
        """A typical edge is accepted with a confidence of at least 0.7."""
        from app.core.ingestion.ingestion_validation import validate_intra_note_edge

        # Stand-in data: plain dicts instead of real edge/chunk objects.
        outcome = validate_intra_note_edge(
            edge={"kind": "resulted_in", "source_id": "chunk1", "target_id": "chunk2"},
            source_chunk={"type": "experience"},
            target_chunk={"type": "insight"},
            strict_mode=False,
        )
        accepted, score, _reason = outcome

        # The edge must be accepted.
        assert accepted is True
        # At least 0.7 (atypical) or 1.0 (typical).
        assert score >= 0.7

    def test_validate_atypical_edge_reduced_confidence(self):
        """An atypical edge is accepted, but with reduced confidence."""
        from app.core.ingestion.ingestion_validation import validate_intra_note_edge

        # Stand-in data using a deliberately unusual edge kind.
        outcome = validate_intra_note_edge(
            edge={"kind": "very_unusual_edge_type_xyz", "source_id": "chunk1", "target_id": "chunk2"},
            source_chunk={"type": "experience"},
            target_chunk={"type": "insight"},
            strict_mode=False,
        )
        accepted, score, explanation = outcome

        # Accepted outside strict mode, with reduced confidence and a reason.
        assert accepted is True
        assert score == 0.7
        assert explanation is not None

    def test_validate_atypical_edge_strict_mode_rejected(self):
        """An atypical edge is rejected in strict mode."""
        from app.core.ingestion.ingestion_validation import validate_intra_note_edge

        outcome = validate_intra_note_edge(
            edge={"kind": "very_unusual_edge_type_xyz", "source_id": "chunk1", "target_id": "chunk2"},
            source_chunk={"type": "experience"},
            target_chunk={"type": "insight"},
            strict_mode=True,
        )
        accepted, score, _reason = outcome

        # Strict mode must reject the edge outright.
        assert accepted is False
        assert score == 0.0

    def test_validate_uses_effective_type(self):
        """Validation uses the effective type (section type over note_type)."""
        from app.core.ingestion.ingestion_validation import validate_intra_note_edge

        # Each chunk carries both "type" (effective) and "note_type";
        # "type" is meant to take precedence.
        source = {"type": "insight", "note_type": "experience"}
        target = {"type": "decision", "note_type": "experience"}

        outcome = validate_intra_note_edge(
            edge={"kind": "related_to", "source_id": "chunk1", "target_id": "chunk2"},
            source_chunk=source,
            target_chunk=target,
            strict_mode=False,
        )
        accepted, _score, _reason = outcome

        # The edge is meant to be validated as insight -> decision.
        assert accepted is True
|
|
|
|
|
|
class TestValidateEdgeAgainstSchema:
    """UT-28: The validate_edge_against_schema() wrapper function."""

    def test_non_internal_edge_passes(self):
        """Non-internal edges are let through without a schema check."""
        from app.core.ingestion.ingestion_validation import validate_edge_against_schema

        edge = {
            "kind": "references",
            "source_id": "note1#chunk1",
            "target_id": "note2#chunk1",
            "is_internal": False,
        }
        # Snapshot taken before the call: comparing the result against `edge`
        # itself would pass trivially if the same dict were mutated in place
        # and returned.
        expected_edge = dict(edge)
        chunks_by_id = {}

        is_valid, updated_edge = validate_edge_against_schema(
            edge=edge,
            chunks_by_id=chunks_by_id,
            strict_mode=False,
        )

        assert is_valid is True
        assert updated_edge == expected_edge

    def test_internal_edge_validated(self):
        """Internal edges are validated against the schema."""
        from app.core.ingestion.ingestion_validation import validate_edge_against_schema

        edge = {
            "kind": "derived_from",
            "source_id": "chunk1",
            "target_id": "chunk2",
            "is_internal": True,
            "confidence": 1.0,
        }
        chunks_by_id = {
            "chunk1": {"type": "insight"},
            "chunk2": {"type": "experience"},
        }

        is_valid, updated_edge = validate_edge_against_schema(
            edge=edge,
            chunks_by_id=chunks_by_id,
            strict_mode=False,
        )

        assert is_valid is True

    def test_missing_chunks_passes(self):
        """If the chunks cannot be found the edge is allowed (integrity over precision)."""
        from app.core.ingestion.ingestion_validation import validate_edge_against_schema

        edge = {
            "kind": "derived_from",
            "source_id": "chunk1",
            "target_id": "chunk2",
            "is_internal": True,
        }
        chunks_by_id = {}  # no chunks available for lookup

        is_valid, updated_edge = validate_edge_against_schema(
            edge=edge,
            chunks_by_id=chunks_by_id,
            strict_mode=False,
        )

        assert is_valid is True
|
|
|
|
|
|
class TestSchemaValidationIntegration:
    """UT-29: Integration of the schema validation helpers."""

    def test_clear_cache_clears_both_caches(self):
        """clear_graph_schema_cache() resets both module-level caches."""
        # The cache globals are read through the module object. A
        # `from ... import _GRAPH_SCHEMA_CACHE` would only copy the value bound
        # at import time and could not observe the reset, so those unused
        # from-imports were dropped.
        import app.core.graph.graph_utils as utils_module
        from app.core.graph.graph_utils import (
            load_graph_schema,
            load_graph_schema_full,
            clear_graph_schema_cache,
        )

        # Load both schemas before clearing.
        load_graph_schema()
        load_graph_schema_full()

        # Clear the caches.
        clear_graph_schema_cache()

        # Check the module-level variables via attribute access on the module.
        assert utils_module._GRAPH_SCHEMA_CACHE is None
        assert utils_module._GRAPH_SCHEMA_FULL_CACHE is None

    def test_topology_info_consistent_with_typical_edges(self):
        """get_topology_info() is consistent with get_typical_edge_for()."""
        from app.core.graph.graph_utils import (
            get_topology_info,
            get_typical_edge_for,
            clear_graph_schema_cache,
        )

        clear_graph_schema_cache()

        # Checked for the experience -> insight pair.
        topology = get_topology_info("experience", "insight")
        typical_edge = get_typical_edge_for("experience", "insight")

        # Only when get_typical_edge_for() yields a value and the typical list
        # is non-empty must that value appear in the list.
        if typical_edge and topology["typical"]:
            assert typical_edge in topology["typical"]
|
|
|
|
|
|
class TestConfidenceAdjustment:
    """UT-30: Confidence adjustment for atypical edges."""

    @staticmethod
    def _atypical_case():
        """Build a fresh (edge, chunks_by_id) pair with an unknown edge kind."""
        # New dicts on every call, so no test sees another test's data.
        internal_edge = {
            "kind": "completely_unknown_edge_type_xyz123",
            "source_id": "chunk1",
            "target_id": "chunk2",
            "is_internal": True,
            "confidence": 1.0,
        }
        chunk_lookup = {
            "chunk1": {"type": "experience"},
            "chunk2": {"type": "insight"},
        }
        return internal_edge, chunk_lookup

    def test_atypical_edge_confidence_reduced(self):
        """An atypical edge receives a reduced confidence (0.7)."""
        from app.core.ingestion.ingestion_validation import validate_edge_against_schema

        internal_edge, chunk_lookup = self._atypical_case()

        accepted, result_edge = validate_edge_against_schema(
            edge=internal_edge,
            chunks_by_id=chunk_lookup,
            strict_mode=False,
        )

        assert accepted is True
        # Confidence is expected to drop to 0.7 because the edge is atypical.
        assert result_edge.get("confidence") == 0.7

    def test_schema_validation_note_added(self):
        """An atypical edge receives a validation note."""
        from app.core.ingestion.ingestion_validation import validate_edge_against_schema

        internal_edge, chunk_lookup = self._atypical_case()

        _accepted, result_edge = validate_edge_against_schema(
            edge=internal_edge,
            chunks_by_id=chunk_lookup,
            strict_mode=False,
        )

        # The returned edge must carry the validation note.
        assert "schema_validation_note" in result_edge
|
|
|
|
|
|
if __name__ == "__main__":
    # Run this file's tests verbosely when executed as a script. The exit code
    # returned by pytest.main() is propagated so a failing run is reported to
    # the shell instead of always exiting with status 0.
    raise SystemExit(pytest.main([__file__, "-v"]))
|