"""
FILE: tests/test_wp26_phase2_retriever.py
DESCRIPTION: Unit tests for WP-26 Phase 2: retriever adjustments
             - is_internal boost for intra-note edges
             - Configurable aggregation (note/chunk level)
VERSION: 1.0.0
"""
import os
from collections import Counter
from unittest.mock import MagicMock, patch

import pytest

# Environment override used by the "defaults" tests: the config path points at
# a file that does not exist, and the tests expect default values in return.
_MISSING_CONFIG_ENV = {"MINDNET_RETRIEVER_CONFIG": "/nonexistent/path.yaml"}


class TestEdgeScoringConfig:
    """UT-19: edge-scoring configuration."""

    def test_get_edge_scoring_config_defaults(self):
        """Default values are returned when the config file is missing."""
        from app.core.graph.graph_subgraph import get_edge_scoring_config

        with patch.dict(os.environ, _MISSING_CONFIG_ENV):
            # Drop any cached result so the lookup happens under the patched
            # environment.
            get_edge_scoring_config.cache_clear()
            try:
                config = get_edge_scoring_config()
            finally:
                # Do not leak the config loaded from the bogus path into
                # whichever test runs next.
                get_edge_scoring_config.cache_clear()

        assert config["internal_edge_boost"] == 1.2
        assert config["external_edge_boost"] == 1.0

    def test_get_edge_scoring_config_from_yaml(self):
        """Values are loaded from the real YAML configuration."""
        from app.core.graph.graph_subgraph import get_edge_scoring_config

        # Clear the cache so the real config file is read.
        get_edge_scoring_config.cache_clear()
        config = get_edge_scoring_config()

        # Only sanity-check the range; the exact values live in retriever.yaml.
        assert config["internal_edge_boost"] >= 1.0
        assert config["external_edge_boost"] >= 1.0


class TestIsInternalBoost:
    """UT-20: is_internal boost in the subgraph."""

    def test_internal_edge_gets_boost(self):
        """Intra-note edges are weighted with the internal boost."""
        from app.core.graph.graph_subgraph import (
            Subgraph,
            get_edge_scoring_config,
        )

        get_edge_scoring_config.cache_clear()
        sg = Subgraph()

        # Internal edge: both endpoints are chunks of the same note.
        sg.add_edge({
            "source": "note1#c01",
            "target": "note1#c02",
            "kind": "derives",
            "weight": 1.0,
            "is_internal": True,
        })

        edges = sg.adj.get("note1#c01", [])
        assert len(edges) == 1

        internal_boost = get_edge_scoring_config()["internal_edge_boost"]
        # approx: the stored weight is the result of float arithmetic.
        assert edges[0]["weight"] == pytest.approx(1.0 * internal_boost)
        assert edges[0]["is_internal"] is True

    def test_external_edge_no_boost(self):
        """Inter-note edges are weighted with the external boost only."""
        from app.core.graph.graph_subgraph import (
            Subgraph,
            get_edge_scoring_config,
        )

        get_edge_scoring_config.cache_clear()
        sg = Subgraph()

        # External edge: endpoints belong to different notes.
        sg.add_edge({
            "source": "note1#c01",
            "target": "note2#c01",
            "kind": "references",
            "weight": 1.0,
            "is_internal": False,
        })

        edges = sg.adj.get("note1#c01", [])
        assert len(edges) == 1

        external_boost = get_edge_scoring_config()["external_edge_boost"]
        assert edges[0]["weight"] == pytest.approx(1.0 * external_boost)
        assert edges[0]["is_internal"] is False

    def test_edge_bonus_aggregation_with_internal(self):
        """The edge bonus aggregates over internal and external edges."""
        from app.core.graph.graph_subgraph import (
            Subgraph,
            get_edge_scoring_config,
        )

        get_edge_scoring_config.cache_clear()
        sg = Subgraph()

        # Two outgoing edges from the same node: one internal, one external.
        sg.add_edge({
            "source": "note1",
            "target": "note2",
            "kind": "solves",
            "weight": 1.5,
            "is_internal": True,
        })
        sg.add_edge({
            "source": "note1",
            "target": "note3",
            "kind": "references",
            "weight": 0.1,
            "is_internal": False,
        })

        bonus = sg.edge_bonus("note1")

        # Only the sign is pinned; the exact value depends on the configured
        # boosts.
        assert bonus > 0


class TestAggregationConfig:
    """UT-21: aggregation configuration."""

    def test_get_aggregation_config_defaults(self):
        """Default values are returned when the config file is missing."""
        from app.core.retrieval.retriever import _get_aggregation_config

        # NOTE(review): no cache is cleared here; this presumes
        # _get_aggregation_config re-reads the environment on every call.
        with patch.dict(os.environ, _MISSING_CONFIG_ENV):
            config = _get_aggregation_config()

        assert config["level"] == "note"
        assert config["max_chunks_per_note"] == 3

    def test_get_aggregation_config_from_yaml(self):
        """Values are loaded from the real YAML configuration."""
        from app.core.retrieval.retriever import _get_aggregation_config

        config = _get_aggregation_config()

        # Only validate the shape; the exact values live in retriever.yaml.
        assert config["level"] in ["note", "chunk"]
        assert config["max_chunks_per_note"] >= 1


class TestNoteLevelAggregation:
    """UT-22: note-level aggregation with max_chunks_per_note."""

    def test_note_level_limits_chunks(self):
        """Note-level aggregation caps the number of chunks kept per note."""
        # Mock data: 5 chunks of note1 and 2 chunks of note2.
        mock_hits = [
            ("c1", 0.9, {"note_id": "note1", "chunk_id": "c1"}),
            ("c2", 0.85, {"note_id": "note1", "chunk_id": "c2"}),
            ("c3", 0.8, {"note_id": "note2", "chunk_id": "c3"}),
            ("c4", 0.75, {"note_id": "note1", "chunk_id": "c4"}),
            ("c5", 0.7, {"note_id": "note2", "chunk_id": "c5"}),
            ("c6", 0.65, {"note_id": "note1", "chunk_id": "c6"}),
            ("c7", 0.6, {"note_id": "note1", "chunk_id": "c7"}),
        ]

        # Simulate note-level aggregation with max_chunks_per_note=2: walk the
        # hits best-first and keep a hit only while its note is below the cap.
        # NOTE(review): this re-implements the pooling inline and does not
        # call the retriever itself.
        max_chunks_per_note = 2
        pooled = []
        kept_per_note = Counter()
        for pid, score, payload in sorted(
            mock_hits, key=lambda hit: hit[1], reverse=True
        ):
            note_id = payload["note_id"]
            if kept_per_note[note_id] < max_chunks_per_note:
                pooled.append((pid, score, payload))
                kept_per_note[note_id] += 1

        # Expectation: 2 of note1 + 2 of note2 = 4 chunks, and they are the
        # highest-scoring chunks of each note, still ordered by score.
        assert len(pooled) == 4
        assert [pid for pid, _, _ in pooled] == ["c1", "c2", "c3", "c5"]

        # Every note contributes at most max_chunks_per_note chunks.
        chunks_by_note = Counter(payload["note_id"] for _, _, payload in pooled)
        assert chunks_by_note["note1"] == 2
        assert chunks_by_note["note2"] == 2


class TestChunkLevelAggregation:
    """UT-23: chunk-level aggregation (no deduplication)."""

    def test_chunk_level_no_dedup(self):
        """Chunk-level aggregation returns every chunk."""
        mock_hits = [
            ("c1", 0.9, {"note_id": "note1"}),
            ("c2", 0.85, {"note_id": "note1"}),
            ("c3", 0.8, {"note_id": "note1"}),
            ("c4", 0.75, {"note_id": "note1"}),
            ("c5", 0.7, {"note_id": "note1"}),
        ]

        # Chunk level: hits pass through without per-note deduplication.
        # NOTE(review): this simulates the branch inline and does not call the
        # retriever itself.
        aggregation_level = "chunk"
        if aggregation_level == "chunk":
            pooled = mock_hits
        else:
            # Note-level aggregation would cap the chunks per note instead.
            pooled = []

        # All 5 chunks must survive.
        assert len(pooled) == 5


class TestQdrantIndexSetup:
    """UT-24: Qdrant index setup."""

    def test_bool_index_method_exists(self):
        """QdrantHTTP exposes a create_bool_index method."""
        from scripts.setup_mindnet_collections import QdrantHTTP

        q = QdrantHTTP("http://localhost:6333")
        assert hasattr(q, "create_bool_index")

    def test_setup_includes_is_internal_index(self):
        """The setup function references the is_internal bool index."""
        import inspect

        from scripts.setup_mindnet_collections import setup_mindnet_collections

        # Inspect the function's source text rather than running it.
        source = inspect.getsource(setup_mindnet_collections)
        assert "is_internal" in source
        assert "create_bool_index" in source


if __name__ == "__main__":
    # Propagate pytest's exit code so a direct run fails when tests fail.
    raise SystemExit(pytest.main([__file__, "-v"]))