# mindnet/tests/test_wp26_phase2_retriever.py

"""
FILE: tests/test_wp26_phase2_retriever.py
DESCRIPTION: Unit-Tests für WP-26 Phase 2: Retriever-Anpassungen
             - is_internal-Boost für Intra-Note-Edges
             - Konfigurierbare Aggregation (Note/Chunk Level)
VERSION: 1.0.0
"""
import pytest
from unittest.mock import patch, MagicMock
import os


class TestEdgeScoringConfig:
    """UT-19: Edge-scoring configuration."""

    def test_get_edge_scoring_config_defaults(self):
        """Defaults are returned when the configured file does not exist."""
        from app.core.graph.graph_subgraph import get_edge_scoring_config

        # Point the loader at a file that cannot exist.
        # NOTE(review): presumably the loader reads MINDNET_RETRIEVER_CONFIG
        # to locate its YAML file; verify against graph_subgraph.
        with patch.dict(os.environ, {"MINDNET_RETRIEVER_CONFIG": "/nonexistent/path.yaml"}):
            # The loader is cached (it exposes cache_clear), so drop any value
            # cached before the environment was patched.
            get_edge_scoring_config.cache_clear()
            try:
                config = get_edge_scoring_config()
            finally:
                # Do not leave the value loaded under the bogus path in the
                # cache; later callers in the same process would see it.
                get_edge_scoring_config.cache_clear()

        assert config["internal_edge_boost"] == 1.2
        assert config["external_edge_boost"] == 1.0

    def test_get_edge_scoring_config_from_yaml(self):
        """Values loaded in the regular environment are sane boost factors."""
        from app.core.graph.graph_subgraph import get_edge_scoring_config

        # Clear the cache so the config is loaded fresh for this test.
        get_edge_scoring_config.cache_clear()

        config = get_edge_scoring_config()

        # Only a lower bound is checked, so the test does not pin the exact
        # values that happen to be configured.
        assert config["internal_edge_boost"] >= 1.0
        assert config["external_edge_boost"] >= 1.0


class TestIsInternalBoost:
    """UT-20: is_internal boost in the subgraph."""

    def test_internal_edge_gets_boost(self):
        """An intra-note edge is expected to carry the internal boost."""
        from app.core.graph.graph_subgraph import Subgraph, get_edge_scoring_config

        # Clear the cache so a config cached by another test cannot leak in.
        get_edge_scoring_config.cache_clear()

        sg = Subgraph()

        # Internal edge: source and target are chunks of the same note.
        sg.add_edge({
            "source": "note1#c01",
            "target": "note1#c02",
            "kind": "derives",
            "weight": 1.0,
            "is_internal": True
        })

        edges = sg.adj.get("note1#c01", [])
        assert len(edges) == 1

        internal_boost = get_edge_scoring_config()["internal_edge_boost"]
        # Tolerant comparison: the stored weight is a computed float, so exact
        # equality would depend on how the multiplication is carried out.
        assert edges[0]["weight"] == pytest.approx(1.0 * internal_boost)
        assert edges[0]["is_internal"] is True

    def test_external_edge_no_boost(self):
        """An inter-note edge is expected to carry only the external boost."""
        from app.core.graph.graph_subgraph import Subgraph, get_edge_scoring_config

        # Clear the cache so a config cached by another test cannot leak in.
        get_edge_scoring_config.cache_clear()

        sg = Subgraph()

        # External edge: source and target belong to different notes.
        sg.add_edge({
            "source": "note1#c01",
            "target": "note2#c01",
            "kind": "references",
            "weight": 1.0,
            "is_internal": False
        })

        edges = sg.adj.get("note1#c01", [])
        assert len(edges) == 1

        external_boost = get_edge_scoring_config()["external_edge_boost"]
        # Tolerant comparison for the same reason as in the internal-edge test.
        assert edges[0]["weight"] == pytest.approx(1.0 * external_boost)
        assert edges[0]["is_internal"] is False

    def test_edge_bonus_aggregation_with_internal(self):
        """edge_bonus is positive for a node with one internal and one external edge."""
        from app.core.graph.graph_subgraph import Subgraph, get_edge_scoring_config

        get_edge_scoring_config.cache_clear()
        sg = Subgraph()

        # Two outgoing edges from note1: one flagged internal, one external.
        sg.add_edge({
            "source": "note1",
            "target": "note2",
            "kind": "solves",
            "weight": 1.5,
            "is_internal": True
        })
        sg.add_edge({
            "source": "note1",
            "target": "note3",
            "kind": "references",
            "weight": 0.1,
            "is_internal": False
        })

        bonus = sg.edge_bonus("note1")

        # Only positivity is checked; the exact aggregation formula is not pinned here.
        assert bonus > 0


class TestAggregationConfig:
    """UT-21: Aggregation configuration."""

    def test_get_aggregation_config_defaults(self):
        """Defaults are returned when the configured file does not exist."""
        from app.core.retrieval.retriever import _get_aggregation_config

        # Load while the config path points at a file that cannot exist.
        missing_path_env = {"MINDNET_RETRIEVER_CONFIG": "/nonexistent/path.yaml"}
        with patch.dict(os.environ, missing_path_env):
            loaded = _get_aggregation_config()

        level = loaded["level"]
        assert level == "note"
        chunk_limit = loaded["max_chunks_per_note"]
        assert chunk_limit == 3

    def test_get_aggregation_config_from_yaml(self):
        """Values loaded in the regular environment are within the valid range."""
        from app.core.retrieval.retriever import _get_aggregation_config

        loaded = _get_aggregation_config()

        # Only validity is checked, not the exact configured values.
        level = loaded["level"]
        assert level == "note" or level == "chunk"
        chunk_limit = loaded["max_chunks_per_note"]
        assert chunk_limit >= 1


class TestNoteLevelAggregation:
    """UT-22: Note-level aggregation with max_chunks_per_note."""

    def test_note_level_limits_chunks(self):
        """Note-level pooling keeps at most max_chunks_per_note chunks per note.

        The pooling rule is simulated inline here; no retriever code is called.
        """
        # (point id, score, payload) triples: five chunks of note1, two of note2.
        mock_hits = [
            ("c1", 0.9, {"note_id": "note1", "chunk_id": "c1"}),
            ("c2", 0.85, {"note_id": "note1", "chunk_id": "c2"}),
            ("c3", 0.8, {"note_id": "note2", "chunk_id": "c3"}),
            ("c4", 0.75, {"note_id": "note1", "chunk_id": "c4"}),
            ("c5", 0.7, {"note_id": "note2", "chunk_id": "c5"}),
            ("c6", 0.65, {"note_id": "note1", "chunk_id": "c6"}),
            ("c7", 0.6, {"note_id": "note1", "chunk_id": "c7"}),
        ]

        limit = 2
        taken_per_note = {}
        kept = []

        # Walk the hits best score first and skip a note once it reached the limit.
        ranked = sorted(mock_hits, key=lambda hit: hit[1], reverse=True)
        for hit in ranked:
            owner = hit[2]["note_id"]
            already_taken = taken_per_note.get(owner, 0)
            if already_taken >= limit:
                continue
            kept.append(hit)
            taken_per_note[owner] = already_taken + 1

        # Two chunks from each of the two notes survive.
        assert len(kept) == 4

        # Recount from the pooled list itself rather than trusting the counter.
        kept_owners = [hit[2]["note_id"] for hit in kept]
        assert kept_owners.count("note1") == 2
        assert kept_owners.count("note2") == 2


class TestChunkLevelAggregation:
    """UT-23: Chunk-level aggregation (no deduplication)."""

    def test_chunk_level_no_dedup(self):
        """Chunk-level pooling returns every chunk, even when all share one note.

        The pooling rule is simulated inline here; no retriever code is called.
        """
        # Five hits that all belong to the same note.
        mock_hits = [
            ("c1", 0.9, {"note_id": "note1"}),
            ("c2", 0.85, {"note_id": "note1"}),
            ("c3", 0.8, {"note_id": "note1"}),
            ("c4", 0.75, {"note_id": "note1"}),
            ("c5", 0.7, {"note_id": "note1"}),
        ]

        level = "chunk"

        # Chunk level passes the hits through untouched; any other level is
        # modelled as an empty pool in this simulation.
        kept = mock_hits if level == "chunk" else []

        # All five chunks must still be present.
        assert len(kept) == 5


class TestQdrantIndexSetup:
    """UT-24: Qdrant index setup."""

    def test_bool_index_method_exists(self):
        """QdrantHTTP exposes a create_bool_index attribute."""
        from scripts.setup_mindnet_collections import QdrantHTTP

        client = QdrantHTTP("http://localhost:6333")
        has_method = hasattr(client, "create_bool_index")
        assert has_method

    def test_setup_includes_is_internal_index(self):
        """The setup function's source mentions the is_internal bool index."""
        import inspect
        from scripts.setup_mindnet_collections import setup_mindnet_collections

        # Text-level check on the function's source; nothing is executed.
        setup_source = inspect.getsource(setup_mindnet_collections)

        for needle in ("is_internal", "create_bool_index"):
            assert needle in setup_source


# Allow running this test module directly: python test_wp26_phase2_retriever.py
if __name__ == "__main__":
    # Propagate pytest's exit code so a failing run does not exit with status 0.
    raise SystemExit(pytest.main([__file__, "-v"]))