- Introduced configurable edge scoring with an internal boost for intra-note edges and an external boost for inter-note edges.
- Added aggregation configuration to support note-level and chunk-level retrieval strategies.
- Updated the retriever and graph subgraph modules to use the new scoring and aggregation logic.
- Extended the YAML configuration with new parameters for edge scoring and aggregation levels.
- Added a boolean index in the setup script so results can be filtered by edge properties.
241 lines
8.2 KiB
Python
241 lines
8.2 KiB
Python
"""
|
|
FILE: tests/test_wp26_phase2_retriever.py
DESCRIPTION: Unit tests for WP-26 Phase 2: retriever adjustments
 - is_internal boost for intra-note edges
 - Configurable aggregation (note/chunk level)
VERSION: 1.0.0
|
|
"""
|
|
import pytest
|
|
from unittest.mock import patch, MagicMock
|
|
import os
|
|
|
|
|
|
class TestEdgeScoringConfig:
    """UT-19: Edge scoring configuration."""

    @pytest.fixture(autouse=True)
    def _reset_edge_scoring_cache(self):
        """Clear the memoized edge-scoring config before and after each test.

        ``get_edge_scoring_config`` exposes ``cache_clear()``, so its result is
        cached between calls. Resetting after the test as well keeps a config
        that was computed under a patched environment from leaking into tests
        that run later.
        """
        from app.core.graph.graph_subgraph import get_edge_scoring_config

        get_edge_scoring_config.cache_clear()
        yield
        get_edge_scoring_config.cache_clear()

    def test_get_edge_scoring_config_defaults(self):
        """Default values are expected when the config file does not exist."""
        from app.core.graph.graph_subgraph import get_edge_scoring_config

        # Point the config lookup at a file that does not exist.
        with patch.dict(os.environ, {"MINDNET_RETRIEVER_CONFIG": "/nonexistent/path.yaml"}):
            # Clear inside the patch so the call below cannot be answered from
            # a value cached under the unpatched environment.
            get_edge_scoring_config.cache_clear()
            config = get_edge_scoring_config()

            assert config["internal_edge_boost"] == 1.2
            assert config["external_edge_boost"] == 1.0

    def test_get_edge_scoring_config_from_yaml(self):
        """Values obtained without an environment override are valid boosts."""
        from app.core.graph.graph_subgraph import get_edge_scoring_config

        # No override here: the function resolves its regular config source.
        config = get_edge_scoring_config()

        # The values are expected to match the defaults (from retriever.yaml);
        # only the lower bound is asserted so the test does not pin them.
        assert config["internal_edge_boost"] >= 1.0
        assert config["external_edge_boost"] >= 1.0
|
|
|
|
|
|
class TestIsInternalBoost:
    """UT-20: is_internal boost in the subgraph."""

    @pytest.fixture(autouse=True)
    def _reset_edge_scoring_cache(self):
        """Clear the memoized edge-scoring config before and after each test.

        Replaces the per-test ``cache_clear()`` calls and additionally resets
        the cache once the test has finished, so no test in this class depends
        on (or leaves behind) a previously cached configuration.
        """
        from app.core.graph.graph_subgraph import get_edge_scoring_config

        get_edge_scoring_config.cache_clear()
        yield
        get_edge_scoring_config.cache_clear()

    def test_internal_edge_gets_boost(self):
        """Intra-note edges are stored with the internal boost applied."""
        from app.core.graph.graph_subgraph import Subgraph, get_edge_scoring_config

        sg = Subgraph()

        # Internal edge (both endpoints are chunks of the same note).
        sg.add_edge({
            "source": "note1#c01",
            "target": "note1#c02",
            "kind": "derives",
            "weight": 1.0,
            "is_internal": True,
        })

        # Check that the stored weight was increased.
        edges = sg.adj.get("note1#c01", [])
        assert len(edges) == 1

        internal_boost = get_edge_scoring_config()["internal_edge_boost"]
        # approx: the weight is a float product, so avoid exact equality.
        assert edges[0]["weight"] == pytest.approx(1.0 * internal_boost)
        assert edges[0]["is_internal"] is True

    def test_external_edge_no_boost(self):
        """Inter-note edges are stored with the external boost only."""
        from app.core.graph.graph_subgraph import Subgraph, get_edge_scoring_config

        sg = Subgraph()

        # External edge (endpoints belong to different notes).
        sg.add_edge({
            "source": "note1#c01",
            "target": "note2#c01",
            "kind": "references",
            "weight": 1.0,
            "is_internal": False,
        })

        edges = sg.adj.get("note1#c01", [])
        assert len(edges) == 1

        external_boost = get_edge_scoring_config()["external_edge_boost"]
        # approx: the weight is a float product, so avoid exact equality.
        assert edges[0]["weight"] == pytest.approx(1.0 * external_boost)
        assert edges[0]["is_internal"] is False

    def test_edge_bonus_aggregation_with_internal(self):
        """The aggregated edge bonus is positive with mixed edge kinds."""
        from app.core.graph.graph_subgraph import Subgraph, get_edge_scoring_config  # noqa: F401

        sg = Subgraph()

        # Two edges: one flagged internal, one flagged external.
        # NOTE(review): the first edge is flagged is_internal although source
        # and target are different note ids - confirm this is intended.
        sg.add_edge({
            "source": "note1",
            "target": "note2",
            "kind": "solves",
            "weight": 1.5,
            "is_internal": True,
        })
        sg.add_edge({
            "source": "note1",
            "target": "note3",
            "kind": "references",
            "weight": 0.1,
            "is_internal": False,
        })

        # Aggregated bonus for the shared source node.
        bonus = sg.edge_bonus("note1")

        # Only positivity is asserted; the exact formula is not pinned here.
        assert bonus > 0
|
|
|
|
|
|
class TestAggregationConfig:
    """UT-21: Aggregation configuration."""

    def test_get_aggregation_config_defaults(self):
        """Default values are expected when the config file does not exist."""
        from app.core.retrieval.retriever import _get_aggregation_config

        # Environment override pointing at a config file that does not exist.
        missing_config_env = {"MINDNET_RETRIEVER_CONFIG": "/nonexistent/path.yaml"}

        with patch.dict(os.environ, missing_config_env):
            aggregation = _get_aggregation_config()

            assert aggregation["level"] == "note"
            assert aggregation["max_chunks_per_note"] == 3

    def test_get_aggregation_config_from_yaml(self):
        """Values obtained without an environment override are valid."""
        from app.core.retrieval.retriever import _get_aggregation_config

        aggregation = _get_aggregation_config()

        # The values are expected to come from retriever.yaml; only their
        # validity is checked, not the exact settings.
        assert aggregation["level"] in ("note", "chunk")
        assert aggregation["max_chunks_per_note"] >= 1
|
|
|
|
|
|
class TestNoteLevelAggregation:
    """UT-22: Note-level aggregation with max_chunks_per_note."""

    def test_note_level_limits_chunks(self):
        """Note-level aggregation caps the number of chunks kept per note.

        NOTE(review): the pooling loop is simulated inline; this test does not
        call the retriever, so it documents the intended rule rather than
        guarding the production implementation.
        """
        # Mock data: 5 chunks from note1, 2 chunks from note2.
        mock_hits = [
            ("c1", 0.9, {"note_id": "note1", "chunk_id": "c1"}),
            ("c2", 0.85, {"note_id": "note1", "chunk_id": "c2"}),
            ("c3", 0.8, {"note_id": "note2", "chunk_id": "c3"}),
            ("c4", 0.75, {"note_id": "note1", "chunk_id": "c4"}),
            ("c5", 0.7, {"note_id": "note2", "chunk_id": "c5"}),
            ("c6", 0.65, {"note_id": "note1", "chunk_id": "c6"}),
            ("c7", 0.6, {"note_id": "note1", "chunk_id": "c7"}),
        ]

        # Simulate note-level aggregation with max_chunks_per_note=2.
        max_chunks_per_note = 2
        pooled = []
        note_count = {}

        # Walk the hits best-first so each note keeps its top-scoring chunks.
        for pid, score, payload in sorted(mock_hits, key=lambda x: x[1], reverse=True):
            note_id = payload["note_id"]
            if note_count.get(note_id, 0) < max_chunks_per_note:
                pooled.append((pid, score, payload))
                note_count[note_id] = note_count.get(note_id, 0) + 1

        # Expectation: 2 from note1 + 2 from note2 = 4 chunks.
        assert len(pooled) == 4

        # The survivors must be the highest-scoring chunks, in score order.
        assert [pid for pid, _score, _payload in pooled] == ["c1", "c2", "c3", "c5"]

        # Each note contributes at most 2 chunks.
        note1_chunks = [p for p in pooled if p[2]["note_id"] == "note1"]
        note2_chunks = [p for p in pooled if p[2]["note_id"] == "note2"]
        assert len(note1_chunks) == 2
        assert len(note2_chunks) == 2
|
|
|
|
|
|
class TestChunkLevelAggregation:
    """UT-23: Chunk-level aggregation (no deduplication)."""

    def test_chunk_level_no_dedup(self):
        """Chunk-level aggregation returns every chunk, even from one note.

        NOTE(review): the aggregation switch is simulated by a local helper;
        this test does not call the retriever.
        """
        mock_hits = [
            ("c1", 0.9, {"note_id": "note1"}),
            ("c2", 0.85, {"note_id": "note1"}),
            ("c3", 0.8, {"note_id": "note1"}),
            ("c4", 0.75, {"note_id": "note1"}),
            ("c5", 0.7, {"note_id": "note1"}),
        ]

        def pool(hits, level):
            """Simulate the switch: pass-through for "chunk", else one per note."""
            if level == "chunk":
                # Chunk level: no deduplication. Copy so the result never
                # aliases the fixture list.
                return list(hits)
            # Note level (simulated with a limit of one): keep the first hit
            # seen for each note.
            first_hit_per_note = {}
            for hit in hits:
                first_hit_per_note.setdefault(hit[2]["note_id"], hit)
            return list(first_hit_per_note.values())

        pooled = pool(mock_hits, "chunk")

        # All 5 chunks must survive, unchanged and in their original order.
        assert len(pooled) == 5
        assert pooled == mock_hits

        # Contrast: the note-level branch would keep only 1 of these chunks.
        assert len(pool(mock_hits, "note")) == 1
|
|
|
|
|
|
class TestQdrantIndexSetup:
    """UT-24: Qdrant index setup."""

    def test_bool_index_method_exists(self):
        """A QdrantHTTP instance exposes a ``create_bool_index`` attribute."""
        from scripts.setup_mindnet_collections import QdrantHTTP

        client = QdrantHTTP("http://localhost:6333")
        assert hasattr(client, "create_bool_index")

    def test_setup_includes_is_internal_index(self):
        """The setup function's source mentions the is_internal bool index."""
        import inspect
        from scripts.setup_mindnet_collections import setup_mindnet_collections

        # Inspect the source text of the function instead of executing it.
        setup_source = inspect.getsource(setup_mindnet_collections)

        for expected_token in ("is_internal", "create_bool_index"):
            assert expected_token in setup_source
|
|
|
|
|
|
if __name__ == "__main__":
    # Allow running this module directly: hand this file to pytest, verbose.
    cli_args = [__file__, "-v"]
    pytest.main(cli_args)
|