"""
Unit tests for workflow_executor.py (Phase 2).

Run with: PYTHONPATH=./backend pytest tests/backend/test_phase2_workflow_executor.py -v
"""
|
|
import pytest
|
|
from unittest.mock import AsyncMock, MagicMock, patch
|
|
from workflow_executor import aggregate_results
|
|
from workflow_models import NodeExecutionState, NodeStatus, NormalizedSignal, SignalStatus
|
|
|
|
|
|
# ── aggregate_results Tests ────────────────────────────────────────────────────
|
|
|
|
def test_aggregate_results_basic():
    """Aggregate three executed nodes where only ``body`` carries analysis text and a signal."""
    start_state = NodeExecutionState(
        node_id="start",
        status=NodeStatus.EXECUTED,
        started_at="2026-04-03T12:00:00",
        completed_at="2026-04-03T12:00:01",
    )
    relevance_signal = NormalizedSignal(
        question_type="relevanz",
        raw_value="ja",
        normalized_value="ja",
        status=SignalStatus.VALID,
    )
    body_state = NodeExecutionState(
        node_id="body",
        status=NodeStatus.EXECUTED,
        analysis_core="Gewichtsentwicklung positiv",
        normalized_signals=[relevance_signal],
        started_at="2026-04-03T12:00:01",
        completed_at="2026-04-03T12:00:05",
    )
    end_state = NodeExecutionState(
        node_id="end",
        status=NodeStatus.EXECUTED,
        started_at="2026-04-03T12:00:05",
        completed_at="2026-04-03T12:00:06",
    )

    summary = aggregate_results([start_state, body_state, end_state])

    # The analysis text of the "body" node must show up under its own heading.
    combined = summary["combined_analysis"]
    assert "## body" in combined
    assert "Gewichtsentwicklung" in combined

    # All three nodes are counted and none of them failed.
    assert summary["total_nodes"] == 3
    assert summary["executed_nodes"] == 3
    assert summary["failed_nodes"] == 0

    # The single signal of the "body" node is exposed as a dict-like entry.
    signals = summary["all_signals"]
    assert len(signals) == 1
    assert signals[0]["question_type"] == "relevanz"
|
|
|
|
|
|
def test_aggregate_results_with_failed_node():
    """Aggregate one executed and one failed node; the failed one is excluded from the text."""
    succeeded = NodeExecutionState(
        node_id="node1",
        status=NodeStatus.EXECUTED,
        analysis_core="Success",
        started_at="2026-04-03T12:00:00",
        completed_at="2026-04-03T12:00:01",
    )
    failed = NodeExecutionState(
        node_id="node2",
        status=NodeStatus.FAILED,
        error="LLM timeout",
        started_at="2026-04-03T12:00:01",
        completed_at="2026-04-03T12:00:02",
    )

    summary = aggregate_results([succeeded, failed])

    # Counters distinguish executed from failed nodes.
    assert summary["total_nodes"] == 2
    assert summary["executed_nodes"] == 1
    assert summary["failed_nodes"] == 1

    # Only the successful node gets a heading in the combined analysis.
    combined = summary["combined_analysis"]
    assert "## node1" in combined
    assert "## node2" not in combined
|
|
|
|
|
|
def test_aggregate_results_multiple_signals():
    """Aggregate several normalized signals spread over two nodes, keeping their order."""

    def valid_signal(question_type, value):
        # Every signal in this test is VALID with identical raw and normalized value.
        return NormalizedSignal(
            question_type=question_type,
            raw_value=value,
            normalized_value=value,
            status=SignalStatus.VALID,
        )

    first_node = NodeExecutionState(
        node_id="node1",
        status=NodeStatus.EXECUTED,
        analysis_core="Analysis 1",
        normalized_signals=[
            valid_signal("relevanz", "ja"),
            valid_signal("prioritaet", "hoch"),
        ],
        started_at="2026-04-03T12:00:00",
        completed_at="2026-04-03T12:00:01",
    )
    second_node = NodeExecutionState(
        node_id="node2",
        status=NodeStatus.EXECUTED,
        analysis_core="Analysis 2",
        normalized_signals=[valid_signal("selektion", "nein")],
        started_at="2026-04-03T12:00:01",
        completed_at="2026-04-03T12:00:02",
    )

    summary = aggregate_results([first_node, second_node])

    # Signals are concatenated node by node, in declaration order.
    signals = summary["all_signals"]
    assert len(signals) == 3
    assert [entry["question_type"] for entry in signals] == [
        "relevanz",
        "prioritaet",
        "selektion",
    ]
|
|
|
|
|
|
def test_aggregate_results_empty():
    """Aggregating an empty node-state list yields empty text, no signals and zero counters."""
    summary = aggregate_results([])

    assert summary["combined_analysis"] == ""
    assert summary["all_signals"] == []

    # None of the node counters may be non-zero for an empty input.
    for counter in ("total_nodes", "executed_nodes", "failed_nodes"):
        assert summary[counter] == 0
|
|
|
|
|
|
def test_aggregate_results_no_analysis_core():
    """An executed node without ``analysis_core`` is counted but adds no text."""
    lone_start = NodeExecutionState(
        node_id="start",
        status=NodeStatus.EXECUTED,
        started_at="2026-04-03T12:00:00",
        completed_at="2026-04-03T12:00:01",
    )

    summary = aggregate_results([lone_start])

    assert summary["combined_analysis"] == ""
    assert summary["executed_nodes"] == 1
|
|
|
|
|
|
def test_aggregate_results_formatting():
    """Check the layout of ``combined_analysis`` for two executed nodes."""
    node_states = [
        NodeExecutionState(
            node_id=node_id,
            status=NodeStatus.EXECUTED,
            analysis_core=text,
            started_at=started,
            completed_at=completed,
        )
        for node_id, text, started, completed in (
            ("node1", "First analysis", "2026-04-03T12:00:00", "2026-04-03T12:00:01"),
            ("node2", "Second analysis", "2026-04-03T12:00:01", "2026-04-03T12:00:02"),
        )
    ]

    combined = aggregate_results(node_states)["combined_analysis"]

    # Expected format: ## node_id\nanalysis_core\n\n## node_id\nanalysis_core
    assert combined.startswith("## node1\nFirst analysis")
    assert "## node2\nSecond analysis" in combined
    assert "\n\n" in combined  # blank-line separator between nodes
|
|
|
|
|
|
# ── Integration-ähnliche Tests (ohne echte DB/LLM) ─────────────────────────────
|
|
|
|
@pytest.mark.asyncio
async def test_execute_node_start_end():
    """Start and end nodes are no-ops: they execute, yield no analysis and never call the LLM.

    The LLM callable is an ``AsyncMock`` so the test can verify it was never
    invoked, instead of only returning a marker string that nobody checks.
    """
    from workflow_executor import execute_node
    from workflow_models import WorkflowNode

    context = {"variables": {}, "profile_id": "test"}
    catalog = {}
    mock_llm = AsyncMock(return_value="should not be called")

    for node_type in ("start", "end"):
        node = WorkflowNode(id=node_type, type=node_type)

        result = await execute_node(node, context, catalog, mock_llm)

        # The node type is attached as assertion message to identify the failing case.
        assert result.status == NodeStatus.EXECUTED, node_type
        assert result.analysis_core is None, node_type

    # A no-op node must not trigger any LLM request at all.
    mock_llm.assert_not_called()
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_execute_node_unknown_type():
    """A node type that is not supported yields a FAILED state with an error message."""
    from workflow_executor import execute_node
    from workflow_models import WorkflowNode

    async def mock_llm(prompt, model):
        return ""

    # Phase 2 only supports start, end and analysis nodes.
    unsupported_node = WorkflowNode(id="logic1", type="logic")
    run_context = {"variables": {}, "profile_id": "test"}
    empty_catalog = {}

    outcome = await execute_node(unsupported_node, run_context, empty_catalog, mock_llm)

    # The failure is reported through the state, together with an error message.
    assert outcome.status == NodeStatus.FAILED
    assert "not implemented in Phase 2" in outcome.error
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_execute_node_analysis_simple():
    """Analysis node without question augmentations returns the analysis text and no signals."""
    from workflow_executor import execute_node
    from workflow_models import WorkflowNode

    async def mock_llm(prompt, model):
        # Canned LLM answer containing only an analysis section.
        return "## Analyse\nTest analysis content"

    analysis_node = WorkflowNode(
        id="test_node",
        type="analysis",
        prompt_slug="test_prompt",
        question_augmentations=None,
    )
    run_context = {"variables": {"name": "Test"}, "profile_id": "test"}
    empty_catalog = {}

    # Replace the prompt template loader so no real template lookup happens.
    with patch(
        "workflow_executor.load_prompt_template",
        return_value="Test prompt for {{name}}",
    ):
        outcome = await execute_node(analysis_node, run_context, empty_catalog, mock_llm)

    assert outcome.status == NodeStatus.EXECUTED
    assert outcome.analysis_core == "Test analysis content"
    assert len(outcome.normalized_signals) == 0  # no questions configured
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_execute_node_analysis_with_questions():
    """Analysis node with one question augmentation yields one normalized, valid signal."""
    from workflow_executor import execute_node
    from workflow_models import WorkflowNode, QuestionAugmentation

    # Canned LLM answer: analysis section followed by the decision-question section.
    llm_response = (
        "## Analyse\n"
        "Test analysis\n"
        "\n"
        "## Entscheidungsfragen\n"
        "- Relevanz: ja\n"
    )

    async def mock_llm(prompt, model):
        return llm_response

    relevance_question = QuestionAugmentation(
        id="q1",
        type="relevanz",
        question="Ist relevant?",
        answer_spectrum=["ja", "nein", "unklar"],
    )
    analysis_node = WorkflowNode(
        id="test_node",
        type="analysis",
        prompt_slug="test_prompt",
        question_augmentations=[relevance_question],
    )
    run_context = {"variables": {}, "profile_id": "test"}
    question_catalog = {
        "relevanz": {
            "answer_spectrum": ["ja", "nein", "unklar"],
            "normalization_rules": None,
        },
    }

    # Replace the prompt template loader so no real template lookup happens.
    with patch("workflow_executor.load_prompt_template", return_value="Base prompt"):
        outcome = await execute_node(analysis_node, run_context, question_catalog, mock_llm)

    assert outcome.status == NodeStatus.EXECUTED
    assert outcome.analysis_core == "Test analysis"
    assert len(outcome.normalized_signals) == 1

    signal = outcome.normalized_signals[0]
    assert signal.question_type == "relevanz"
    assert signal.normalized_value == "ja"
    assert signal.status == SignalStatus.VALID
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_execute_node_hybrid_model_override():
    """Hybrid model: a node-specific answer spectrum overrides the catalog spectrum.

    The node declares ``["increase", "stable", "decrease"]`` for the
    ``relevanz`` question while the catalog declares ``["ja", "nein", "unklar"]``.
    The mocked LLM answers ``decrease``, which is only part of the node's
    spectrum, so the resulting signal must be VALID.

    Regression test for a bug fix.
    NOTE(review): the issue link in the original docstring was a placeholder
    (https://github.com/anthropics/claude-code/issues/XXX) - replace it with
    the real reference.
    """
    from workflow_executor import execute_node
    from workflow_models import WorkflowNode, QuestionAugmentation

    # LLM answers "decrease": valid for the node spectrum, invalid for the catalog one.
    llm_response = (
        "## Analyse\n"
        "Gewicht gesunken\n"
        "\n"
        "## Entscheidungsfragen\n"
        "- Relevanz: decrease\n"
    )

    async def mock_llm(prompt, model):
        return llm_response

    # Question whose spectrum deliberately differs from the catalog entry.
    node_question = QuestionAugmentation(
        id="q1",
        type="relevanz",
        question="Hat sich die Fettmasse verändert?",
        answer_spectrum=["increase", "stable", "decrease"],  # node-specific
    )
    analysis_node = WorkflowNode(
        id="test_node",
        type="analysis",
        prompt_slug="test_prompt",
        question_augmentations=[node_question],
    )
    run_context = {"variables": {}, "profile_id": "test"}
    question_catalog = {
        "relevanz": {
            "answer_spectrum": ["ja", "nein", "unklar"],  # catalog default
            "normalization_rules": None,
        },
    }

    # Replace the prompt template loader so no real template lookup happens.
    with patch("workflow_executor.load_prompt_template", return_value="Base prompt"):
        outcome = await execute_node(analysis_node, run_context, question_catalog, mock_llm)

    # "decrease" must be VALID (node spectrum), not INVALID (catalog spectrum).
    assert outcome.status == NodeStatus.EXECUTED
    assert len(outcome.normalized_signals) == 1

    signal = outcome.normalized_signals[0]
    assert signal.question_type == "relevanz"
    assert signal.raw_value == "decrease"
    assert signal.normalized_value == "decrease"
    # Critical check: if this fails, the catalog spectrum was used instead of the node's.
    assert signal.status == SignalStatus.VALID
|
|
|
|
|
|
if __name__ == "__main__":
    # Allow running this file directly. pytest.main() returns the exit code;
    # propagate it so a failing run is visible to the shell/CI instead of
    # always exiting with status 0.
    raise SystemExit(pytest.main([__file__, "-v"]))
|