Backend:
- logic_evaluator.py (NEU, 307 Zeilen): Deterministischer Logic Evaluator
- Vergleichsoperatoren: EQ, NEQ, IN, NOT_IN, GT, LT, GTE, LTE, CONTAINS
- Logische Operatoren: AND, OR, NOT mit Verschachtelung
- Resolve signal references (node_id.question_type)
- Error handling für UNCLEAR/INVALID/NOT_DECIDABLE Signale
- workflow_executor.py (ERWEITERT):
- execute_logic_node(): Bedingungen evaluieren, Pfade aktivieren/deaktivieren
- execute_workflow(): BFS-Traversierung mit Edge-Activation statt sequenzieller Ausführung
- _apply_fallback(): 4 Fallback-Strategien (CONSERVATIVE_SKIP, DEFAULT_PATH, UNCERTAINTY_PATH, DOCUMENT_ONLY)
- _has_active_incoming_edge(): Prüft ob Node erreichbar ist
- _get_edges_by_label(): Findet then/else/uncertainty Pfade
- workflow_models.py (ERWEITERT):
- LogicOperator.CONTAINS hinzugefügt
- version.py: 0.9k → 0.9l, workflow 0.3.0 → 0.4.0
Tests:
- test_phase3_logic_evaluator.py (NEU): 20 Unit Tests (alle passing)
- Comparison operators (EQ, NEQ, IN, GT, LT, CONTAINS)
- Logical operators (AND, OR, NOT)
- Nested expressions
- Error handling (missing refs, UNCLEAR/INVALID signals)
- test_phase2_workflow_executor.py (AKTUALISIERT): 11 Tests (alle passing)
- execute_node() graph parameter hinzugefügt (Phase 3 requirement)
- test_execute_node_unknown_type: verwendet jetzt einen join-Knoten statt logic (logic ist jetzt implementiert)
- test_phase3_workflow_branching.py (NEU): Integration Tests vorbereitet
- Erfordert vollständige DB-Mock-Strategie (wird in E2E-Test nachgeholt)
Phase 2 Backward Compatibility: ✅ Alle Phase 2 Tests bestehen weiterhin
Konzept: .claude/task/Workflow_engine_prompting_engine/konzept_workflow_engine_konsolidated.md
Anforderungsanalyse: .claude/task/Workflow_engine_prompting_engine/phase3_anforderungsanalyse.md
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
395 lines
13 KiB
Python
395 lines
13 KiB
Python
"""
|
|
Unit Tests für workflow_executor.py (Phase 2)
|
|
|
|
Run with: PYTHONPATH=./backend pytest tests/backend/test_phase2_workflow_executor.py -v
|
|
"""
|
|
import pytest
|
|
from unittest.mock import AsyncMock, MagicMock, patch
|
|
from workflow_executor import aggregate_results
|
|
from workflow_models import NodeExecutionState, NodeStatus, NormalizedSignal, SignalStatus
|
|
|
|
|
|
# ── aggregate_results Tests ────────────────────────────────────────────────────
|
|
|
|
def test_aggregate_results_basic():
    """Aggregate a start/body/end run in which every node was executed.

    Only the "body" state is given an ``analysis_core`` and a normalized
    signal; the test checks that its heading and text show up in the combined
    analysis, and that the node counters and the signal list match.
    """
    relevance_signal = NormalizedSignal(
        question_type="relevanz",
        raw_value="ja",
        normalized_value="ja",
        status=SignalStatus.VALID,
    )
    start_state = NodeExecutionState(
        node_id="start",
        status=NodeStatus.EXECUTED,
        started_at="2026-04-03T12:00:00",
        completed_at="2026-04-03T12:00:01",
    )
    body_state = NodeExecutionState(
        node_id="body",
        status=NodeStatus.EXECUTED,
        analysis_core="Gewichtsentwicklung positiv",
        normalized_signals=[relevance_signal],
        started_at="2026-04-03T12:00:01",
        completed_at="2026-04-03T12:00:05",
    )
    end_state = NodeExecutionState(
        node_id="end",
        status=NodeStatus.EXECUTED,
        started_at="2026-04-03T12:00:05",
        completed_at="2026-04-03T12:00:06",
    )

    summary = aggregate_results([start_state, body_state, end_state])

    combined = summary["combined_analysis"]
    assert "## body" in combined
    assert "Gewichtsentwicklung" in combined

    assert summary["total_nodes"] == 3
    assert summary["executed_nodes"] == 3
    assert summary["failed_nodes"] == 0

    signals = summary["all_signals"]
    assert len(signals) == 1
    assert signals[0]["question_type"] == "relevanz"
|
|
|
|
|
|
def test_aggregate_results_with_failed_node():
    """Aggregate one executed node together with one failed node.

    Checks the total/executed/failed counters and that only the executed
    node's heading is present in the combined analysis text.
    """
    succeeded = NodeExecutionState(
        node_id="node1",
        status=NodeStatus.EXECUTED,
        analysis_core="Success",
        started_at="2026-04-03T12:00:00",
        completed_at="2026-04-03T12:00:01",
    )
    failed = NodeExecutionState(
        node_id="node2",
        status=NodeStatus.FAILED,
        error="LLM timeout",
        started_at="2026-04-03T12:00:01",
        completed_at="2026-04-03T12:00:02",
    )

    summary = aggregate_results([succeeded, failed])

    assert summary["total_nodes"] == 2
    assert summary["executed_nodes"] == 1
    assert summary["failed_nodes"] == 1

    combined = summary["combined_analysis"]
    assert "## node1" in combined
    # The failed node must not contribute a heading to the combined text.
    assert "## node2" not in combined
|
|
|
|
|
|
def test_aggregate_results_multiple_signals():
    """Aggregate two executed nodes that carry three signals in total.

    Checks that all signals are collected and that they appear in node order,
    then in per-node signal order.
    """

    def valid_signal(question_type, value):
        # Every signal in this test is VALID with identical raw/normalized values.
        return NormalizedSignal(
            question_type=question_type,
            raw_value=value,
            normalized_value=value,
            status=SignalStatus.VALID,
        )

    first_node = NodeExecutionState(
        node_id="node1",
        status=NodeStatus.EXECUTED,
        analysis_core="Analysis 1",
        normalized_signals=[
            valid_signal("relevanz", "ja"),
            valid_signal("prioritaet", "hoch"),
        ],
        started_at="2026-04-03T12:00:00",
        completed_at="2026-04-03T12:00:01",
    )
    second_node = NodeExecutionState(
        node_id="node2",
        status=NodeStatus.EXECUTED,
        analysis_core="Analysis 2",
        normalized_signals=[valid_signal("selektion", "nein")],
        started_at="2026-04-03T12:00:01",
        completed_at="2026-04-03T12:00:02",
    )

    summary = aggregate_results([first_node, second_node])

    signals = summary["all_signals"]
    assert len(signals) == 3
    collected_types = [entry["question_type"] for entry in signals]
    assert collected_types == ["relevanz", "prioritaet", "selektion"]
|
|
|
|
|
|
def test_aggregate_results_empty():
    """Aggregating an empty list of node states yields an all-empty summary."""
    summary = aggregate_results([])

    # Text and signals are empty ...
    assert summary["combined_analysis"] == ""
    assert summary["all_signals"] == []

    # ... and every counter is zero.
    assert summary["total_nodes"] == 0
    assert summary["executed_nodes"] == 0
    assert summary["failed_nodes"] == 0
|
|
|
|
|
|
def test_aggregate_results_no_analysis_core():
    """An executed node without ``analysis_core`` is counted but adds no text."""
    bare_state = NodeExecutionState(
        node_id="start",
        status=NodeStatus.EXECUTED,
        started_at="2026-04-03T12:00:00",
        completed_at="2026-04-03T12:00:01",
    )

    summary = aggregate_results([bare_state])

    assert summary["combined_analysis"] == ""
    assert summary["executed_nodes"] == 1
|
|
|
|
|
|
def test_aggregate_results_formatting():
    """Check the layout of ``combined_analysis`` for two executed nodes.

    Expected shape: ``## node_id``, newline, the node's analysis text, with a
    blank line separating consecutive node sections.
    """
    first = NodeExecutionState(
        node_id="node1",
        status=NodeStatus.EXECUTED,
        analysis_core="First analysis",
        started_at="2026-04-03T12:00:00",
        completed_at="2026-04-03T12:00:01",
    )
    second = NodeExecutionState(
        node_id="node2",
        status=NodeStatus.EXECUTED,
        analysis_core="Second analysis",
        started_at="2026-04-03T12:00:01",
        completed_at="2026-04-03T12:00:02",
    )

    combined = aggregate_results([first, second])["combined_analysis"]

    # The first section opens the text; the second follows somewhere after it.
    assert combined.startswith("## node1\nFirst analysis")
    assert "## node2\nSecond analysis" in combined
    # A blank line acts as the separator between node sections.
    assert "\n\n" in combined
|
|
|
|
|
|
# ── Integration-ähnliche Tests (ohne echte DB/LLM) ─────────────────────────────
|
|
|
|
@pytest.mark.asyncio
async def test_execute_node_start_end():
    """Start and end nodes execute successfully and produce no analysis text."""
    from workflow_executor import execute_node
    from workflow_models import WorkflowGraph, WorkflowNode

    async def mock_llm(prompt, model):
        return "should not be called"

    boundary_nodes = (
        WorkflowNode(id="start", type="start"),
        WorkflowNode(id="end", type="end"),
    )
    context = {"variables": {}, "profile_id": "test"}
    catalog = {}
    # Phase 3: execute_node() takes a graph argument; an empty graph is passed here.
    mock_graph = WorkflowGraph(nodes=[], edges=[])

    # Same expectations for both node kinds: start first, then end.
    for node in boundary_nodes:
        result = await execute_node(node, context, catalog, mock_graph, mock_llm)
        assert result.status == NodeStatus.EXECUTED
        assert result.analysis_core is None
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_execute_node_unknown_type():
    """A node type without an implementation yields a FAILED state.

    The test uses a ``join`` node (``logic`` is implemented as of Phase 3).
    ``execute_node`` is expected to return a state with status FAILED and an
    error message mentioning "not implemented" or "phase 4", rather than
    raising.
    """
    from workflow_executor import execute_node
    from workflow_models import WorkflowNode, WorkflowGraph

    # Phase 3: logic is now implemented, test with join instead
    join_node = WorkflowNode(id="join1", type="join")

    context = {"variables": {}, "profile_id": "test"}
    catalog = {}
    mock_graph = WorkflowGraph(nodes=[], edges=[])

    async def mock_llm(prompt, model):
        return ""

    result = await execute_node(join_node, context, catalog, mock_graph, mock_llm)

    # The node must be reported as FAILED together with an error message.
    assert result.status == NodeStatus.FAILED
    # Guard first: a missing message should fail as an assertion, not as an
    # AttributeError from calling .lower() on None.
    assert result.error is not None
    error_text = result.error.lower()
    assert "not implemented" in error_text or "phase 4" in error_text
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_execute_node_analysis_simple():
    """Run an analysis node that has no question augmentations.

    The prompt template loader is patched and the LLM is a stub; the test
    checks that the text after the "## Analyse" heading becomes
    ``analysis_core`` and that no normalized signals are produced.
    """
    from workflow_executor import execute_node
    from workflow_models import WorkflowGraph, WorkflowNode

    async def mock_llm(prompt, model):
        # Stub LLM reply: an analysis section only, no decision questions.
        return "## Analyse\nTest analysis content"

    analysis_node = WorkflowNode(
        id="test_node",
        type="analysis",
        prompt_slug="test_prompt",
        question_augmentations=None,
    )
    context = {"variables": {"name": "Test"}, "profile_id": "test"}
    catalog = {}
    mock_graph = WorkflowGraph(nodes=[], edges=[])

    # Replace the template loader so no real prompt lookup happens.
    with patch(
        'workflow_executor.load_prompt_template',
        return_value="Test prompt for {{name}}",
    ):
        result = await execute_node(
            analysis_node, context, catalog, mock_graph, mock_llm
        )

    assert result.status == NodeStatus.EXECUTED
    assert result.analysis_core == "Test analysis content"
    # No questions were attached, so no signals are expected.
    assert len(result.normalized_signals) == 0
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_execute_node_analysis_with_questions():
    """Run an analysis node with one question augmentation.

    The stub LLM answers the "relevanz" question with "ja"; the test checks
    the extracted analysis text and that exactly one VALID normalized signal
    with that value is returned.
    """
    from workflow_executor import execute_node
    from workflow_models import QuestionAugmentation, WorkflowGraph, WorkflowNode

    async def mock_llm(prompt, model):
        # Stub LLM reply: analysis section followed by the answered question.
        return """## Analyse
Test analysis

## Entscheidungsfragen
- Relevanz: ja
"""

    relevance_question = QuestionAugmentation(
        id="q1",
        type="relevanz",
        question="Ist relevant?",
        answer_spectrum=["ja", "nein", "unklar"],
    )
    analysis_node = WorkflowNode(
        id="test_node",
        type="analysis",
        prompt_slug="test_prompt",
        question_augmentations=[relevance_question],
    )

    context = {"variables": {}, "profile_id": "test"}
    catalog = {
        "relevanz": {
            "answer_spectrum": ["ja", "nein", "unklar"],
            "normalization_rules": None,
        }
    }
    mock_graph = WorkflowGraph(nodes=[], edges=[])

    # Replace the template loader so no real prompt lookup happens.
    with patch('workflow_executor.load_prompt_template', return_value="Base prompt"):
        result = await execute_node(
            analysis_node, context, catalog, mock_graph, mock_llm
        )

    assert result.status == NodeStatus.EXECUTED
    assert result.analysis_core == "Test analysis"

    assert len(result.normalized_signals) == 1
    signal = result.normalized_signals[0]
    assert signal.question_type == "relevanz"
    assert signal.normalized_value == "ja"
    assert signal.status == SignalStatus.VALID
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_execute_node_hybrid_model_override():
    """Hybrid model: a node-specific answer spectrum overrides the catalog's.

    The node's question uses the spectrum ["increase", "stable", "decrease"],
    while the catalog entry for the same question type lists
    ["ja", "nein", "unklar"]. The stub LLM answers "decrease", which is only
    part of the node's spectrum, so the signal must come back VALID.

    Regression test; the issue link in the original docstring is a
    placeholder: https://github.com/anthropics/claude-code/issues/XXX
    """
    from workflow_executor import execute_node
    from workflow_models import QuestionAugmentation, WorkflowGraph, WorkflowNode

    async def mock_llm(prompt, model):
        # "decrease" is in the node's spectrum but not in the catalog's.
        return """## Analyse
Gewicht gesunken

## Entscheidungsfragen
- Relevanz: decrease
"""

    # The node declares a spectrum that differs from the catalog entry.
    node_question = QuestionAugmentation(
        id="q1",
        type="relevanz",
        question="Hat sich die Fettmasse verändert?",
        answer_spectrum=["increase", "stable", "decrease"],  # node-specific
    )
    node = WorkflowNode(
        id="test_node",
        type="analysis",
        prompt_slug="test_prompt",
        question_augmentations=[node_question],
    )

    context = {"variables": {}, "profile_id": "test"}

    # The catalog lists a different spectrum for the same question type.
    catalog = {
        "relevanz": {
            "answer_spectrum": ["ja", "nein", "unklar"],  # catalog default
            "normalization_rules": None,
        }
    }
    mock_graph = WorkflowGraph(nodes=[], edges=[])

    # Replace the template loader so no real prompt lookup happens.
    with patch('workflow_executor.load_prompt_template', return_value="Base prompt"):
        result = await execute_node(node, context, catalog, mock_graph, mock_llm)

    assert result.status == NodeStatus.EXECUTED
    assert len(result.normalized_signals) == 1

    signal = result.normalized_signals[0]
    assert signal.question_type == "relevanz"
    assert signal.raw_value == "decrease"
    assert signal.normalized_value == "decrease"
    # Critical check: must be VALID (node spectrum), not INVALID (catalog).
    # If this fails, the catalog spectrum was used instead of the node's.
    assert signal.status == SignalStatus.VALID
|
|
|
|
|
|
if __name__ == "__main__":
    # pytest.main() returns the run's exit code instead of exiting itself.
    # Propagate it so that running this file directly reports test failures
    # through the process exit status rather than always exiting with 0.
    raise SystemExit(pytest.main([__file__, "-v"]))
|