test: Add regression test for hybrid model - node spectrum overrides catalog
This commit is contained in:
parent
c588372f3a
commit
16dc08cd7d
|
|
@ -319,5 +319,71 @@ Test analysis
|
||||||
assert result.normalized_signals[0].status == SignalStatus.VALID
|
assert result.normalized_signals[0].status == SignalStatus.VALID
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_execute_node_hybrid_model_override():
    """
    Hybrid model: a node-specific answer spectrum overrides the catalog.

    Critical regression test for the bug fix: a node whose question declares
    answer_spectrum ["increase", "stable", "decrease"] must take precedence
    over the catalog spectrum ["ja", "nein", "unklar"] for the same
    question type.

    Regression test for: https://github.com/anthropics/claude-code/issues/XXX
    (NOTE(review): the issue number is still a placeholder -- fill in.)
    """
    from workflow_executor import execute_node
    from workflow_models import WorkflowNode, QuestionAugmentation

    # The catalog default for "relevanz" deliberately uses a DIFFERENT
    # spectrum than the node below.
    question_catalog = {
        "relevanz": {
            "answer_spectrum": ["ja", "nein", "unklar"],  # catalog default
            "normalization_rules": None,
        }
    }

    # The question carries its own, node-specific spectrum.
    fat_mass_question = QuestionAugmentation(
        id="q1",
        type="relevanz",
        question="Hat sich die Fettmasse verändert?",
        answer_spectrum=["increase", "stable", "decrease"],  # node-specific
    )
    override_node = WorkflowNode(
        id="test_node",
        type="analysis",
        prompt_slug="test_prompt",
        question_augmentations=[fat_mass_question],
    )

    run_context = {"variables": {}, "profile_id": "test"}

    # Canned LLM reply: "decrease" is valid for the node spectrum but not
    # part of the catalog spectrum.
    llm_reply = """## Analyse
Gewicht gesunken

## Entscheidungsfragen
- Relevanz: decrease
"""

    async def fake_llm(prompt, model):
        return llm_reply

    # Replace load_prompt_template so the call always yields a fixed prompt.
    with patch(
        'workflow_executor.load_prompt_template',
        return_value="Base prompt",
    ):
        result = await execute_node(
            override_node, run_context, question_catalog, fake_llm
        )

    # "decrease" must be VALID (node spectrum), not INVALID (catalog).
    assert result.status == NodeStatus.EXECUTED
    assert len(result.normalized_signals) == 1

    only_signal = result.normalized_signals[0]
    assert only_signal.question_type == "relevanz"
    assert only_signal.raw_value == "decrease"
    assert only_signal.normalized_value == "decrease"
    # CRITICAL: must be VALID, not INVALID.
    assert only_signal.status == SignalStatus.VALID

    # If this test fails, the catalog spectrum was used instead of the
    # node-specific spectrum.
|
||||||
|
|
||||||
|
|
||||||
# Allow running this test module directly as a script, in addition to
# collecting it through the pytest command line.
if __name__ == "__main__":
    # pytest.main() returns the session exit code; forward it to the
    # interpreter so a failing run does not exit with status 0.
    raise SystemExit(pytest.main([__file__, "-v"]))
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user