From de5b8cbf15a86382ae3b1102495dc32b844eb2eb Mon Sep 17 00:00:00 2001
From: Lars <Lars@stommer.de>
Date: Thu, 9 Apr 2026 21:13:50 +0200
Subject: [PATCH] fix: CRITICAL - Use question ID (not type) for LLM
 communication
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

ROOT ARCHITECTURAL CHANGE:
Multiple questions with the same type are now supported!

Problem:
- question_augmenter used q.type as the LLM key
- If two questions had type="unsicherheit":
  - The LLM saw the same key twice: "- **Unsicherheit**: [ja/nein]"
  - It could only answer one of them
  - The resulting signal could not be attributed to a specific question

Solution:
- Use question.id as LLM key (unique by design)
- Keep type for normalization logic
- Map id → type internally

Backend question_augmenter.py:
- format_question_list() now uses q.id as key
- Format: "- **q21**: [ja/nein]  # Question text"
- Question text (first 50 characters; q.type if the text is empty) is
  appended as a comment for LLM context

Backend workflow_executor.py:
- Removed type→id mapping (no longer needed)
- decision_signals now keyed by id (from LLM)
- Build id → {type, answer_spectrum} catalog for normalization
- Signals whose key is not a known question id are skipped
- NormalizedSignal.question_type stores id (not type!)
- End Node template: signal_{id} directly available

Flow:
1. Questions sent to LLM: "- **q21**: [ja/nein]  # Ist Protein unsicher?"
2. LLM answers: "- q21: nein"
3. Normalization: answer spectrum from the question (looked up by id),
   normalization rules from the type catalog (id→type lookup)
4. Template: {{ node_4.signal_q21 }} = "nein"

Example (TWO unsicherheit questions):
Questions:
- q21: type=unsicherheit, question="Ist Protein unsicher?"
- q22: type=unsicherheit, question="Ist Energie unsicher?"

LLM Prompt:
```
## Entscheidungsfragen
- **q21**: [ja/nein]  # Ist Protein unsicher?
- **q22**: [ja/nein]  # Ist Energie unsicher?
```

LLM Response:
```
- q21: nein
- q22: ja
```

Template:
```
{{ node_4.signal_q21 }} → "nein"
{{ node_4.signal_q22 }} → "ja"
```

BREAKING CHANGE:
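- Old workflows with decision_signals keyed by type will break: results
  produced before this change carry the type in question_type, so the
  End Node exposes them as signal_{type} instead of signal_{id}
- Workflows need to be re-executed after the update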

Issue: Cannot have multiple questions with same type
Version: 0.9p (workflow module)
Part 3: End Node Template Engine - ARCHITECTURAL FIX

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 backend/question_augmenter.py | 11 +++--
 backend/workflow_executor.py  | 75 +++++++++++++++++------------------
 2 files changed, 45 insertions(+), 41 deletions(-)

diff --git a/backend/question_augmenter.py b/backend/question_augmenter.py
index 7da8529..ee15b9d 100644
--- a/backend/question_augmenter.py
+++ b/backend/question_augmenter.py
@@ -235,10 +235,13 @@ def format_question_list(questions: List[QuestionAugmentation]) -> str:
     """
     Formatiert Fragenliste als Markdown-Liste.
 
+    Verwendet question.id als Schlüssel (nicht type), damit mehrere Fragen
+    des gleichen Typs möglich sind.
+
     Format:
     ```
-    - Relevanz: [ja/nein/unklar]
-    - Priorität: [hoch/mittel/niedrig/unklar]
+    - q21: [ja/nein/unklar]  # Ist Protein unsicher?
+    - q22: [ja/nein/unklar]  # Ist Energie unsicher?
     ```
 
     Args:
@@ -250,7 +253,9 @@ def format_question_list(questions: List[QuestionAugmentation]) -> str:
     lines = []
     for q in questions:
         spectrum_str = "/".join(q.answer_spectrum)
-        lines.append(f"- **{q.type.capitalize()}**: [{spectrum_str}]")
+        # Use ID as key (unique), show question text as comment for context
+        question_text = q.question[:50] if q.question else q.type
+        lines.append(f"- **{q.id}**: [{spectrum_str}]  # {question_text}")
 
     return "\n".join(lines)
 
diff --git a/backend/workflow_executor.py b/backend/workflow_executor.py
index cb3bcb9..e81e574 100644
--- a/backend/workflow_executor.py
+++ b/backend/workflow_executor.py
@@ -28,7 +28,7 @@ from question_augmenter import (
     parse_question_augmentations_from_jsonb
 )
 from result_container_parser import parse_result_container
-from normalization_engine import normalize_all_signals, load_question_catalog
+from normalization_engine import normalize_all_signals, normalize_signal_value, load_question_catalog
 from logic_evaluator import evaluate_logic_expression, resolve_signal_reference
 from join_evaluator import evaluate_join_node as evaluate_join_node_core
 from db import get_db, get_cursor
@@ -311,23 +311,45 @@ async def execute_node(
             logger.debug(f"Node {node.id}: Parsed response (status: {parsed['parsing_status']})")
 
             # 6. Normalize Signals
+            # NOTE: decision_signals now use question.id as key (not type)
+            # We need to build a catalog: id → {type, spectrum} for normalization
             normalized_signals = []
             if parsed["decision_signals"]:
-                # Hybrid Model: Node-spezifische Questions überschreiben Catalog
-                node_catalog = catalog.copy()
+                # Build catalog: id → answer_spectrum (for normalization)
+                id_catalog = {}
                 if questions:
                     for q in questions:
                         q_dict = q.model_dump() if hasattr(q, 'model_dump') else q
-                        node_catalog[q_dict['type']] = {
+                        id_catalog[q_dict['id']] = {
+                            "type": q_dict['type'],  # Keep type for normalization
                             "answer_spectrum": q_dict['answer_spectrum'],
                             "normalization_rules": None  # Node-Questions haben keine Synonyme
                         }
-                        logger.debug(f"Node {node.id}: Override catalog for '{q_dict['type']}' with node-specific spectrum")
 
-                normalized_signals = normalize_all_signals(
-                    decision_signals=parsed["decision_signals"],
-                    catalog_dict=node_catalog
-                )
+                # Normalize each signal (signals keyed by ID now)
+                for signal_id, signal_value in parsed["decision_signals"].items():
+                    if signal_id in id_catalog:
+                        q_config = id_catalog[signal_id]
+                        # Use the type-based catalog for normalization rules (if any)
+                        type_catalog_entry = catalog.get(q_config['type'], {})
+
+                        # Normalize with question-specific spectrum
+                        normalized = normalize_signal_value(
+                            raw_value=signal_value,
+                            answer_spectrum=q_config['answer_spectrum'],
+                            normalization_rules=type_catalog_entry.get('normalization_rules')
+                        )
+
+                        normalized_signals.append(NormalizedSignal(
+                            question_type=signal_id,  # Store ID as question_type (for template access)
+                            raw_value=signal_value,
+                            normalized_value=normalized.get('normalized_value'),
+                            status=normalized.get('status'),
+                            confidence=normalized.get('confidence'),
+                            metadata=normalized.get('metadata')
+                        ))
+                        logger.debug(f"Node {node.id}: Normalized signal '{signal_id}' = '{signal_value}' → '{normalized.get('normalized_value')}'")
+
                 logger.info(f"Node {node.id}: Normalized {len(normalized_signals)} signals")
 
             return NodeExecutionState(
@@ -603,41 +625,18 @@ def execute_end_node(
                     "status": node_state.status.value if node_state.status else "unknown",
                 }
 
-                # Build direct question_type → question_id mapping
-                question_type_to_id = {}
-                if graph:
-                    workflow_node = next((n for n in graph.nodes if n.id == node_id), None)
-                    if workflow_node and workflow_node.question_augmentations:
-                        for q in workflow_node.question_augmentations:
-                            q_dict = q.model_dump() if hasattr(q, 'model_dump') else q
-                            q_type = q_dict.get('type')
-                            q_id = q_dict.get('id')
-                            if q_type and q_id:
-                                # WICHTIG: Wenn mehrere Fragen den gleichen type haben, ist das ein Fehler!
-                                if q_type in question_type_to_id:
-                                    logger.error(
-                                        f"DUPLICATE question type '{q_type}'! "
-                                        f"First ID: {question_type_to_id[q_type]}, Second ID: {q_id}. "
-                                        f"Each question MUST have a UNIQUE type!"
-                                    )
-                                question_type_to_id[q_type] = q_id
-
                 # Add normalized signals as {{node_id.signal_ID}}
+                # NOTE: question_type now IS the ID (not the type!)
                 if node_state.normalized_signals:
                     for signal in node_state.normalized_signals:
                         # Convert NormalizedSignal object to dict if needed
                         signal_dict = signal.model_dump() if hasattr(signal, 'model_dump') else signal
-                        q_type = signal_dict['question_type']
+                        q_id = signal_dict['question_type']  # This is actually the ID now!
 
-                        # Direct lookup: question_type → question_id
-                        if q_type in question_type_to_id:
-                            q_id = question_type_to_id[q_type]
-                            signal_key = f"signal_{q_id}"
-                            signal_value = signal_dict['normalized_value'] or signal_dict['raw_value']
-                            node_context[signal_key] = signal_value
-                            logger.info(f"Mapped signal: {q_type} → {signal_key} = '{signal_value}'")
-                        else:
-                            logger.warning(f"No question_id found for signal type='{q_type}' (available types: {list(question_type_to_id.keys())})")
+                        signal_key = f"signal_{q_id}"
+                        signal_value = signal_dict['normalized_value'] or signal_dict['raw_value']
+                        node_context[signal_key] = signal_value
+                        logger.info(f"Mapped signal: {q_id} → {signal_key} = '{signal_value}'")
 
                 # Add question texts as {{node_id.question_ID}}
                 if graph: