From de5b8cbf15a86382ae3b1102495dc32b844eb2eb Mon Sep 17 00:00:00 2001 From: Lars Date: Thu, 9 Apr 2026 21:13:50 +0200 Subject: [PATCH] fix: CRITICAL - Use question ID (not type) for LLM communication MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ROOT ARCHITECTURAL CHANGE: Multiple questions with same type are now supported! Problem: - question_augmenter used q.type as LLM key - If two questions had type="unsicherheit": - LLM saw duplicate keys: "- unsicherheit: [ja/nein]" - Could only answer one - Signals were ambiguous Solution: - Use question.id as LLM key (unique by design) - Keep type for normalization logic - Map id → type internally Backend question_augmenter.py: - format_question_list() now uses q.id as key - Format: "- **q21**: [ja/nein] # Question text" - Question text as comment for LLM context Backend workflow_executor.py: - Removed type→id mapping (no longer needed) - decision_signals now keyed by id (from LLM) - Build id→type catalog for normalization - NormalizedSignal.question_type stores id (not type!) - End Node template: signal_{id} directly available Flow: 1. Questions sent to LLM: "- q21: [ja/nein] # Ist Protein unsicher?" 2. LLM answers: "- q21: nein" 3. Normalization: id→type lookup for spectrum/rules 4. Template: {{ node_4.signal_q21 }} = "nein" Example (TWO unsicherheit questions): Questions: - q21: type=unsicherheit, question="Ist Protein unsicher?" - q22: type=unsicherheit, question="Ist Energie unsicher?" LLM Prompt: ``` ## Entscheidungsfragen - **q21**: [ja/nein] # Ist Protein unsicher? - **q22**: [ja/nein] # Ist Energie unsicher? ``` LLM Response: ``` - q21: nein - q22: ja ``` Template: ``` {{ node_4.signal_q21 }} → "nein" {{ node_4.signal_q22 }} → "ja" ``` BREAKING CHANGE: - Old workflows with decision_signals keyed by type will break - Need to re-execute workflows after update Issue: Cannot have multiple questions with same type Version: 0.9p (workflow module) Part 3: End Node Template Engine - ARCHITECTURAL FIX Co-Authored-By: Claude Opus 4.6 --- backend/question_augmenter.py | 11 +++-- backend/workflow_executor.py | 75 +++++++++++++++++------------------ 2 files changed, 45 insertions(+), 41 deletions(-) diff --git a/backend/question_augmenter.py b/backend/question_augmenter.py index 7da8529..ee15b9d 100644 --- a/backend/question_augmenter.py +++ b/backend/question_augmenter.py @@ -235,10 +235,13 @@ def format_question_list(questions: List[QuestionAugmentation]) -> str: """ Formatiert Fragenliste als Markdown-Liste. + Verwendet question.id als Schlüssel (nicht type), damit mehrere Fragen + des gleichen Typs möglich sind. + Format: ``` - - Relevanz: [ja/nein/unklar] - - Priorität: [hoch/mittel/niedrig/unklar] + - q21: [ja/nein/unklar] # Ist Protein unsicher? + - q22: [ja/nein/unklar] # Ist Energie unsicher? ``` Args: @@ -250,7 +253,9 @@ def format_question_list(questions: List[QuestionAugmentation]) -> str: lines = [] for q in questions: spectrum_str = "/".join(q.answer_spectrum) - lines.append(f"- **{q.type.capitalize()}**: [{spectrum_str}]") + # Use ID as key (unique), show question text as comment for context + question_text = q.question[:50] if q.question else q.type + lines.append(f"- **{q.id}**: [{spectrum_str}] # {question_text}") return "\n".join(lines) diff --git a/backend/workflow_executor.py b/backend/workflow_executor.py index cb3bcb9..e81e574 100644 --- a/backend/workflow_executor.py +++ b/backend/workflow_executor.py @@ -28,7 +28,7 @@ from question_augmenter import ( parse_question_augmentations_from_jsonb ) from result_container_parser import parse_result_container -from normalization_engine import normalize_all_signals, load_question_catalog +from normalization_engine import normalize_all_signals, normalize_signal_value, load_question_catalog from logic_evaluator import evaluate_logic_expression, resolve_signal_reference from join_evaluator import evaluate_join_node as evaluate_join_node_core from db import get_db, get_cursor @@ -311,23 +311,45 @@ async def execute_node( logger.debug(f"Node {node.id}: Parsed response (status: {parsed['parsing_status']})") # 6. Normalize Signals + # NOTE: decision_signals now use question.id as key (not type) + # We need to build a catalog: id → {type, spectrum} for normalization normalized_signals = [] if parsed["decision_signals"]: - # Hybrid Model: Node-spezifische Questions überschreiben Catalog - node_catalog = catalog.copy() + # Build catalog: id → answer_spectrum (for normalization) + id_catalog = {} if questions: for q in questions: q_dict = q.model_dump() if hasattr(q, 'model_dump') else q - node_catalog[q_dict['type']] = { + id_catalog[q_dict['id']] = { + "type": q_dict['type'], # Keep type for normalization "answer_spectrum": q_dict['answer_spectrum'], "normalization_rules": None # Node-Questions haben keine Synonyme } - logger.debug(f"Node {node.id}: Override catalog for '{q_dict['type']}' with node-specific spectrum") - normalized_signals = normalize_all_signals( - decision_signals=parsed["decision_signals"], - catalog_dict=node_catalog - ) + # Normalize each signal (signals keyed by ID now) + for signal_id, signal_value in parsed["decision_signals"].items(): + if signal_id in id_catalog: + q_config = id_catalog[signal_id] + # Use the type-based catalog for normalization rules (if any) + type_catalog_entry = catalog.get(q_config['type'], {}) + + # Normalize with question-specific spectrum + normalized = normalize_signal_value( + raw_value=signal_value, + answer_spectrum=q_config['answer_spectrum'], + normalization_rules=type_catalog_entry.get('normalization_rules') + ) + + normalized_signals.append(NormalizedSignal( + question_type=signal_id, # Store ID as question_type (for template access) + raw_value=signal_value, + normalized_value=normalized.get('normalized_value'), + status=normalized.get('status'), + confidence=normalized.get('confidence'), + metadata=normalized.get('metadata') + )) + logger.debug(f"Node {node.id}: Normalized signal '{signal_id}' = '{signal_value}' → '{normalized.get('normalized_value')}'") + logger.info(f"Node {node.id}: Normalized {len(normalized_signals)} signals") return NodeExecutionState( @@ -603,41 +625,18 @@ def execute_end_node( "status": node_state.status.value if node_state.status else "unknown", } - # Build direct question_type → question_id mapping - question_type_to_id = {} - if graph: - workflow_node = next((n for n in graph.nodes if n.id == node_id), None) - if workflow_node and workflow_node.question_augmentations: - for q in workflow_node.question_augmentations: - q_dict = q.model_dump() if hasattr(q, 'model_dump') else q - q_type = q_dict.get('type') - q_id = q_dict.get('id') - if q_type and q_id: - # WICHTIG: Wenn mehrere Fragen den gleichen type haben, ist das ein Fehler! - if q_type in question_type_to_id: - logger.error( - f"DUPLICATE question type '{q_type}'! " - f"First ID: {question_type_to_id[q_type]}, Second ID: {q_id}. " - f"Each question MUST have a UNIQUE type!" - ) - question_type_to_id[q_type] = q_id - # Add normalized signals as {{node_id.signal_ID}} + # NOTE: question_type now IS the ID (not the type!) if node_state.normalized_signals: for signal in node_state.normalized_signals: # Convert NormalizedSignal object to dict if needed signal_dict = signal.model_dump() if hasattr(signal, 'model_dump') else signal - q_type = signal_dict['question_type'] + q_id = signal_dict['question_type'] # This is actually the ID now! - # Direct lookup: question_type → question_id - if q_type in question_type_to_id: - q_id = question_type_to_id[q_type] - signal_key = f"signal_{q_id}" - signal_value = signal_dict['normalized_value'] or signal_dict['raw_value'] - node_context[signal_key] = signal_value - logger.info(f"Mapped signal: {q_type} → {signal_key} = '{signal_value}'") - else: - logger.warning(f"No question_id found for signal type='{q_type}' (available types: {list(question_type_to_id.keys())})") + signal_key = f"signal_{q_id}" + signal_value = signal_dict['normalized_value'] or signal_dict['raw_value'] + node_context[signal_key] = signal_value + logger.info(f"Mapped signal: {q_id} → {signal_key} = '{signal_value}'") # Add question texts as {{node_id.question_ID}} if graph: