feat: debug system for prompt execution (Issue #28)

- Backend: debug mode in prompt_executor with placeholder tracking - Backend: show resolved/unresolved placeholders, final prompts, AI responses - Frontend: test button in UnifiedPromptModal for saved prompts - Frontend: debug output viewer with JSON preview - Frontend: wider placeholder example fields in PlaceholderPicker Resolves pipeline execution debugging issues. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-26 08:01:33 +01:00 · 2026-03-26 08:01:33 +01:00 · 7f2ba4fbad
commit 7f2ba4fbad
parent 4ba03c2a94
4 changed files with 211 additions and 33 deletions
--- a/backend/prompt_executor.py
+++ b/backend/prompt_executor.py
@ -14,29 +14,50 @@ from db import get_db, get_cursor, r2d
 from fastapi import HTTPException


-def resolve_placeholders(template: str, variables: Dict[str, Any]) -> str:
+def resolve_placeholders(template: str, variables: Dict[str, Any], debug_info: Optional[Dict] = None) -> str:
    """
    Replace {{placeholder}} with values from variables dict.

    Args:
        template: String with {{key}} placeholders
        variables: Dict of key -> value mappings
+        debug_info: Optional dict to collect debug information

    Returns:
        Template with placeholders replaced
    """
+    resolved = {}
+    unresolved = []
+
    def replacer(match):
        key = match.group(1).strip()
        if key in variables:
            value = variables[key]
            # Convert dict/list to JSON string
            if isinstance(value, (dict, list)):
-                return json.dumps(value, ensure_ascii=False)
-            return str(value)
-        # Keep placeholder if no value found
-        return match.group(0)
+                resolved_value = json.dumps(value, ensure_ascii=False)
+            else:
+                resolved_value = str(value)

-    return re.sub(r'\{\{([^}]+)\}\}', replacer, template)
+            # Track resolution for debug
+            if debug_info is not None:
+                resolved[key] = resolved_value[:100] + ('...' if len(resolved_value) > 100 else '')
+
+            return resolved_value
+        else:
+            # Keep placeholder if no value found
+            if debug_info is not None:
+                unresolved.append(key)
+            return match.group(0)
+
+    result = re.sub(r'\{\{([^}]+)\}\}', replacer, template)
+
+    # Store debug info
+    if debug_info is not None:
+        debug_info['resolved_placeholders'] = resolved
+        debug_info['unresolved_placeholders'] = unresolved
+
+    return result


 def validate_json_output(output: str, schema: Optional[Dict] = None) -> Dict:
@ -67,7 +88,8 @@ def validate_json_output(output: str, schema: Optional[Dict] = None) -> Dict:
 async def execute_prompt(
    prompt_slug: str,
    variables: Dict[str, Any],
-    openrouter_call_func
+    openrouter_call_func,
+    enable_debug: bool = False
 ) -> Dict[str, Any]:
    """
    Execute a single prompt (base or pipeline type).
@ -76,6 +98,7 @@ async def execute_prompt(
        prompt_slug: Slug of prompt to execute
        variables: Dict of variables for placeholder replacement
        openrouter_call_func: Async function(prompt_text) -> response_text
+        enable_debug: If True, include debug information in response

    Returns:
        Dict with execution results:
@ -84,6 +107,7 @@ async def execute_prompt(
            "slug": "...",
            "output": "..." | {...},  # String or parsed JSON
            "stages": [...]  # Only for pipeline type
+            "debug": {...}  # Only if enable_debug=True
        }
    """
    # Load prompt from database
@ -104,11 +128,11 @@ async def execute_prompt(

    if prompt_type == 'base':
        # Base prompt: single execution with template
-        return await execute_base_prompt(prompt, variables, openrouter_call_func)
+        return await execute_base_prompt(prompt, variables, openrouter_call_func, enable_debug)

    elif prompt_type == 'pipeline':
        # Pipeline prompt: multi-stage execution
-        return await execute_pipeline_prompt(prompt, variables, openrouter_call_func)
+        return await execute_pipeline_prompt(prompt, variables, openrouter_call_func, enable_debug)

    else:
        raise HTTPException(400, f"Unknown prompt type: {prompt_type}")
@ -117,19 +141,31 @@ async def execute_prompt(
 async def execute_base_prompt(
    prompt: Dict,
    variables: Dict[str, Any],
-    openrouter_call_func
+    openrouter_call_func,
+    enable_debug: bool = False
 ) -> Dict[str, Any]:
    """Execute a base-type prompt (single template)."""
    template = prompt.get('template')
    if not template:
        raise HTTPException(400, f"Base prompt missing template: {prompt['slug']}")

+    debug_info = {} if enable_debug else None
+
    # Resolve placeholders
-    prompt_text = resolve_placeholders(template, variables)
+    prompt_text = resolve_placeholders(template, variables, debug_info)
+
+    if enable_debug:
+        debug_info['template'] = template
+        debug_info['final_prompt'] = prompt_text[:500] + ('...' if len(prompt_text) > 500 else '')
+        debug_info['available_variables'] = list(variables.keys())

    # Call AI
    response = await openrouter_call_func(prompt_text)

+    if enable_debug:
+        debug_info['ai_response_length'] = len(response)
+        debug_info['ai_response_preview'] = response[:200] + ('...' if len(response) > 200 else '')
+
    # Validate JSON if required
    output_format = prompt.get('output_format', 'text')
    if output_format == 'json':
@ -137,18 +173,24 @@ async def execute_base_prompt(
    else:
        output = response

-    return {
+    result = {
        "type": "base",
        "slug": prompt['slug'],
        "output": output,
        "output_format": output_format
    }

+    if enable_debug:
+        result['debug'] = debug_info
+
+    return result
+

 async def execute_pipeline_prompt(
    prompt: Dict,
    variables: Dict[str, Any],
-    openrouter_call_func
+    openrouter_call_func,
+    enable_debug: bool = False
 ) -> Dict[str, Any]:
    """
    Execute a pipeline-type prompt (multi-stage).
@ -165,6 +207,7 @@ async def execute_pipeline_prompt(

    stage_results = []
    context_vars = variables.copy()
+    pipeline_debug = [] if enable_debug else None

    # Execute stages in order
    for stage_def in sorted(stages, key=lambda s: s['stage']):
@ -174,6 +217,12 @@ async def execute_pipeline_prompt(
        if not stage_prompts:
            continue

+        stage_debug = {} if enable_debug else None
+        if enable_debug:
+            stage_debug['stage'] = stage_num
+            stage_debug['available_variables'] = list(context_vars.keys())
+            stage_debug['prompts'] = []
+
        # Execute all prompts in this stage (parallel concept, sequential impl for now)
        stage_outputs = {}

@ -182,25 +231,46 @@ async def execute_pipeline_prompt(
            output_key = prompt_def.get('output_key', f'stage{stage_num}')
            output_format = prompt_def.get('output_format', 'text')

+            prompt_debug = {} if enable_debug else None
+
            if source == 'reference':
                # Reference to another prompt
                ref_slug = prompt_def.get('slug')
                if not ref_slug:
                    raise HTTPException(400, f"Reference prompt missing slug in stage {stage_num}")

+                if enable_debug:
+                    prompt_debug['source'] = 'reference'
+                    prompt_debug['ref_slug'] = ref_slug
+
                # Load referenced prompt
-                result = await execute_prompt(ref_slug, context_vars, openrouter_call_func)
+                result = await execute_prompt(ref_slug, context_vars, openrouter_call_func, enable_debug)
                output = result['output']

+                if enable_debug and 'debug' in result:
+                    prompt_debug['ref_debug'] = result['debug']
+
            elif source == 'inline':
                # Inline template
                template = prompt_def.get('template')
                if not template:
                    raise HTTPException(400, f"Inline prompt missing template in stage {stage_num}")

-                prompt_text = resolve_placeholders(template, context_vars)
+                placeholder_debug = {} if enable_debug else None
+                prompt_text = resolve_placeholders(template, context_vars, placeholder_debug)
+
+                if enable_debug:
+                    prompt_debug['source'] = 'inline'
+                    prompt_debug['template'] = template
+                    prompt_debug['final_prompt'] = prompt_text[:500] + ('...' if len(prompt_text) > 500 else '')
+                    prompt_debug.update(placeholder_debug)
+
                response = await openrouter_call_func(prompt_text)

+                if enable_debug:
+                    prompt_debug['ai_response_length'] = len(response)
+                    prompt_debug['ai_response_preview'] = response[:200] + ('...' if len(response) > 200 else '')
+
                # Validate JSON if required
                if output_format == 'json':
                    output = validate_json_output(response, prompt_def.get('output_schema'))
@ -214,17 +284,26 @@ async def execute_pipeline_prompt(
            stage_outputs[output_key] = output

            # Add to context for next stage
-            context_vars[f'stage_{stage_num}_{output_key}'] = output
+            context_var_key = f'stage_{stage_num}_{output_key}'
+            context_vars[context_var_key] = output
+
+            if enable_debug:
+                prompt_debug['output_key'] = output_key
+                prompt_debug['context_var_key'] = context_var_key
+                stage_debug['prompts'].append(prompt_debug)

        stage_results.append({
            "stage": stage_num,
            "outputs": stage_outputs
        })

+        if enable_debug:
+            pipeline_debug.append(stage_debug)
+
    # Final output is last stage's first output
    final_output = stage_results[-1]['outputs'] if stage_results else {}

-    return {
+    result = {
        "type": "pipeline",
        "slug": prompt['slug'],
        "stages": stage_results,
@ -232,13 +311,22 @@ async def execute_pipeline_prompt(
        "output_format": prompt.get('output_format', 'text')
    }

+    if enable_debug:
+        result['debug'] = {
+            'initial_variables': list(variables.keys()),
+            'stages': pipeline_debug
+        }
+
+    return result
+

 async def execute_prompt_with_data(
    prompt_slug: str,
    profile_id: str,
    modules: Optional[Dict[str, bool]] = None,
    timeframes: Optional[Dict[str, int]] = None,
-    openrouter_call_func = None
+    openrouter_call_func = None,
+    enable_debug: bool = False
 ) -> Dict[str, Any]:
    """
    Execute prompt with data loaded from database.
@ -249,6 +337,7 @@ async def execute_prompt_with_data(
        modules: Dict of module -> enabled (e.g., {"körper": true})
        timeframes: Dict of module -> days (e.g., {"körper": 30})
        openrouter_call_func: Async function for AI calls
+        enable_debug: If True, include debug information in response

    Returns:
        Execution result dict
@ -348,4 +437,4 @@ async def execute_prompt_with_data(
                variables['goals_data'] = []

    # Execute prompt
-    return await execute_prompt(prompt_slug, variables, openrouter_call_func)
+    return await execute_prompt(prompt_slug, variables, openrouter_call_func, enable_debug)
--- a/backend/routers/prompts.py
+++ b/backend/routers/prompts.py
@ -699,6 +699,7 @@ async def execute_unified_prompt(
    prompt_slug: str,
    modules: Optional[dict] = None,
    timeframes: Optional[dict] = None,
+    debug: bool = False,
    session: dict = Depends(require_auth)
 ):
    """
@ -708,9 +709,10 @@ async def execute_unified_prompt(
        prompt_slug: Slug of prompt to execute
        modules: Dict of enabled modules (e.g., {"körper": true})
        timeframes: Dict of timeframes per module (e.g., {"körper": 30})
+        debug: If true, include debug information (placeholders, final prompts, etc.)

    Returns:
-        Execution result with outputs
+        Execution result with outputs (and debug info if debug=true)
    """
    profile_id = session['profile_id']

@ -739,7 +741,8 @@ async def execute_unified_prompt(
        profile_id=profile_id,
        modules=modules,
        timeframes=timeframes,
-        openrouter_call_func=call_openrouter
+        openrouter_call_func=call_openrouter,
+        enable_debug=debug
    )

    return result
--- a/frontend/src/components/UnifiedPromptModal.jsx
+++ b/frontend/src/components/UnifiedPromptModal.jsx
@ -40,6 +40,11 @@ export default function UnifiedPromptModal({ prompt, onSave, onClose }) {
  const [showPlaceholderPicker, setShowPlaceholderPicker] = useState(false)
  const [pickerTarget, setPickerTarget] = useState(null) // 'base' or {stage, promptIdx}

+  // Test functionality
+  const [testing, setTesting] = useState(false)
+  const [testResult, setTestResult] = useState(null)
+  const [showDebug, setShowDebug] = useState(false)
+
  useEffect(() => {
    loadAvailablePrompts()

@ -231,6 +236,28 @@ export default function UnifiedPromptModal({ prompt, onSave, onClose }) {
    }
  }

+  const handleTest = async () => {
+    // Can only test existing prompts (need slug in database)
+    if (!prompt?.slug) {
+      setError('Bitte erst speichern, dann testen')
+      return
+    }
+
+    setTesting(true)
+    setError(null)
+    setTestResult(null)
+
+    try {
+      const result = await api.executeUnifiedPrompt(prompt.slug, null, null, true)
+      setTestResult(result)
+      setShowDebug(true)
+    } catch (e) {
+      setError('Test-Fehler: ' + e.message)
+    } finally {
+      setTesting(false)
+    }
+  }
+
  return (
    <div style={{
      position: 'fixed', inset: 0, background: 'rgba(0,0,0,0.5)',
@ -567,21 +594,79 @@ export default function UnifiedPromptModal({ prompt, onSave, onClose }) {
          </div>
        )}

+        {/* Debug Output */}
+        {showDebug && testResult && (
+          <div style={{
+            marginTop: 16,
+            padding: 16,
+            background: 'var(--surface2)',
+            borderRadius: 8,
+            border: '1px solid var(--border)'
+          }}>
+            <div style={{
+              display: 'flex',
+              justifyContent: 'space-between',
+              alignItems: 'center',
+              marginBottom: 12
+            }}>
+              <h3 style={{ margin: 0, fontSize: 14, fontWeight: 600 }}>
+                🔬 Debug-Info
+              </h3>
+              <button
+                onClick={() => setShowDebug(false)}
+                style={{ background: 'none', border: 'none', cursor: 'pointer', padding: 4 }}
+              >
+                <X size={16} color="var(--text3)" />
+              </button>
+            </div>
+            <pre style={{
+              fontSize: 11,
+              fontFamily: 'monospace',
+              background: 'var(--bg)',
+              padding: 12,
+              borderRadius: 6,
+              overflow: 'auto',
+              maxHeight: 400,
+              lineHeight: 1.5,
+              color: 'var(--text2)'
+            }}>
+              {JSON.stringify(testResult.debug || testResult, null, 2)}
+            </pre>
+          </div>
+        )}
+
        {/* Actions */}
        <div style={{
-          display: 'flex', gap: 12, justifyContent: 'flex-end',
+          display: 'flex', gap: 12, justifyContent: 'space-between',
          paddingTop: 16, borderTop: '1px solid var(--border)'
        }}>
-          <button className="btn" onClick={onClose}>
-            Abbrechen
-          </button>
-          <button
-            className="btn btn-primary"
-            onClick={handleSave}
-            disabled={loading}
-          >
-            {loading ? 'Speichern...' : 'Speichern'}
-          </button>
+          <div>
+            {prompt?.slug && (
+              <button
+                className="btn"
+                onClick={handleTest}
+                disabled={testing || loading}
+                style={{
+                  background: testing ? 'var(--surface)' : 'var(--accent)',
+                  color: testing ? 'var(--text3)' : 'white'
+                }}
+              >
+                {testing ? '🔬 Teste...' : '🔬 Test ausführen'}
+              </button>
+            )}
+          </div>
+          <div style={{ display: 'flex', gap: 12 }}>
+            <button className="btn" onClick={onClose}>
+              Abbrechen
+            </button>
+            <button
+              className="btn btn-primary"
+              onClick={handleSave}
+              disabled={loading}
+            >
+              {loading ? 'Speichern...' : 'Speichern'}
+            </button>
+          </div>
        </div>
      </div>

--- a/frontend/src/utils/api.js
+++ b/frontend/src/utils/api.js
@ -307,8 +307,9 @@ export const api = {
  executePipeline:        (configId=null) => req('/insights/pipeline' + (configId ? `?config_id=${configId}` : ''), json({})),

  // Unified Prompt System (Issue #28 Phase 2)
-  executeUnifiedPrompt:   (slug, modules=null, timeframes=null) => {
+  executeUnifiedPrompt:   (slug, modules=null, timeframes=null, debug=false) => {
    const params = new URLSearchParams({ prompt_slug: slug })
+    if (debug) params.append('debug', 'true')
    const body = {}
    if (modules) body.modules = modules
    if (timeframes) body.timeframes = timeframes