feat: debug system for prompt execution (Issue #28)
All checks were successful
Deploy Development / deploy (push) Successful in 50s
Build Test / lint-backend (push) Successful in 0s
Build Test / build-frontend (push) Successful in 13s

- Backend: debug mode in prompt_executor with placeholder tracking
- Backend: show resolved/unresolved placeholders, final prompts, AI responses
- Frontend: test button in UnifiedPromptModal for saved prompts
- Frontend: debug output viewer with JSON preview
- Frontend: wider placeholder example fields in PlaceholderPicker

Resolves pipeline execution debugging issues.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Lars 2026-03-26 08:01:33 +01:00
parent 4ba03c2a94
commit 7f2ba4fbad
4 changed files with 211 additions and 33 deletions

View File

@ -14,29 +14,50 @@ from db import get_db, get_cursor, r2d
from fastapi import HTTPException from fastapi import HTTPException
def resolve_placeholders(template: str, variables: Dict[str, Any], debug_info: Optional[Dict] = None) -> str:
    """
    Replace {{placeholder}} tokens in *template* with values from *variables*.

    Args:
        template: String containing {{key}} placeholders.
        variables: Mapping of key -> substitution value.
        debug_info: Optional dict to collect debug information. When given,
            it is populated with:
              - 'resolved_placeholders': key -> value preview (truncated to
                100 chars, with a trailing '...' marker when cut)
              - 'unresolved_placeholders': list of keys that had no value

    Returns:
        The template with all known placeholders replaced. Placeholders
        without a matching variable are left untouched so they stay visible
        to downstream consumers (and to the debug viewer).
    """
    resolved = {}
    unresolved = []

    def replacer(match):
        key = match.group(1).strip()
        if key in variables:
            value = variables[key]
            # Embed dict/list values as JSON so the result stays a plain string
            if isinstance(value, (dict, list)):
                resolved_value = json.dumps(value, ensure_ascii=False)
            else:
                resolved_value = str(value)

            # Track resolution for debug (preview capped at 100 chars)
            if debug_info is not None:
                resolved[key] = resolved_value[:100] + ('...' if len(resolved_value) > 100 else '')
            return resolved_value
        else:
            # Keep placeholder if no value found
            if debug_info is not None:
                unresolved.append(key)
            return match.group(0)

    result = re.sub(r'\{\{([^}]+)\}\}', replacer, template)

    # Store collected debug info on the caller-supplied dict
    if debug_info is not None:
        debug_info['resolved_placeholders'] = resolved
        debug_info['unresolved_placeholders'] = unresolved

    return result
def validate_json_output(output: str, schema: Optional[Dict] = None) -> Dict: def validate_json_output(output: str, schema: Optional[Dict] = None) -> Dict:
@ -67,7 +88,8 @@ def validate_json_output(output: str, schema: Optional[Dict] = None) -> Dict:
async def execute_prompt( async def execute_prompt(
prompt_slug: str, prompt_slug: str,
variables: Dict[str, Any], variables: Dict[str, Any],
openrouter_call_func openrouter_call_func,
enable_debug: bool = False
) -> Dict[str, Any]: ) -> Dict[str, Any]:
""" """
Execute a single prompt (base or pipeline type). Execute a single prompt (base or pipeline type).
@ -76,6 +98,7 @@ async def execute_prompt(
prompt_slug: Slug of prompt to execute prompt_slug: Slug of prompt to execute
variables: Dict of variables for placeholder replacement variables: Dict of variables for placeholder replacement
openrouter_call_func: Async function(prompt_text) -> response_text openrouter_call_func: Async function(prompt_text) -> response_text
enable_debug: If True, include debug information in response
Returns: Returns:
Dict with execution results: Dict with execution results:
@ -84,6 +107,7 @@ async def execute_prompt(
"slug": "...", "slug": "...",
"output": "..." | {...}, # String or parsed JSON "output": "..." | {...}, # String or parsed JSON
"stages": [...] # Only for pipeline type "stages": [...] # Only for pipeline type
"debug": {...} # Only if enable_debug=True
} }
""" """
# Load prompt from database # Load prompt from database
@ -104,11 +128,11 @@ async def execute_prompt(
if prompt_type == 'base': if prompt_type == 'base':
# Base prompt: single execution with template # Base prompt: single execution with template
return await execute_base_prompt(prompt, variables, openrouter_call_func) return await execute_base_prompt(prompt, variables, openrouter_call_func, enable_debug)
elif prompt_type == 'pipeline': elif prompt_type == 'pipeline':
# Pipeline prompt: multi-stage execution # Pipeline prompt: multi-stage execution
return await execute_pipeline_prompt(prompt, variables, openrouter_call_func) return await execute_pipeline_prompt(prompt, variables, openrouter_call_func, enable_debug)
else: else:
raise HTTPException(400, f"Unknown prompt type: {prompt_type}") raise HTTPException(400, f"Unknown prompt type: {prompt_type}")
@ -117,19 +141,31 @@ async def execute_prompt(
async def execute_base_prompt( async def execute_base_prompt(
prompt: Dict, prompt: Dict,
variables: Dict[str, Any], variables: Dict[str, Any],
openrouter_call_func openrouter_call_func,
enable_debug: bool = False
) -> Dict[str, Any]: ) -> Dict[str, Any]:
"""Execute a base-type prompt (single template).""" """Execute a base-type prompt (single template)."""
template = prompt.get('template') template = prompt.get('template')
if not template: if not template:
raise HTTPException(400, f"Base prompt missing template: {prompt['slug']}") raise HTTPException(400, f"Base prompt missing template: {prompt['slug']}")
debug_info = {} if enable_debug else None
# Resolve placeholders # Resolve placeholders
prompt_text = resolve_placeholders(template, variables) prompt_text = resolve_placeholders(template, variables, debug_info)
if enable_debug:
debug_info['template'] = template
debug_info['final_prompt'] = prompt_text[:500] + ('...' if len(prompt_text) > 500 else '')
debug_info['available_variables'] = list(variables.keys())
# Call AI # Call AI
response = await openrouter_call_func(prompt_text) response = await openrouter_call_func(prompt_text)
if enable_debug:
debug_info['ai_response_length'] = len(response)
debug_info['ai_response_preview'] = response[:200] + ('...' if len(response) > 200 else '')
# Validate JSON if required # Validate JSON if required
output_format = prompt.get('output_format', 'text') output_format = prompt.get('output_format', 'text')
if output_format == 'json': if output_format == 'json':
@ -137,18 +173,24 @@ async def execute_base_prompt(
else: else:
output = response output = response
return { result = {
"type": "base", "type": "base",
"slug": prompt['slug'], "slug": prompt['slug'],
"output": output, "output": output,
"output_format": output_format "output_format": output_format
} }
if enable_debug:
result['debug'] = debug_info
return result
async def execute_pipeline_prompt( async def execute_pipeline_prompt(
prompt: Dict, prompt: Dict,
variables: Dict[str, Any], variables: Dict[str, Any],
openrouter_call_func openrouter_call_func,
enable_debug: bool = False
) -> Dict[str, Any]: ) -> Dict[str, Any]:
""" """
Execute a pipeline-type prompt (multi-stage). Execute a pipeline-type prompt (multi-stage).
@ -165,6 +207,7 @@ async def execute_pipeline_prompt(
stage_results = [] stage_results = []
context_vars = variables.copy() context_vars = variables.copy()
pipeline_debug = [] if enable_debug else None
# Execute stages in order # Execute stages in order
for stage_def in sorted(stages, key=lambda s: s['stage']): for stage_def in sorted(stages, key=lambda s: s['stage']):
@ -174,6 +217,12 @@ async def execute_pipeline_prompt(
if not stage_prompts: if not stage_prompts:
continue continue
stage_debug = {} if enable_debug else None
if enable_debug:
stage_debug['stage'] = stage_num
stage_debug['available_variables'] = list(context_vars.keys())
stage_debug['prompts'] = []
# Execute all prompts in this stage (parallel concept, sequential impl for now) # Execute all prompts in this stage (parallel concept, sequential impl for now)
stage_outputs = {} stage_outputs = {}
@ -182,25 +231,46 @@ async def execute_pipeline_prompt(
output_key = prompt_def.get('output_key', f'stage{stage_num}') output_key = prompt_def.get('output_key', f'stage{stage_num}')
output_format = prompt_def.get('output_format', 'text') output_format = prompt_def.get('output_format', 'text')
prompt_debug = {} if enable_debug else None
if source == 'reference': if source == 'reference':
# Reference to another prompt # Reference to another prompt
ref_slug = prompt_def.get('slug') ref_slug = prompt_def.get('slug')
if not ref_slug: if not ref_slug:
raise HTTPException(400, f"Reference prompt missing slug in stage {stage_num}") raise HTTPException(400, f"Reference prompt missing slug in stage {stage_num}")
if enable_debug:
prompt_debug['source'] = 'reference'
prompt_debug['ref_slug'] = ref_slug
# Load referenced prompt # Load referenced prompt
result = await execute_prompt(ref_slug, context_vars, openrouter_call_func) result = await execute_prompt(ref_slug, context_vars, openrouter_call_func, enable_debug)
output = result['output'] output = result['output']
if enable_debug and 'debug' in result:
prompt_debug['ref_debug'] = result['debug']
elif source == 'inline': elif source == 'inline':
# Inline template # Inline template
template = prompt_def.get('template') template = prompt_def.get('template')
if not template: if not template:
raise HTTPException(400, f"Inline prompt missing template in stage {stage_num}") raise HTTPException(400, f"Inline prompt missing template in stage {stage_num}")
prompt_text = resolve_placeholders(template, context_vars) placeholder_debug = {} if enable_debug else None
prompt_text = resolve_placeholders(template, context_vars, placeholder_debug)
if enable_debug:
prompt_debug['source'] = 'inline'
prompt_debug['template'] = template
prompt_debug['final_prompt'] = prompt_text[:500] + ('...' if len(prompt_text) > 500 else '')
prompt_debug.update(placeholder_debug)
response = await openrouter_call_func(prompt_text) response = await openrouter_call_func(prompt_text)
if enable_debug:
prompt_debug['ai_response_length'] = len(response)
prompt_debug['ai_response_preview'] = response[:200] + ('...' if len(response) > 200 else '')
# Validate JSON if required # Validate JSON if required
if output_format == 'json': if output_format == 'json':
output = validate_json_output(response, prompt_def.get('output_schema')) output = validate_json_output(response, prompt_def.get('output_schema'))
@ -214,17 +284,26 @@ async def execute_pipeline_prompt(
stage_outputs[output_key] = output stage_outputs[output_key] = output
# Add to context for next stage # Add to context for next stage
context_vars[f'stage_{stage_num}_{output_key}'] = output context_var_key = f'stage_{stage_num}_{output_key}'
context_vars[context_var_key] = output
if enable_debug:
prompt_debug['output_key'] = output_key
prompt_debug['context_var_key'] = context_var_key
stage_debug['prompts'].append(prompt_debug)
stage_results.append({ stage_results.append({
"stage": stage_num, "stage": stage_num,
"outputs": stage_outputs "outputs": stage_outputs
}) })
if enable_debug:
pipeline_debug.append(stage_debug)
# Final output is last stage's first output # Final output is last stage's first output
final_output = stage_results[-1]['outputs'] if stage_results else {} final_output = stage_results[-1]['outputs'] if stage_results else {}
return { result = {
"type": "pipeline", "type": "pipeline",
"slug": prompt['slug'], "slug": prompt['slug'],
"stages": stage_results, "stages": stage_results,
@ -232,13 +311,22 @@ async def execute_pipeline_prompt(
"output_format": prompt.get('output_format', 'text') "output_format": prompt.get('output_format', 'text')
} }
if enable_debug:
result['debug'] = {
'initial_variables': list(variables.keys()),
'stages': pipeline_debug
}
return result
async def execute_prompt_with_data( async def execute_prompt_with_data(
prompt_slug: str, prompt_slug: str,
profile_id: str, profile_id: str,
modules: Optional[Dict[str, bool]] = None, modules: Optional[Dict[str, bool]] = None,
timeframes: Optional[Dict[str, int]] = None, timeframes: Optional[Dict[str, int]] = None,
openrouter_call_func = None openrouter_call_func = None,
enable_debug: bool = False
) -> Dict[str, Any]: ) -> Dict[str, Any]:
""" """
Execute prompt with data loaded from database. Execute prompt with data loaded from database.
@ -249,6 +337,7 @@ async def execute_prompt_with_data(
modules: Dict of module -> enabled (e.g., {"körper": true}) modules: Dict of module -> enabled (e.g., {"körper": true})
timeframes: Dict of module -> days (e.g., {"körper": 30}) timeframes: Dict of module -> days (e.g., {"körper": 30})
openrouter_call_func: Async function for AI calls openrouter_call_func: Async function for AI calls
enable_debug: If True, include debug information in response
Returns: Returns:
Execution result dict Execution result dict
@ -348,4 +437,4 @@ async def execute_prompt_with_data(
variables['goals_data'] = [] variables['goals_data'] = []
# Execute prompt # Execute prompt
return await execute_prompt(prompt_slug, variables, openrouter_call_func) return await execute_prompt(prompt_slug, variables, openrouter_call_func, enable_debug)

View File

@ -699,6 +699,7 @@ async def execute_unified_prompt(
prompt_slug: str, prompt_slug: str,
modules: Optional[dict] = None, modules: Optional[dict] = None,
timeframes: Optional[dict] = None, timeframes: Optional[dict] = None,
debug: bool = False,
session: dict = Depends(require_auth) session: dict = Depends(require_auth)
): ):
""" """
@ -708,9 +709,10 @@ async def execute_unified_prompt(
prompt_slug: Slug of prompt to execute prompt_slug: Slug of prompt to execute
modules: Dict of enabled modules (e.g., {"körper": true}) modules: Dict of enabled modules (e.g., {"körper": true})
timeframes: Dict of timeframes per module (e.g., {"körper": 30}) timeframes: Dict of timeframes per module (e.g., {"körper": 30})
debug: If true, include debug information (placeholders, final prompts, etc.)
Returns: Returns:
Execution result with outputs Execution result with outputs (and debug info if debug=true)
""" """
profile_id = session['profile_id'] profile_id = session['profile_id']
@ -739,7 +741,8 @@ async def execute_unified_prompt(
profile_id=profile_id, profile_id=profile_id,
modules=modules, modules=modules,
timeframes=timeframes, timeframes=timeframes,
openrouter_call_func=call_openrouter openrouter_call_func=call_openrouter,
enable_debug=debug
) )
return result return result

View File

@ -40,6 +40,11 @@ export default function UnifiedPromptModal({ prompt, onSave, onClose }) {
const [showPlaceholderPicker, setShowPlaceholderPicker] = useState(false) const [showPlaceholderPicker, setShowPlaceholderPicker] = useState(false)
const [pickerTarget, setPickerTarget] = useState(null) // 'base' or {stage, promptIdx} const [pickerTarget, setPickerTarget] = useState(null) // 'base' or {stage, promptIdx}
// Test functionality
const [testing, setTesting] = useState(false)
const [testResult, setTestResult] = useState(null)
const [showDebug, setShowDebug] = useState(false)
useEffect(() => { useEffect(() => {
loadAvailablePrompts() loadAvailablePrompts()
@ -231,6 +236,28 @@ export default function UnifiedPromptModal({ prompt, onSave, onClose }) {
} }
} }
// Run the saved prompt in debug mode and surface the result in the debug viewer.
const handleTest = async () => {
  const slug = prompt?.slug
  // Only prompts that already exist in the database can be executed.
  if (!slug) {
    setError('Bitte erst speichern, dann testen')
    return
  }

  setTesting(true)
  setError(null)
  setTestResult(null)

  try {
    // debug=true asks the backend to include placeholder/prompt/response info
    const result = await api.executeUnifiedPrompt(slug, null, null, true)
    setTestResult(result)
    setShowDebug(true)
  } catch (e) {
    setError(`Test-Fehler: ${e.message}`)
  } finally {
    setTesting(false)
  }
}
return ( return (
<div style={{ <div style={{
position: 'fixed', inset: 0, background: 'rgba(0,0,0,0.5)', position: 'fixed', inset: 0, background: 'rgba(0,0,0,0.5)',
@ -567,21 +594,79 @@ export default function UnifiedPromptModal({ prompt, onSave, onClose }) {
</div> </div>
)} )}
{/* Debug Output */}
{showDebug && testResult && (
<div style={{
marginTop: 16,
padding: 16,
background: 'var(--surface2)',
borderRadius: 8,
border: '1px solid var(--border)'
}}>
<div style={{
display: 'flex',
justifyContent: 'space-between',
alignItems: 'center',
marginBottom: 12
}}>
<h3 style={{ margin: 0, fontSize: 14, fontWeight: 600 }}>
🔬 Debug-Info
</h3>
<button
onClick={() => setShowDebug(false)}
style={{ background: 'none', border: 'none', cursor: 'pointer', padding: 4 }}
>
<X size={16} color="var(--text3)" />
</button>
</div>
<pre style={{
fontSize: 11,
fontFamily: 'monospace',
background: 'var(--bg)',
padding: 12,
borderRadius: 6,
overflow: 'auto',
maxHeight: 400,
lineHeight: 1.5,
color: 'var(--text2)'
}}>
{JSON.stringify(testResult.debug || testResult, null, 2)}
</pre>
</div>
)}
{/* Actions */} {/* Actions */}
<div style={{ <div style={{
display: 'flex', gap: 12, justifyContent: 'flex-end', display: 'flex', gap: 12, justifyContent: 'space-between',
paddingTop: 16, borderTop: '1px solid var(--border)' paddingTop: 16, borderTop: '1px solid var(--border)'
}}> }}>
<button className="btn" onClick={onClose}> <div>
Abbrechen {prompt?.slug && (
</button> <button
<button className="btn"
className="btn btn-primary" onClick={handleTest}
onClick={handleSave} disabled={testing || loading}
disabled={loading} style={{
> background: testing ? 'var(--surface)' : 'var(--accent)',
{loading ? 'Speichern...' : 'Speichern'} color: testing ? 'var(--text3)' : 'white'
</button> }}
>
{testing ? '🔬 Teste...' : '🔬 Test ausführen'}
</button>
)}
</div>
<div style={{ display: 'flex', gap: 12 }}>
<button className="btn" onClick={onClose}>
Abbrechen
</button>
<button
className="btn btn-primary"
onClick={handleSave}
disabled={loading}
>
{loading ? 'Speichern...' : 'Speichern'}
</button>
</div>
</div> </div>
</div> </div>

View File

@ -307,8 +307,9 @@ export const api = {
executePipeline: (configId=null) => req('/insights/pipeline' + (configId ? `?config_id=${configId}` : ''), json({})), executePipeline: (configId=null) => req('/insights/pipeline' + (configId ? `?config_id=${configId}` : ''), json({})),
// Unified Prompt System (Issue #28 Phase 2) // Unified Prompt System (Issue #28 Phase 2)
executeUnifiedPrompt: (slug, modules=null, timeframes=null) => { executeUnifiedPrompt: (slug, modules=null, timeframes=null, debug=false) => {
const params = new URLSearchParams({ prompt_slug: slug }) const params = new URLSearchParams({ prompt_slug: slug })
if (debug) params.append('debug', 'true')
const body = {} const body = {}
if (modules) body.modules = modules if (modules) body.modules = modules
if (timeframes) body.timeframes = timeframes if (timeframes) body.timeframes = timeframes