From d6741cd90453a41857cf446e873efb8532e9feed Mon Sep 17 00:00:00 2001 From: MrFlounder Date: Sat, 21 Feb 2026 16:12:52 -0800 Subject: [PATCH 1/2] fix(crab-pf): replace broken verify with smoke+session test, add session handling The verify tool was false-positive: configs had a `redteam` section but no `tests` array, so `promptfoo eval` ran zero tests and reported success. Changes: - Replace redteam config with 2 simple test cases + defaultTest assertion - Rewrite verify: direct callApi smoke test + session test, then promptfoo eval - Add cache-busting for dynamic imports (Node caches rewritten provider.js) - Parse eval output properly (require >0 passed, 0 failed) - Add session handling to system prompt (callApi signature, sessionId contract) - Update tool schema (remove unused numTests param) - Fix GPT-5/o1/o3 compat (max_completion_tokens, omit temperature) Co-Authored-By: Claude Opus 4.6 --- plugins/promptfoo/src/agent/loop.ts | 109 ++++++++++++++----- plugins/promptfoo/src/agent/providers.ts | 8 +- plugins/promptfoo/src/agent/system-prompt.ts | 34 ++++-- plugins/promptfoo/src/agent/tools.ts | 7 +- plugins/promptfoo/src/generator/config.ts | 24 ++-- 5 files changed, 123 insertions(+), 59 deletions(-) diff --git a/plugins/promptfoo/src/agent/loop.ts b/plugins/promptfoo/src/agent/loop.ts index 4585b07..f90e3af 100644 --- a/plugins/promptfoo/src/agent/loop.ts +++ b/plugins/promptfoo/src/agent/loop.ts @@ -13,7 +13,9 @@ import { toOpenAITools, toAnthropicTools } from './tools.js'; import type { LLMProvider, Message, ToolCall, ChatResponse } from './providers.js'; import type { DiscoveryResult } from '../types.js'; import * as fs from 'node:fs'; +import * as path from 'node:path'; import { execSync } from 'node:child_process'; +import { pathToFileURL } from 'node:url'; export interface AgentOptions { context: string; // Raw artifact or description @@ -76,7 +78,7 @@ Steps: 2. Send a probe to verify connectivity 3. Identify the prompt field and response field 4. Generate the config (and provider file if needed) -5. Verify it works with a mini redteam test +5. Verify it works 6. Call done() when complete`, }, ]; @@ -268,60 +270,113 @@ async function executeTool( } case 'verify': { - const { configFile, numTests } = args as { + const { configFile } = args as { configFile?: string; - numTests?: number; }; const configPath = configFile || state.configFile || 'promptfooconfig.yaml'; + const steps: string[] = []; + + // Step 1: Direct provider smoke + session test + const providerPath = path.join(outputDir, 'provider.js'); + if (fs.existsSync(providerPath)) { + // Install dependencies first if package.json exists + const packageJsonPath = path.join(outputDir, 'package.json'); + if (fs.existsSync(packageJsonPath)) { + try { + execSync(`cd "${outputDir}" && npm install --silent 2>&1`, { + timeout: 60000, + encoding: 'utf-8', + }); + } catch { + // Ignore install errors, will surface in import + } + } - // Install dependencies if package.json exists - const packageJsonPath = `${outputDir}/package.json`; - if (fs.existsSync(packageJsonPath)) { - try { - execSync(`cd "${outputDir}" && npm install --silent 2>&1`, { - timeout: 60000, - encoding: 'utf-8', - }); - } catch { - // Ignore install errors, will fail in eval if deps missing + const providerUrl = pathToFileURL(path.resolve(providerPath)).href + `?t=${Date.now()}`; + const mod = await import(providerUrl); + const ProviderClass = mod.default; + const instance = new ProviderClass({ config: {} }); + + // Smoke test + const r1 = await instance.callApi('Hello, this is a test message', { vars: {} }, {}); + if (!r1 || !r1.output || r1.error) { + const err = r1?.error || 'empty output'; + steps.push(`Smoke test FAILED: ${err}`); + state.verified = false; + result = { success: false, error: `Provider smoke test failed: ${err}`, steps }; + break; + } + steps.push(`Smoke test PASSED: got ${r1.output.length} chars`); + + // Session test — second call, passing sessionId from first response (mimics promptfoo strategy flow) + const sessionContext = r1.sessionId + ? { vars: { sessionId: r1.sessionId } } + : { vars: {} }; + const r2 = await instance.callApi('Follow up question', sessionContext, {}); + if (!r2 || !r2.output || r2.error) { + const err = r2?.error || 'empty output'; + steps.push(`Session test FAILED: ${err}`); + state.verified = false; + result = { success: false, error: `Provider session test failed: ${err}`, steps }; + break; } + steps.push(`Session test PASSED: got ${r2.output.length} chars${r1.sessionId ? `, sessionId: ${r1.sessionId}` : ''}`); } - // Try to run promptfoo eval + // Step 2: Run promptfoo eval try { const output = execSync( `cd "${outputDir}" && npx promptfoo eval -c "${configPath}" --no-progress-bar 2>&1`, { timeout: 120000, encoding: 'utf-8' } ); - // Check for actual failures, ignoring version warnings - const hasTestFailure = output.includes('[FAIL]') || output.includes('Test failed'); + const passMatch = output.match(/(\d+) passed/); + const failMatch = output.match(/(\d+) failed/); + const errorMatch = output.match(/(\d+) error/); + const passed = passMatch ? parseInt(passMatch[1]) : 0; + const failed = failMatch ? parseInt(failMatch[1]) : 0; + const errors = errorMatch ? parseInt(errorMatch[1]) : 0; + const hasConfigError = output.includes('Error loading config') || output.includes('Invalid config'); - const hasProviderError = output.includes('Provider error') || output.includes('Connection refused'); - state.verified = !hasTestFailure && !hasConfigError && !hasProviderError; + if (passed === 0 && failed === 0) { + steps.push('Eval FAILED: zero tests ran'); + state.verified = false; + } else if (failed > 0 || errors > 0 || hasConfigError) { + steps.push(`Eval FAILED: ${passed} passed, ${failed} failed, ${errors} errors`); + state.verified = false; + } else { + steps.push(`Eval PASSED: ${passed} passed, ${failed} failed`); + state.verified = true; + } result = { success: state.verified, output: output.slice(0, 1000), + steps, }; } catch (error) { const err = error as { message: string; stdout?: string; stderr?: string }; - // If promptfoo ran but returned non-zero, check if tests actually passed const stdout = err.stdout || ''; - const hasPassingOutput = stdout.includes('[PASS]') || stdout.includes('Evaluation complete'); - result = { - success: hasPassingOutput, - error: hasPassingOutput ? undefined : err.message, - stdout: stdout.slice(0, 1000), - stderr: err.stderr?.slice(0, 500), - }; + const passMatch = stdout.match(/(\d+) passed/); + const passed = passMatch ? parseInt(passMatch[1]) : 0; - if (hasPassingOutput) { + if (passed > 0 && !stdout.includes('failed')) { + steps.push(`Eval PASSED (non-zero exit): ${passed} passed`); state.verified = true; + } else { + steps.push(`Eval FAILED: ${err.message.slice(0, 200)}`); + state.verified = false; } + + result = { + success: state.verified, + error: state.verified ? undefined : err.message, + stdout: stdout.slice(0, 1000), + steps, + }; } break; } diff --git a/plugins/promptfoo/src/agent/providers.ts b/plugins/promptfoo/src/agent/providers.ts index 35c3935..eef4ff4 100644 --- a/plugins/promptfoo/src/agent/providers.ts +++ b/plugins/promptfoo/src/agent/providers.ts @@ -63,8 +63,12 @@ export class OpenAIProvider implements LLMProvider { model: this.model, messages: options.messages.map((m) => this.toOpenAIMessage(m)), tools: options.tools, - max_tokens: options.maxTokens || 4096, - temperature: options.temperature ?? 0.7, + ...(this.model.startsWith('gpt-5') || this.model.startsWith('o1') || this.model.startsWith('o3') + ? { max_completion_tokens: options.maxTokens || 4096 } + : { max_tokens: options.maxTokens || 4096 }), + ...(this.model.startsWith('gpt-5') || this.model.startsWith('o1') || this.model.startsWith('o3') + ? {} + : { temperature: options.temperature ?? 0.7 }), }), }); diff --git a/plugins/promptfoo/src/agent/system-prompt.ts b/plugins/promptfoo/src/agent/system-prompt.ts index 1235dc3..b9527d5 100644 --- a/plugins/promptfoo/src/agent/system-prompt.ts +++ b/plugins/promptfoo/src/agent/system-prompt.ts @@ -10,7 +10,7 @@ export const DISCOVERY_SYSTEM_PROMPT = `You are a target discovery agent for pro 1. Probe the target to understand how it communicates 2. Generate a working promptfoo config (YAML + custom provider if needed) -3. Verify it works with a mini redteam test +3. Verify it works ## Tools @@ -18,7 +18,7 @@ export const DISCOVERY_SYSTEM_PROMPT = `You are a target discovery agent for pro - **probe_ws(url, message, headers?, timeout?)** - Test WebSocket endpoint - **write_config(description, providerType, providerConfig)** - Write promptfooconfig.yaml - **write_provider(code, filename, language)** - Write custom provider.js/py -- **verify()** - Run promptfoo eval to test the config +- **verify()** - Test provider directly (smoke + session), then run promptfoo eval - **done(summary, configFile, verified)** - Signal completion ## Promptfoo Config Format @@ -56,26 +56,46 @@ export default class Provider { return 'my-provider'; } - async callApi(prompt) { + async callApi(prompt, context, options) { + // context.vars.sessionId is set on subsequent turns if you returned sessionId previously // Your logic here... - return { output: "the response string" }; // MUST return { output: string } + return { + output: "the response string", + sessionId: "optional-session-id", // Return if target uses sessions + }; } } \`\`\` **Key requirements:** - Must be a class with \`export default\` -- Must have \`callApi(prompt)\` method -- \`callApi\` must return \`{ output: string }\`, not just a string +- Must have \`callApi(prompt, context, options)\` method — all 3 params +- \`callApi\` must return \`{ output: string, sessionId?: string }\` - Use native fetch (Node 18+), import 'ws' for WebSocket +## Session Handling + +Promptfoo uses sessions for multi-turn conversations (e.g. redteam attack strategies like Crescendo and GOAT). The flow works like this: + +1. Strategy calls \`callApi(prompt, context)\` on turn 1 +2. Provider talks to the target, gets a response and a session/conversation ID +3. Provider returns \`{ output: "...", sessionId: "abc123" }\` +4. Promptfoo stores the sessionId and passes it back on turn 2+ via \`context.vars.sessionId\` +5. Provider reads \`context.vars.sessionId\` and reuses the existing conversation + +**If the target is stateful (uses sessions, conversation IDs, etc.), the provider MUST support this flow.** Otherwise multi-turn attacks will start a new conversation on every turn and fail. + +For **custom providers**: Accept the \`context\` parameter, check \`context.vars.sessionId\` to reuse an existing session, and return \`sessionId\` in the response. + +For **HTTP providers**: Use \`sessionParser\` in the config to extract the session ID from the response (e.g. \`sessionParser: json.session_id\`). Promptfoo handles the rest automatically. + ## Workflow 1. Read the target spec to understand the API 2. Probe to verify connectivity and response format 3. Decide: HTTP provider (simple) or custom provider (complex) 4. Write config (and provider.js if needed) -5. Verify with promptfoo eval +5. Verify — runs provider smoke test + session test, then promptfoo eval with 2 test cases 6. Call done() with results Be intelligent. Figure out the target's protocol, auth, request/response format from probing. Generate configs that work.`; diff --git a/plugins/promptfoo/src/agent/tools.ts b/plugins/promptfoo/src/agent/tools.ts index 0108da2..cf752fe 100644 --- a/plugins/promptfoo/src/agent/tools.ts +++ b/plugins/promptfoo/src/agent/tools.ts @@ -133,7 +133,7 @@ export const toolDefinitions: ToolDefinition[] = [ }, { name: 'verify', - description: 'Run a mini redteam test to verify the configuration works. This sends a few test prompts through the target.', + description: 'Verify the configuration works. Tests the provider directly (smoke + session test), then runs promptfoo eval with 2 simple test cases.', parameters: { type: 'object', properties: { @@ -142,11 +142,6 @@ export const toolDefinitions: ToolDefinition[] = [ description: 'Path to the config file to verify (default: promptfooconfig.yaml)', default: 'promptfooconfig.yaml', }, - numTests: { - type: 'number', - description: 'Number of test prompts to send (default: 3)', - default: 3, - }, }, }, }, diff --git a/plugins/promptfoo/src/generator/config.ts b/plugins/promptfoo/src/generator/config.ts index afb517a..bc749e8 100644 --- a/plugins/promptfoo/src/generator/config.ts +++ b/plugins/promptfoo/src/generator/config.ts @@ -62,26 +62,16 @@ export function generateConfig(options: GenerateConfigOptions): GeneratedConfig const config = { description, providers: [provider], - // Default test with the prompt variable + prompts: ['{{prompt}}'], defaultTest: { - vars: { - prompt: '{{prompt}}', - }, - }, - // Mini redteam for verification - redteam: { - plugins: ['harmful:hate'], - strategies: [ - { id: 'jailbreak' }, - { - id: 'jailbreak:composite', - config: { - maxTurns: 3, - }, - }, + assert: [ + { type: 'javascript', value: 'output && output.length > 0' }, ], - numTests: 1, }, + tests: [ + { vars: { prompt: 'Hello, how are you?' } }, + { vars: { prompt: 'What can you help me with?' } }, + ], }; // Generate YAML From 2fb57ff2f411dac19e4e1ce4c7511c88759af5bc Mon Sep 17 00:00:00 2001 From: MrFlounder Date: Sat, 21 Feb 2026 16:27:41 -0800 Subject: [PATCH 2/2] fix(crab-pf): include error content in tool results for LLM reasoning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two issues where the LLM was flying blind after failures: 1. Tool exceptions: outer catch returned result=null, error=message but only result was serialized into the tool message — LLM saw "null" with no explanation. Now includes both error and result in content. 2. Verify smoke test: "empty output" told the LLM nothing about why. Now includes the full provider response (output, sessionId, error) so the LLM can diagnose the actual problem (e.g. missing polling loop, wrong response field, auth failure). Follows opencode's pattern where tool errors go into content for the LLM to reason about, rather than being silently dropped. Co-Authored-By: Claude Opus 4.6 --- plugins/promptfoo/src/agent/loop.ts | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/plugins/promptfoo/src/agent/loop.ts b/plugins/promptfoo/src/agent/loop.ts index f90e3af..63a9c1e 100644 --- a/plugins/promptfoo/src/agent/loop.ts +++ b/plugins/promptfoo/src/agent/loop.ts @@ -154,11 +154,14 @@ Steps: toolCalls: response.toolCalls, }); - // 5. Add tool results + // 5. Add tool results — include error in content so LLM can reason about failures for (const result of toolResults) { + const content = result.error + ? JSON.stringify({ error: result.error, result: result.result }) + : JSON.stringify(result.result); messages.push({ role: 'tool', - content: JSON.stringify(result.result), + content, toolCallId: result.toolCallId, }); } @@ -301,10 +304,10 @@ async function executeTool( // Smoke test const r1 = await instance.callApi('Hello, this is a test message', { vars: {} }, {}); if (!r1 || !r1.output || r1.error) { - const err = r1?.error || 'empty output'; - steps.push(`Smoke test FAILED: ${err}`); + const diag = JSON.stringify(r1, null, 2)?.slice(0, 500) || 'null response'; + steps.push(`Smoke test FAILED. Provider returned: ${diag}`); state.verified = false; - result = { success: false, error: `Provider smoke test failed: ${err}`, steps }; + result = { success: false, error: `Provider smoke test failed`, providerResponse: r1, steps }; break; } steps.push(`Smoke test PASSED: got ${r1.output.length} chars`); @@ -315,10 +318,10 @@ async function executeTool( : { vars: {} }; const r2 = await instance.callApi('Follow up question', sessionContext, {}); if (!r2 || !r2.output || r2.error) { - const err = r2?.error || 'empty output'; - steps.push(`Session test FAILED: ${err}`); + const diag = JSON.stringify(r2, null, 2)?.slice(0, 500) || 'null response'; + steps.push(`Session test FAILED. Provider returned: ${diag}`); state.verified = false; - result = { success: false, error: `Provider session test failed: ${err}`, steps }; + result = { success: false, error: `Provider session test failed`, providerResponse: r2, steps }; break; } steps.push(`Session test PASSED: got ${r2.output.length} chars${r1.sessionId ? `, sessionId: ${r1.sessionId}` : ''}`);