diff --git a/plugins/promptfoo/src/agent/loop.ts b/plugins/promptfoo/src/agent/loop.ts index 4585b07..63a9c1e 100644 --- a/plugins/promptfoo/src/agent/loop.ts +++ b/plugins/promptfoo/src/agent/loop.ts @@ -13,7 +13,9 @@ import { toOpenAITools, toAnthropicTools } from './tools.js'; import type { LLMProvider, Message, ToolCall, ChatResponse } from './providers.js'; import type { DiscoveryResult } from '../types.js'; import * as fs from 'node:fs'; +import * as path from 'node:path'; import { execSync } from 'node:child_process'; +import { pathToFileURL } from 'node:url'; export interface AgentOptions { context: string; // Raw artifact or description @@ -76,7 +78,7 @@ Steps: 2. Send a probe to verify connectivity 3. Identify the prompt field and response field 4. Generate the config (and provider file if needed) -5. Verify it works with a mini redteam test +5. Verify it works 6. Call done() when complete`, }, ]; @@ -152,11 +154,14 @@ Steps: toolCalls: response.toolCalls, }); - // 5. Add tool results + // 5. Add tool results — include error in content so LLM can reason about failures for (const result of toolResults) { + const content = result.error + ? JSON.stringify({ error: result.error, result: result.result }) + : JSON.stringify(result.result); messages.push({ role: 'tool', - content: JSON.stringify(result.result), + content, toolCallId: result.toolCallId, }); } @@ -268,60 +273,113 @@ async function executeTool( } case 'verify': { - const { configFile, numTests } = args as { + const { configFile } = args as { configFile?: string; - numTests?: number; }; const configPath = configFile || state.configFile || 'promptfooconfig.yaml'; + const steps: string[] = []; + + // Step 1: Direct provider smoke + session test + const providerPath = path.join(outputDir, 'provider.js'); + if (fs.existsSync(providerPath)) { + // Install dependencies first if package.json exists + const packageJsonPath = path.join(outputDir, 'package.json'); + if (fs.existsSync(packageJsonPath)) { + try { + execSync(`cd "${outputDir}" && npm install --silent 2>&1`, { + timeout: 60000, + encoding: 'utf-8', + }); + } catch { + // Ignore install errors, will surface in import + } + } - // Install dependencies if package.json exists - const packageJsonPath = `${outputDir}/package.json`; - if (fs.existsSync(packageJsonPath)) { - try { - execSync(`cd "${outputDir}" && npm install --silent 2>&1`, { - timeout: 60000, - encoding: 'utf-8', - }); - } catch { - // Ignore install errors, will fail in eval if deps missing + const providerUrl = pathToFileURL(path.resolve(providerPath)).href + `?t=${Date.now()}`; + const mod = await import(providerUrl); + const ProviderClass = mod.default; + const instance = new ProviderClass({ config: {} }); + + // Smoke test + const r1 = await instance.callApi('Hello, this is a test message', { vars: {} }, {}); + if (!r1 || !r1.output || r1.error) { + const diag = JSON.stringify(r1, null, 2)?.slice(0, 500) || 'null response'; + steps.push(`Smoke test FAILED. Provider returned: ${diag}`); + state.verified = false; + result = { success: false, error: `Provider smoke test failed`, providerResponse: r1, steps }; + break; + } + steps.push(`Smoke test PASSED: got ${r1.output.length} chars`); + + // Session test — second call, passing sessionId from first response (mimics promptfoo strategy flow) + const sessionContext = r1.sessionId + ? { vars: { sessionId: r1.sessionId } } + : { vars: {} }; + const r2 = await instance.callApi('Follow up question', sessionContext, {}); + if (!r2 || !r2.output || r2.error) { + const diag = JSON.stringify(r2, null, 2)?.slice(0, 500) || 'null response'; + steps.push(`Session test FAILED. Provider returned: ${diag}`); + state.verified = false; + result = { success: false, error: `Provider session test failed`, providerResponse: r2, steps }; + break; } + steps.push(`Session test PASSED: got ${r2.output.length} chars${r1.sessionId ? `, sessionId: ${r1.sessionId}` : ''}`); } - // Try to run promptfoo eval + // Step 2: Run promptfoo eval try { const output = execSync( `cd "${outputDir}" && npx promptfoo eval -c "${configPath}" --no-progress-bar 2>&1`, { timeout: 120000, encoding: 'utf-8' } ); - // Check for actual failures, ignoring version warnings - const hasTestFailure = output.includes('[FAIL]') || output.includes('Test failed'); + const passMatch = output.match(/(\d+) passed/); + const failMatch = output.match(/(\d+) failed/); + const errorMatch = output.match(/(\d+) error/); + const passed = passMatch ? parseInt(passMatch[1]) : 0; + const failed = failMatch ? parseInt(failMatch[1]) : 0; + const errors = errorMatch ? parseInt(errorMatch[1]) : 0; + const hasConfigError = output.includes('Error loading config') || output.includes('Invalid config'); - const hasProviderError = output.includes('Provider error') || output.includes('Connection refused'); - state.verified = !hasTestFailure && !hasConfigError && !hasProviderError; + if (passed === 0 && failed === 0) { + steps.push('Eval FAILED: zero tests ran'); + state.verified = false; + } else if (failed > 0 || errors > 0 || hasConfigError) { + steps.push(`Eval FAILED: ${passed} passed, ${failed} failed, ${errors} errors`); + state.verified = false; + } else { + steps.push(`Eval PASSED: ${passed} passed, ${failed} failed`); + state.verified = true; + } result = { success: state.verified, output: output.slice(0, 1000), + steps, }; } catch (error) { const err = error as { message: string; stdout?: string; stderr?: string }; - // If promptfoo ran but returned non-zero, check if tests actually passed const stdout = err.stdout || ''; - const hasPassingOutput = stdout.includes('[PASS]') || stdout.includes('Evaluation complete'); - result = { - success: hasPassingOutput, - error: hasPassingOutput ? undefined : err.message, - stdout: stdout.slice(0, 1000), - stderr: err.stderr?.slice(0, 500), - }; + const passMatch = stdout.match(/(\d+) passed/); + const passed = passMatch ? parseInt(passMatch[1]) : 0; - if (hasPassingOutput) { + if (passed > 0 && !stdout.includes('failed')) { + steps.push(`Eval PASSED (non-zero exit): ${passed} passed`); state.verified = true; + } else { + steps.push(`Eval FAILED: ${err.message.slice(0, 200)}`); + state.verified = false; } + + result = { + success: state.verified, + error: state.verified ? undefined : err.message, + stdout: stdout.slice(0, 1000), + steps, + }; } break; } diff --git a/plugins/promptfoo/src/agent/providers.ts b/plugins/promptfoo/src/agent/providers.ts index 35c3935..eef4ff4 100644 --- a/plugins/promptfoo/src/agent/providers.ts +++ b/plugins/promptfoo/src/agent/providers.ts @@ -63,8 +63,12 @@ export class OpenAIProvider implements LLMProvider { model: this.model, messages: options.messages.map((m) => this.toOpenAIMessage(m)), tools: options.tools, - max_tokens: options.maxTokens || 4096, - temperature: options.temperature ?? 0.7, + ...(this.model.startsWith('gpt-5') || this.model.startsWith('o1') || this.model.startsWith('o3') + ? { max_completion_tokens: options.maxTokens || 4096 } + : { max_tokens: options.maxTokens || 4096 }), + ...(this.model.startsWith('gpt-5') || this.model.startsWith('o1') || this.model.startsWith('o3') + ? {} + : { temperature: options.temperature ?? 0.7 }), }), }); diff --git a/plugins/promptfoo/src/agent/system-prompt.ts b/plugins/promptfoo/src/agent/system-prompt.ts index 1235dc3..b9527d5 100644 --- a/plugins/promptfoo/src/agent/system-prompt.ts +++ b/plugins/promptfoo/src/agent/system-prompt.ts @@ -10,7 +10,7 @@ export const DISCOVERY_SYSTEM_PROMPT = `You are a target discovery agent for pro 1. Probe the target to understand how it communicates 2. Generate a working promptfoo config (YAML + custom provider if needed) -3. Verify it works with a mini redteam test +3. Verify it works ## Tools @@ -18,7 +18,7 @@ export const DISCOVERY_SYSTEM_PROMPT = `You are a target discovery agent for pro - **probe_ws(url, message, headers?, timeout?)** - Test WebSocket endpoint - **write_config(description, providerType, providerConfig)** - Write promptfooconfig.yaml - **write_provider(code, filename, language)** - Write custom provider.js/py -- **verify()** - Run promptfoo eval to test the config +- **verify()** - Test provider directly (smoke + session), then run promptfoo eval - **done(summary, configFile, verified)** - Signal completion ## Promptfoo Config Format @@ -56,26 +56,46 @@ export default class Provider { return 'my-provider'; } - async callApi(prompt) { + async callApi(prompt, context, options) { + // context.vars.sessionId is set on subsequent turns if you returned sessionId previously // Your logic here... - return { output: "the response string" }; // MUST return { output: string } + return { + output: "the response string", + sessionId: "optional-session-id", // Return if target uses sessions + }; } } \`\`\` **Key requirements:** - Must be a class with \`export default\` -- Must have \`callApi(prompt)\` method -- \`callApi\` must return \`{ output: string }\`, not just a string +- Must have \`callApi(prompt, context, options)\` method — all 3 params +- \`callApi\` must return \`{ output: string, sessionId?: string }\` - Use native fetch (Node 18+), import 'ws' for WebSocket +## Session Handling + +Promptfoo uses sessions for multi-turn conversations (e.g. redteam attack strategies like Crescendo and GOAT). The flow works like this: + +1. Strategy calls \`callApi(prompt, context)\` on turn 1 +2. Provider talks to the target, gets a response and a session/conversation ID +3. Provider returns \`{ output: "...", sessionId: "abc123" }\` +4. Promptfoo stores the sessionId and passes it back on turn 2+ via \`context.vars.sessionId\` +5. Provider reads \`context.vars.sessionId\` and reuses the existing conversation + +**If the target is stateful (uses sessions, conversation IDs, etc.), the provider MUST support this flow.** Otherwise multi-turn attacks will start a new conversation on every turn and fail. + +For **custom providers**: Accept the \`context\` parameter, check \`context.vars.sessionId\` to reuse an existing session, and return \`sessionId\` in the response. + +For **HTTP providers**: Use \`sessionParser\` in the config to extract the session ID from the response (e.g. \`sessionParser: json.session_id\`). Promptfoo handles the rest automatically. + ## Workflow 1. Read the target spec to understand the API 2. Probe to verify connectivity and response format 3. Decide: HTTP provider (simple) or custom provider (complex) 4. Write config (and provider.js if needed) -5. Verify with promptfoo eval +5. Verify — runs provider smoke test + session test, then promptfoo eval with 2 test cases 6. Call done() with results Be intelligent. Figure out the target's protocol, auth, request/response format from probing. Generate configs that work.`; diff --git a/plugins/promptfoo/src/agent/tools.ts b/plugins/promptfoo/src/agent/tools.ts index 0108da2..cf752fe 100644 --- a/plugins/promptfoo/src/agent/tools.ts +++ b/plugins/promptfoo/src/agent/tools.ts @@ -133,7 +133,7 @@ export const toolDefinitions: ToolDefinition[] = [ }, { name: 'verify', - description: 'Run a mini redteam test to verify the configuration works. This sends a few test prompts through the target.', + description: 'Verify the configuration works. Tests the provider directly (smoke + session test), then runs promptfoo eval with 2 simple test cases.', parameters: { type: 'object', properties: { @@ -142,11 +142,6 @@ export const toolDefinitions: ToolDefinition[] = [ description: 'Path to the config file to verify (default: promptfooconfig.yaml)', default: 'promptfooconfig.yaml', }, - numTests: { - type: 'number', - description: 'Number of test prompts to send (default: 3)', - default: 3, - }, }, }, }, diff --git a/plugins/promptfoo/src/generator/config.ts b/plugins/promptfoo/src/generator/config.ts index afb517a..bc749e8 100644 --- a/plugins/promptfoo/src/generator/config.ts +++ b/plugins/promptfoo/src/generator/config.ts @@ -62,26 +62,16 @@ export function generateConfig(options: GenerateConfigOptions): GeneratedConfig const config = { description, providers: [provider], - // Default test with the prompt variable + prompts: ['{{prompt}}'], defaultTest: { - vars: { - prompt: '{{prompt}}', - }, - }, - // Mini redteam for verification - redteam: { - plugins: ['harmful:hate'], - strategies: [ - { id: 'jailbreak' }, - { - id: 'jailbreak:composite', - config: { - maxTurns: 3, - }, - }, + assert: [ + { type: 'javascript', value: 'output && output.length > 0' }, ], - numTests: 1, }, + tests: [ + { vars: { prompt: 'Hello, how are you?' } }, + { vars: { prompt: 'What can you help me with?' } }, + ], }; // Generate YAML