diff --git a/runner/configuration/environment-config.ts b/runner/configuration/environment-config.ts index f255a2b..303ba6a 100644 --- a/runner/configuration/environment-config.ts +++ b/runner/configuration/environment-config.ts @@ -70,6 +70,8 @@ export const environmentConfigSchema = z.object({ fullStackFramework: z.string().optional(), /** Path to the prompt to use when rating code. */ codeRatingPrompt: z.string().optional(), + /** Path to the prompt to use when rating screenshots. */ + visualRatingPrompt: z.string().optional(), /** When enabled, the system prompts for this environment won't be included in the report. */ classifyPrompts: z.boolean().optional(), /** diff --git a/runner/configuration/environment.ts b/runner/configuration/environment.ts index 0b967f2..4af69f0 100644 --- a/runner/configuration/environment.ts +++ b/runner/configuration/environment.ts @@ -50,6 +50,8 @@ export class Environment { readonly clientSideFramework: FrameworkInfo; /** Path from which to read the code rating prompt. */ readonly codeRatingPromptPath: string | null; + /** Path from which to read the visual rating prompt. */ + readonly visualRatingPromptPath: string | null; /** Whether the prompts should be removed from the final report. */ readonly classifyPrompts: boolean; /** Whether this is one of the built-in environment that come with the runner. */ @@ -109,6 +111,9 @@ export class Environment { this.codeRatingPromptPath = config.codeRatingPrompt ? join(rootPath, config.codeRatingPrompt) : null; + this.visualRatingPromptPath = config.visualRatingPrompt + ? join(rootPath, config.visualRatingPrompt) + : null; this.classifyPrompts = config.classifyPrompts ?? false; this.isBuiltIn = rootPath.includes('node_modules'); this.executor = config.executor; diff --git a/runner/ratings/autoraters/visuals-rater.ts b/runner/ratings/autoraters/visuals-rater.ts index 4dc5a76..c66ec3b 100644 --- a/runner/ratings/autoraters/visuals-rater.ts +++ b/runner/ratings/autoraters/visuals-rater.ts @@ -12,6 +12,10 @@ import {Environment} from '../../configuration/environment.js'; import {screenshotUrlToPngBuffer} from '../../utils/screenshots.js'; import {Usage} from '../../shared-interfaces.js'; import {AiSdkRunner} from '../../codegen/ai-sdk/ai-sdk-runner.js'; +import {readFileSync} from 'fs'; + +/** Cache for visual rating prompts that have been read from disk. */ +const CACHED_VISUAL_RATING_PROMPTS: Record = {}; /** * Automatically rate the appearance of a screenshot using an LLM. @@ -32,7 +36,18 @@ export async function autoRateAppearance( screenshotPngUrl: string, label: string, ): Promise { - const prompt = environment.renderPrompt(defaultVisualRaterPrompt, null, { + let promptText: string; + if (environment.visualRatingPromptPath) { + CACHED_VISUAL_RATING_PROMPTS[environment.visualRatingPromptPath] ??= readFileSync( + environment.visualRatingPromptPath, + 'utf8', + ); + promptText = CACHED_VISUAL_RATING_PROMPTS[environment.visualRatingPromptPath]; + } else { + promptText = defaultVisualRaterPrompt; + } + + const prompt = environment.renderPrompt(promptText, environment.visualRatingPromptPath, { APP_PROMPT: appPrompt, }).result;