diff --git a/lib/commands/context.ts b/lib/commands/context.ts index 0c3a2fb..9923c53 100644 --- a/lib/commands/context.ts +++ b/lib/commands/context.ts @@ -1,6 +1,40 @@ /** - * DCP Context command handler. + * DCP Context Command * Shows a visual breakdown of token usage in the current session. + * + * TOKEN CALCULATION STRATEGY + * ========================== + * We minimize tokenizer estimation by leveraging API-reported values wherever possible. + * + * WHAT WE GET FROM THE API (exact): + * - tokens.input : Input tokens for each assistant response + * - tokens.output : Output tokens generated (includes text + tool calls) + * - tokens.reasoning: Reasoning tokens used + * - tokens.cache : Cache read/write tokens + * + * HOW WE CALCULATE EACH CATEGORY: + * + * SYSTEM = firstAssistant.input + cache.read - tokenizer(firstUserMessage) + * The first response's input contains system + first user message. + * + * TOOLS = tokenizer(toolInputs + toolOutputs) - prunedTokens + * We must tokenize tools anyway for pruning decisions. + * + * USER = tokenizer(all user messages) + * User messages are typically small, so estimation is acceptable. + * + * ASSISTANT = total - system - user - tools + * Calculated as residual. This absorbs: + * - Assistant text output tokens + * - Reasoning tokens (if persisted by the model) + * - Any estimation errors + * + * TOTAL = input + output + reasoning + cache.read + cache.write + * Matches opencode's UI display. + * + * WHY ASSISTANT IS THE RESIDUAL: + * If reasoning tokens persist in context (model-dependent), they semantically + * belong with "Assistant" since reasoning IS assistant-generated content. */ import type { Logger } from "../logger" @@ -24,9 +58,10 @@ interface TokenBreakdown { system: number user: number assistant: number - reasoning: number tools: number - pruned: number + toolCount: number + prunedTokens: number + prunedCount: number total: number } @@ -35,9 +70,10 @@ function analyzeTokens(state: SessionState, messages: WithParts[]): TokenBreakdo system: 0, user: 0, assistant: 0, - reasoning: 0, tools: 0, - pruned: state.stats.totalPruneTokens, + toolCount: 0, + prunedTokens: state.stats.totalPruneTokens, + prunedCount: state.prune.toolIds.length, total: 0, } @@ -52,26 +88,6 @@ function analyzeTokens(state: SessionState, messages: WithParts[]): TokenBreakdo } } - let firstUserTokens = 0 - for (const msg of messages) { - if (msg.info.role === "user" && !isIgnoredUserMessage(msg)) { - for (const part of msg.parts) { - if (part.type === "text") { - const textPart = part as TextPart - firstUserTokens += countTokens(textPart.text || "") - } - } - break - } - } - - // Calculate system tokens: first response's total input minus first user message - if (firstAssistant) { - const firstInput = - (firstAssistant.tokens?.input || 0) + (firstAssistant.tokens?.cache?.read || 0) - breakdown.system = Math.max(0, firstInput - firstUserTokens) - } - let lastAssistant: AssistantMessage | undefined for (let i = messages.length - 1; i >= 0; i--) { const msg = messages[i] @@ -84,71 +100,74 @@ function analyzeTokens(state: SessionState, messages: WithParts[]): TokenBreakdo } } - // Get total from API - // Total = input + output + reasoning + cache.read + cache.write const apiInput = lastAssistant?.tokens?.input || 0 const apiOutput = lastAssistant?.tokens?.output || 0 const apiReasoning = lastAssistant?.tokens?.reasoning || 0 const apiCacheRead = lastAssistant?.tokens?.cache?.read || 0 const apiCacheWrite = lastAssistant?.tokens?.cache?.write || 0 - const apiTotal = apiInput + apiOutput + apiReasoning + apiCacheRead + apiCacheWrite - - for (const msg of messages) { - if (isMessageCompacted(state, msg)) { - continue - } + breakdown.total = apiInput + apiOutput + apiReasoning + apiCacheRead + apiCacheWrite - if (msg.info.role === "user" && isIgnoredUserMessage(msg)) { - continue - } + const userTextParts: string[] = [] + const toolInputParts: string[] = [] + const toolOutputParts: string[] = [] + let firstUserText = "" + let foundFirstUser = false - const info = msg.info - const role = info.role + for (const msg of messages) { + if (isMessageCompacted(state, msg)) continue + if (msg.info.role === "user" && isIgnoredUserMessage(msg)) continue for (const part of msg.parts) { - switch (part.type) { - case "text": { - const textPart = part as TextPart - const tokens = countTokens(textPart.text || "") - if (role === "user") { - breakdown.user += tokens - } else { - breakdown.assistant += tokens - } - break + if (part.type === "text" && msg.info.role === "user") { + const textPart = part as TextPart + const text = textPart.text || "" + userTextParts.push(text) + if (!foundFirstUser) { + firstUserText += text } - case "tool": { - const toolPart = part as ToolPart - - if (toolPart.state?.input) { - const inputStr = - typeof toolPart.state.input === "string" - ? toolPart.state.input - : JSON.stringify(toolPart.state.input) - breakdown.tools += countTokens(inputStr) - } - - if (toolPart.state?.status === "completed" && toolPart.state?.output) { - const outputStr = - typeof toolPart.state.output === "string" - ? toolPart.state.output - : JSON.stringify(toolPart.state.output) - breakdown.tools += countTokens(outputStr) - } - break + } else if (part.type === "tool") { + const toolPart = part as ToolPart + breakdown.toolCount++ + + if (toolPart.state?.input) { + const inputStr = + typeof toolPart.state.input === "string" + ? toolPart.state.input + : JSON.stringify(toolPart.state.input) + toolInputParts.push(inputStr) + } + + if (toolPart.state?.status === "completed" && toolPart.state?.output) { + const outputStr = + typeof toolPart.state.output === "string" + ? toolPart.state.output + : JSON.stringify(toolPart.state.output) + toolOutputParts.push(outputStr) } } } + + if (msg.info.role === "user" && !isIgnoredUserMessage(msg) && !foundFirstUser) { + foundFirstUser = true + } } - breakdown.tools = Math.max(0, breakdown.tools - breakdown.pruned) + const firstUserTokens = countTokens(firstUserText) + breakdown.user = countTokens(userTextParts.join("\n")) + const toolInputTokens = countTokens(toolInputParts.join("\n")) + const toolOutputTokens = countTokens(toolOutputParts.join("\n")) - // Calculate reasoning as the difference between API total and our counted parts - // This handles both interleaved thinking and non-interleaved models correctly - const countedParts = breakdown.system + breakdown.user + breakdown.assistant + breakdown.tools - breakdown.reasoning = Math.max(0, apiTotal - countedParts) + if (firstAssistant) { + const firstInput = + (firstAssistant.tokens?.input || 0) + (firstAssistant.tokens?.cache?.read || 0) + breakdown.system = Math.max(0, firstInput - firstUserTokens) + } - breakdown.total = apiTotal + breakdown.tools = Math.max(0, toolInputTokens + toolOutputTokens - breakdown.prunedTokens) + breakdown.assistant = Math.max( + 0, + breakdown.total - breakdown.system - breakdown.user - breakdown.tools, + ) return breakdown } @@ -164,23 +183,18 @@ function formatContextMessage(breakdown: TokenBreakdown): string { const lines: string[] = [] const barWidth = 30 - const values = [ - breakdown.system, - breakdown.user, - breakdown.assistant, - breakdown.reasoning, - breakdown.tools, - ] - const maxValue = Math.max(...values) + const toolsInContext = breakdown.toolCount - breakdown.prunedCount + const toolsLabel = `Tools (${toolsInContext})` const categories = [ { label: "System", value: breakdown.system, char: "█" }, { label: "User", value: breakdown.user, char: "▓" }, { label: "Assistant", value: breakdown.assistant, char: "▒" }, - { label: "Reasoning", value: breakdown.reasoning, char: "░" }, - { label: "Tools", value: breakdown.tools, char: "⣿" }, + { label: toolsLabel, value: breakdown.tools, char: "░" }, ] as const + const maxLabelLen = Math.max(...categories.map((c) => c.label.length)) + lines.push("╭───────────────────────────────────────────────────────────╮") lines.push("│ DCP Context Analysis │") lines.push("╰───────────────────────────────────────────────────────────╯") @@ -190,10 +204,10 @@ function formatContextMessage(breakdown: TokenBreakdown): string { lines.push("") for (const cat of categories) { - const bar = createBar(cat.value, maxValue, barWidth, cat.char) + const bar = createBar(cat.value, breakdown.total, barWidth, cat.char) const percentage = breakdown.total > 0 ? ((cat.value / breakdown.total) * 100).toFixed(1) : "0.0" - const labelWithPct = `${cat.label.padEnd(9)} ${percentage.padStart(5)}% ` + const labelWithPct = `${cat.label.padEnd(maxLabelLen)} ${percentage.padStart(5)}% ` const valueStr = formatTokenCount(cat.value).padStart(13) lines.push(`${labelWithPct}│${bar.padEnd(barWidth)}│${valueStr}`) } @@ -204,12 +218,12 @@ function formatContextMessage(breakdown: TokenBreakdown): string { lines.push("Summary:") - if (breakdown.pruned > 0) { - const withoutPruning = breakdown.total + breakdown.pruned - const savingsPercent = ((breakdown.pruned / withoutPruning) * 100).toFixed(1) + if (breakdown.prunedTokens > 0) { + const withoutPruning = breakdown.total + breakdown.prunedTokens lines.push( - ` Current context: ~${formatTokenCount(breakdown.total)} (${savingsPercent}% saved)`, + ` Pruned: ${breakdown.prunedCount} tools (~${formatTokenCount(breakdown.prunedTokens)})`, ) + lines.push(` Current context: ~${formatTokenCount(breakdown.total)}`) lines.push(` Without DCP: ~${formatTokenCount(withoutPruning)}`) } else { lines.push(` Current context: ~${formatTokenCount(breakdown.total)}`)