Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 51 additions & 9 deletions src/core/tools/ReadFileTool.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import { getReadablePath } from "../../utils/path"
import { countFileLines } from "../../integrations/misc/line-counter"
import { readLines } from "../../integrations/misc/read-lines"
import { extractTextFromFile, addLineNumbers, getSupportedBinaryFormats } from "../../integrations/misc/extract-text"
import { readTextWithTokenBudget } from "../../integrations/misc/read-text-with-budget"
import { parseSourceCodeDefinitionsForFile } from "../../services/tree-sitter"
import { parseXml } from "../../utils/xml"
import { resolveToolProtocol } from "../../utils/resolveToolProtocol"
Expand Down Expand Up @@ -45,6 +46,28 @@ interface FileResult {
feedbackImages?: any[]
}

/**
 * Returns the inclusive, zero-based line window `[startLine0, endLine0]` of `text`,
 * joined with "\n".
 *
 * Both "\n" and "\r\n" are treated as line breaks. The window is intersected with the
 * lines that actually exist: a start below 0 is treated as 0, an end past the last
 * line stops at the last line, and a window that selects nothing yields "".
 *
 * @param text - Full text to slice.
 * @param startLine0 - Zero-based index of the first line to include.
 * @param endLine0 - Zero-based index of the last line to include.
 * @returns The selected lines joined with "\n", or "" when no line is selected.
 */
function sliceTextLines(text: string, startLine0: number, endLine0: number): string {
	const lines = text.split(/\r?\n/)
	// Mirror other readers: if text ends with newline, drop the synthetic last empty line
	if (lines.length > 0 && lines[lines.length - 1] === "") {
		lines.pop()
	}

	// Array.prototype.slice interprets negative indices as offsets from the end, which
	// would return lines outside the requested window. Clamp the start and bail out on
	// an empty window so negative values can never reach slice().
	const firstLine = Math.max(startLine0, 0)
	if (endLine0 < firstLine) {
		return ""
	}
	return lines.slice(firstLine, endLine0 + 1).join("\n")
}

/**
 * Best-effort read of a file's text through a VS Code text document.
 *
 * The `vscode` module is imported lazily inside the `try`, so a failure to load it is
 * handled the same way as a failure to open the document.
 *
 * @param fullPath - Path of the file to open.
 * @returns The document text, or `undefined` if any step throws.
 */
async function tryReadTextViaVscode(fullPath: string): Promise<string | undefined> {
	try {
		const { Uri, workspace } = await import("vscode")
		const document = await workspace.openTextDocument(Uri.file(fullPath))
		return document.getText()
	} catch {
		// Deliberate best-effort: callers fall back to other readers on `undefined`.
		return undefined
	}
}

/** Size cap in bytes (2 MiB) above which the VS Code text-document read path is skipped. */
const MAX_VSCODE_TEXT_READ_BYTES = 2 * 1024 * 1024 // avoid loading very large files into memory just to detect encoding

export class ReadFileTool extends BaseTool<"read_file"> {
readonly name = "read_file" as const

Expand Down Expand Up @@ -365,6 +388,16 @@ export class ReadFileTool extends BaseTool<"read_file"> {
continue
}

const fileSizeBytes = typeof stats.size === "number" ? stats.size : 0
let vscodeText: string | undefined
const getVscodeText = async (): Promise<string | undefined> => {
if (!useNative) return undefined
if (fileSizeBytes > MAX_VSCODE_TEXT_READ_BYTES) return undefined
if (vscodeText !== undefined) return vscodeText
vscodeText = await tryReadTextViaVscode(fullPath)
return vscodeText
}

const [totalLines, isBinary] = await Promise.all([countFileLines(fullPath), isBinaryFile(fullPath)])

if (isBinary) {
Expand Down Expand Up @@ -460,12 +493,14 @@ export class ReadFileTool extends BaseTool<"read_file"> {
if (fileResult.lineRanges && fileResult.lineRanges.length > 0) {
const rangeResults: string[] = []
const nativeRangeResults: string[] = []
const maybeText = await getVscodeText()

for (const range of fileResult.lineRanges) {
const content = addLineNumbers(
await readLines(fullPath, range.end - 1, range.start - 1),
range.start,
)
const rawRangeText =
useNative && maybeText !== undefined
? sliceTextLines(maybeText, range.start - 1, range.end - 1)
: await readLines(fullPath, range.end - 1, range.start - 1)
Comment on lines +499 to +502
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When native reads use VSCode-decoded text, out-of-range `<line_range>` values no longer produce an error (for example, a `range.start` beyond the end of the file). `readLines()` rejects in that case, but `sliceTextLines()` returns an empty string, so `addLineNumbers()` can emit a misleading blank line such as `1000 | `.

Suggested change
const rawRangeText =
useNative && maybeText !== undefined
? sliceTextLines(maybeText, range.start - 1, range.end - 1)
: await readLines(fullPath, range.end - 1, range.start - 1)
if (useNative && maybeText !== undefined && range.start > totalLines) {
throw new RangeError("Line with index ${range.start - 1} does not exist in \"${fullPath}\". Note that line indexing is zero-based")
}
const rawRangeText =
useNative && maybeText !== undefined
? sliceTextLines(maybeText, range.start - 1, range.end - 1)
: await readLines(fullPath, range.end - 1, range.start - 1)

Fix it with Roo Code or mention @roomote and request a fix.

const content = addLineNumbers(rawRangeText, range.start)
const lineRangeAttr = ` lines="${range.start}-${range.end}"`
rangeResults.push(`<content${lineRangeAttr}>\n${content}</content>`)
nativeRangeResults.push(`Lines ${range.start}-${range.end}:\n${content}`)
Expand Down Expand Up @@ -504,7 +539,12 @@ export class ReadFileTool extends BaseTool<"read_file"> {
}

if (maxReadFileLine > 0 && totalLines > maxReadFileLine) {
const content = addLineNumbers(await readLines(fullPath, maxReadFileLine - 1, 0))
const maybeText = await getVscodeText()
const rawText =
useNative && maybeText !== undefined
? sliceTextLines(maybeText, 0, maxReadFileLine - 1)
: await readLines(fullPath, maxReadFileLine - 1, 0)
const content = addLineNumbers(rawText)
const lineRangeAttr = ` lines="1-${maxReadFileLine}"`
let xmlInfo = `<content${lineRangeAttr}>\n${content}</content>\n`
let nativeInfo = `Lines 1-${maxReadFileLine}:\n${content}\n`
Expand Down Expand Up @@ -566,10 +606,12 @@ export class ReadFileTool extends BaseTool<"read_file"> {
xmlInfo = `<content/>\n<notice>${notice}</notice>\n`
nativeInfo = `Note: ${notice}`
} else {
// Read file with incremental token counting
const result = await readFileWithTokenBudget(fullPath, {
budgetTokens: safeReadBudget,
})
// Prefer VSCode decoding (encoding-aware) for native tool protocol.
const maybeText = await getVscodeText()
const result =
useNative && maybeText !== undefined
? await readTextWithTokenBudget(maybeText, { budgetTokens: safeReadBudget })
: await readFileWithTokenBudget(fullPath, { budgetTokens: safeReadBudget })

content = addLineNumbers(result.content)

Expand Down
111 changes: 111 additions & 0 deletions src/core/tools/__tests__/readFileTool.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

import * as path from "path"

import * as vscode from "vscode"

import { countFileLines } from "../../../integrations/misc/line-counter"
import { readLines } from "../../../integrations/misc/read-lines"
import { extractTextFromFile } from "../../../integrations/misc/extract-text"
Expand All @@ -23,6 +25,21 @@ vi.mock("path", async () => {

vi.mock("isbinaryfile")

// Replace the `vscode` module with a minimal stub exposing only `Uri.file` and
// `workspace.openTextDocument`.
vi.mock("vscode", () => ({
Uri: {
// Returns a plain object that carries only the given path as `fsPath`.
file: vi.fn((fsPath: string) => ({ fsPath })),
},
workspace: {
// Default: behave like VSCode isn't available in this test environment.
// Individual tests override this with a resolved value to opt in.
openTextDocument: vi.fn().mockRejectedValue(new Error("vscode not available")),
},
}))

// Avoid spawning tokenizer workers from `read-text-with-budget` in unit tests.
// Every call resolves to a token count of 1.
vi.mock("../../../utils/countTokens", () => ({
countTokens: vi.fn().mockResolvedValue(1),
}))

vi.mock("../../../integrations/misc/line-counter")
vi.mock("../../../integrations/misc/read-lines")

Expand Down Expand Up @@ -2011,3 +2028,97 @@ describe("read_file tool concurrent file reads limit", () => {
expect(toolResult).toContain("but the concurrent file reads limit is 5")
})
})

// Suite for the read path that takes file text from vscode.workspace.openTextDocument()
// instead of the file-based readers (readLines / readFileWithTokenBudget).
describe("read_file tool native protocol - VSCode decoding path", () => {
const testFilePath = "test/encoded.txt"
const absoluteFilePath = "/test/encoded.txt"

// Typed handles on the module mocks so each test can program and inspect them.
const mockedCountFileLines = vi.mocked(countFileLines)
const mockedReadLines = vi.mocked(readLines)
const mockedIsBinaryFile = vi.mocked(isBinaryFile)
const mockedPathResolve = vi.mocked(path.resolve)
const mockedOpenTextDocument = vi.mocked(vscode.workspace.openTextDocument)

let mockCline: any
let mockProvider: any
// Last value handed to pushToolResult by the tool; reset before every test.
let toolResult: ToolResponse | undefined

beforeEach(() => {
// Clear recorded calls so per-test call-count assertions start from zero.
mockedCountFileLines.mockClear()
mockedReadLines.mockClear()
mockedIsBinaryFile.mockClear()
mockedPathResolve.mockClear()
mockedOpenTextDocument.mockClear()
mockReadFileWithTokenBudget.mockClear()

const mocks = createMockCline()
mockCline = mocks.mockCline
mockProvider = mocks.mockProvider
setImageSupport(mockCline, false)

// Defaults: a one-line, non-binary text file resolved to `absoluteFilePath`.
mockedPathResolve.mockReturnValue(absoluteFilePath)
mockedIsBinaryFile.mockResolvedValue(false)
mockedCountFileLines.mockResolvedValue(1)

// The stat stub carries no `size` field.
fsPromises.stat.mockResolvedValue({
isDirectory: () => false,
isFile: () => true,
isSymbolicLink: () => false,
} as any)

// NOTE(review): presumably maxReadFileLine -1 means "no line limit" — confirm against ReadFileTool.
mockProvider.getState.mockResolvedValue({
maxReadFileLine: -1,
maxImageFileSize: 20,
maxTotalImageSize: 20,
})

toolResult = undefined
})

// Runs readFileTool.handle() with the given XML `args` payload and returns whatever
// the tool passed to pushToolResult (undefined if it was never called).
async function executeReadFile(args: string): Promise<ToolResponse | undefined> {
const toolUse: ReadFileToolUse = {
type: "tool_use",
name: "read_file",
params: { args },
partial: false,
}

// NOTE(review): toolProtocol is "xml" here although the suite title says "native protocol" —
// confirm which setting actually selects the native path exercised by these tests.
await readFileTool.handle(mockCline, toolUse, {
askApproval: mockCline.ask,
handleError: vi.fn(),
pushToolResult: (result: ToolResponse) => {
toolResult = result
},
removeClosingTag: (_: ToolParamName, content?: string) => content ?? "",
toolProtocol: "xml",
})

return toolResult
}

it("should prefer vscode.workspace.openTextDocument() when available (full read)", async () => {
// The document returns non-ASCII text; the file-based budget reader is set to reject
// so any accidental use of it would surface as a failure.
mockedOpenTextDocument.mockResolvedValue({ getText: () => "caf\u00e9" } as any)
mockReadFileWithTokenBudget.mockRejectedValue(new Error("should not be called"))

const result = await executeReadFile(`<file><path>${testFilePath}</path></file>`)

expect(mockedOpenTextDocument).toHaveBeenCalledTimes(1)
expect(result).toContain(`File: ${testFilePath}`)
expect(result).toContain("caf\u00e9")
expect(mockReadFileWithTokenBudget).not.toHaveBeenCalled()
})

it("should use vscode-decoded text for line_range reads", async () => {
// Three-line document; readLines is set to reject so the range must be served
// from the document text.
mockedCountFileLines.mockResolvedValue(3)
mockedOpenTextDocument.mockResolvedValue({ getText: () => "L1\nL2\nL3" } as any)
mockedReadLines.mockRejectedValue(new Error("should not be called"))

const result = await executeReadFile(`<file><path>${testFilePath}</path><line_range>2-3</line_range></file>`)

expect(mockedOpenTextDocument).toHaveBeenCalledTimes(1)
expect(mockedReadLines).not.toHaveBeenCalled()
expect(result).toContain(`File: ${testFilePath}`)
// Line numbers in the output follow the requested range (2 and 3), not the slice offset.
expect(result).toContain("2 | L2")
expect(result).toContain("3 | L3")
})
})
128 changes: 128 additions & 0 deletions src/integrations/misc/read-text-with-budget.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
import { Anthropic } from "@anthropic-ai/sdk"

import { countTokens } from "../../utils/countTokens"

/** Outcome of reading in-memory text under a token budget. */
export interface ReadTextWithBudgetResult {
/** The content read up to the token budget, with lines joined by "\n" */
content: string
/** Sum of the token counts measured for the chunks included in `content` */
tokenCount: number
/** Total lines in the returned content */
lineCount: number
/** Whether the entire text was read (false if truncated) */
complete: boolean
}

/** Options controlling how much text is returned and how often tokens are counted. */
export interface ReadTextWithBudgetOptions {
/** Maximum tokens allowed. Required. */
budgetTokens: number
/** Number of lines to buffer before token counting (default: 256) */
chunkLines?: number
}

/**
 * Splits text into lines on "\n" or "\r\n".
 *
 * A trailing empty entry — the one produced when the text ends with a line break, or
 * when the text is empty — is dropped, so it is not reported as an extra line.
 *
 * @param text - Text to split.
 * @returns The lines without their line terminators.
 */
function normalizeTextToLines(text: string): string[] {
	const pieces = text.split(/\r?\n/)
	const hasTrailingEmptyPiece = pieces.length > 0 && pieces[pieces.length - 1] === ""
	return hasTrailingEmptyPiece ? pieces.slice(0, -1) : pieces
}

/**
 * Counts the tokens of a plain-text string by wrapping it in a single text content
 * block and passing it to `countTokens`.
 *
 * @param text - Text to measure.
 * @returns The count from `countTokens`, or `ceil(text.length / 2)` if counting fails.
 */
async function countTextTokens(text: string): Promise<number> {
	const blocks: Anthropic.Messages.ContentBlockParam[] = [{ type: "text", text }]
	try {
		return await countTokens(blocks)
	} catch {
		// Fallback: conservative estimate (2 chars per token)
		const charsPerToken = 2
		return Math.ceil(text.length / charsPerToken)
	}
}

/**
 * Reads text while incrementally counting tokens, stopping when budget is reached.
 *
 * This is the in-memory analogue of [`readFileWithTokenBudget()`](src/integrations/misc/read-file-with-budget.ts:35).
 *
 * Lines are buffered in groups of `chunkLines` and each group is token-counted as a
 * whole. The first group that would exceed the budget is cut with a binary search over
 * its line count, so the result always ends on a line boundary.
 *
 * @param text - Text to read; "\n" and "\r\n" are both treated as line breaks.
 * @param options - Token budget and optional chunk size (default 256 lines).
 * @returns The accepted content (lines joined with "\n"), the summed token count of the
 *   accepted chunks, the number of accepted lines, and whether the whole text fit.
 */
export async function readTextWithTokenBudget(
	text: string,
	options: ReadTextWithBudgetOptions,
): Promise<ReadTextWithBudgetResult> {
	const { budgetTokens, chunkLines = 256 } = options

	const allLines = normalizeTextToLines(text)
	if (allLines.length === 0) {
		return { content: "", tokenCount: 0, lineCount: 0, complete: true }
	}

	let content = ""
	let lineCount = 0
	let tokenCount = 0
	let complete = true
	let lineBuffer: string[] = []

	// Appends accepted lines to the result. The separator is keyed on `lineCount`, not on
	// `content.length`: when everything accepted so far is a single blank line, `content`
	// is "" but a newline is still required, otherwise that line silently disappears and
	// `content` no longer matches `lineCount`.
	const acceptLines = (lines: string[], tokens: number): void => {
		const joined = lines.join("\n")
		content = lineCount > 0 ? `${content}\n${joined}` : joined
		tokenCount += tokens
		lineCount += lines.length
	}

	// Flushes the buffered lines. Resolves to false once the budget has been hit.
	const processBuffer = async (): Promise<boolean> => {
		if (lineBuffer.length === 0) return true

		const currentBuffer = lineBuffer
		lineBuffer = []

		const chunkTokens = await countTextTokens(currentBuffer.join("\n"))

		if (tokenCount + chunkTokens <= budgetTokens) {
			acceptLines(currentBuffer, chunkTokens)
			return true
		}

		// Find cutoff within this chunk (binary search by line count)
		let low = 0
		let high = currentBuffer.length
		let bestFit = 0
		let bestTokens = 0

		while (low < high) {
			// Upper-biased midpoint so `low = mid` always makes progress.
			const mid = Math.floor((low + high + 1) / 2)
			const testTokens = await countTextTokens(currentBuffer.slice(0, mid).join("\n"))

			if (tokenCount + testTokens <= budgetTokens) {
				bestFit = mid
				bestTokens = testTokens
				low = mid
			} else {
				high = mid - 1
			}
		}

		if (bestFit > 0) {
			acceptLines(currentBuffer.slice(0, bestFit), bestTokens)
		}

		complete = false
		return false
	}

	for (const line of allLines) {
		lineBuffer.push(line)
		if (lineBuffer.length >= chunkLines) {
			const continueReading = await processBuffer()
			if (!continueReading) {
				return { content, tokenCount, lineCount, complete }
			}
		}
	}

	// Flush the final partial chunk; `complete` is updated inside processBuffer.
	if (lineBuffer.length > 0) {
		await processBuffer()
	}

	return { content, tokenCount, lineCount, complete }
}
Loading