Skip to content

Commit ff837fe

Browse files
authored
fix(core): handle read file failures (#33260)
1 parent 4c6750d commit ff837fe

4 files changed

Lines changed: 263 additions & 71 deletions

File tree

packages/core/src/tool/read-filesystem.ts

Lines changed: 118 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -13,23 +13,61 @@ export const MAX_MEDIA_INGEST_BYTES = 20 * 1024 * 1024
1313
const MAX_LINE_LENGTH = 2_000
1414
const MAX_LINE_SUFFIX = `... (line truncated to ${MAX_LINE_LENGTH} chars)`
1515

16-
export class BinaryFileError extends Error {
17-
constructor(readonly resource: string) {
18-
super(`Cannot read binary file: ${resource}`)
19-
this.name = "BinaryFileError"
16+
export class BinaryFileError extends Schema.TaggedErrorClass<BinaryFileError>()("ReadTool.BinaryFileError", {
17+
resource: Schema.String,
18+
}) {
19+
override get message() {
20+
return `Cannot read binary file: ${this.resource}`
2021
}
2122
}
2223

23-
export class MediaIngestLimitError extends Error {
24-
constructor(
25-
readonly resource: string,
26-
readonly maximumBytes: number,
27-
) {
28-
super(`Media exceeds ${maximumBytes} byte ingestion limit: ${resource}`)
29-
this.name = "MediaIngestLimitError"
24+
export class MediaIngestLimitError extends Schema.TaggedErrorClass<MediaIngestLimitError>()(
25+
"ReadTool.MediaIngestLimitError",
26+
{
27+
resource: Schema.String,
28+
maximumBytes: Schema.Number,
29+
},
30+
) {
31+
override get message() {
32+
return `Media exceeds ${this.maximumBytes} byte ingestion limit: ${this.resource}`
33+
}
34+
}
35+
36+
export class MalformedUtf8Error extends Schema.TaggedErrorClass<MalformedUtf8Error>()("ReadTool.MalformedUtf8Error", {
37+
resource: Schema.String,
38+
}) {
39+
override get message() {
40+
return `File is not valid UTF-8: ${this.resource}`
41+
}
42+
}
43+
44+
export class OffsetOutOfRangeError extends Schema.TaggedErrorClass<OffsetOutOfRangeError>()(
45+
"ReadTool.OffsetOutOfRangeError",
46+
{ offset: Schema.Number },
47+
) {
48+
override get message() {
49+
return `Offset ${this.offset} is out of range`
3050
}
3151
}
3252

53+
export class PathKindError extends Schema.TaggedErrorClass<PathKindError>()("ReadTool.PathKindError", {
54+
resource: Schema.String,
55+
expected: Schema.Literals(["a file", "a file or directory"]),
56+
}) {
57+
override get message() {
58+
return `Path is not ${this.expected}: ${this.resource}`
59+
}
60+
}
61+
62+
export type InspectError = FSUtil.Error | PathKindError
63+
export type ReadError =
64+
| FSUtil.Error
65+
| BinaryFileError
66+
| MediaIngestLimitError
67+
| MalformedUtf8Error
68+
| OffsetOutOfRangeError
69+
| PathKindError
70+
3371
export const PageInput = Schema.Struct({
3472
offset: PositiveInt.pipe(Schema.optional),
3573
limit: PositiveInt.check(Schema.isLessThanOrEqualTo(MAX_READ_LINES)).pipe(Schema.optional),
@@ -52,13 +90,13 @@ export class ListPage extends Schema.Class<ListPage>("ReadTool.ListPage")({
5290
}) {}
5391

5492
export interface Interface {
55-
readonly inspect: (path: AbsolutePath) => Effect.Effect<"file" | "directory">
93+
readonly inspect: (path: AbsolutePath) => Effect.Effect<"file" | "directory", InspectError>
5694
readonly read: (
5795
path: AbsolutePath,
5896
resource: string,
5997
page?: PageInput,
60-
) => Effect.Effect<FileSystem.Content | TextPage>
61-
readonly list: (path: AbsolutePath, page?: PageInput) => Effect.Effect<ListPage>
98+
) => Effect.Effect<FileSystem.Content | TextPage, ReadError>
99+
readonly list: (path: AbsolutePath, page?: PageInput) => Effect.Effect<ListPage, FSUtil.Error>
62100
}
63101

64102
export class Service extends Context.Service<Service, Interface>()("@opencode/ReadToolFileSystem") {}
@@ -111,11 +149,21 @@ const binary = (resource: string, bytes: Uint8Array) => {
111149
}
112150
return nonPrintable / bytes.length > 0.3
113151
}
152+
const decodeUtf8 = (resource: string, decoder: TextDecoder, bytes?: Uint8Array) =>
153+
Effect.try({
154+
try: () => decoder.decode(bytes, { stream: bytes !== undefined }),
155+
catch: (error) => {
156+
if (error instanceof TypeError) return new MalformedUtf8Error({ resource })
157+
throw error
158+
},
159+
})
160+
const decodeChunk = (resource: string, decoder: TextDecoder, bytes: Uint8Array) =>
161+
bytes.includes(0) ? Effect.fail(new BinaryFileError({ resource })) : decodeUtf8(resource, decoder, bytes)
114162

115163
export const inspect = Effect.fn("ReadTool.inspect")(function* (fs: FSUtil.Interface, input: string) {
116-
const info = yield* fs.stat(input).pipe(Effect.orDie)
164+
const info = yield* fs.stat(input)
117165
const type = info.type === "File" ? "file" : info.type === "Directory" ? "directory" : undefined
118-
if (!type) return yield* Effect.die(new Error("Path is not a file or directory"))
166+
if (!type) return yield* Effect.fail(new PathKindError({ resource: input, expected: "a file or directory" }))
119167
return type
120168
})
121169

@@ -125,32 +173,30 @@ export const read = Effect.fn("ReadTool.read")(function* (
125173
resource: string,
126174
page: PageInput = {},
127175
) {
128-
const real = yield* fs.realPath(input).pipe(Effect.orDie)
176+
const real = yield* fs.realPath(input)
129177
return yield* Effect.scoped(
130178
Effect.gen(function* () {
131-
const file = yield* fs.open(real, { flag: "r" }).pipe(Effect.orDie)
132-
const info = yield* file.stat.pipe(Effect.orDie)
133-
if (info.type !== "File") return yield* Effect.die(new Error("Path is not a file"))
179+
const file = yield* fs.open(real, { flag: "r" })
180+
const info = yield* file.stat
181+
if (info.type !== "File") return yield* Effect.fail(new PathKindError({ resource, expected: "a file" }))
134182
const first = Option.getOrElse(
135-
yield* file.readAlloc(Math.min(64 * 1024, Number(info.size) || 4 * 1024)).pipe(Effect.orDie),
183+
yield* file.readAlloc(Math.min(64 * 1024, Number(info.size) || 4 * 1024)),
136184
() => new Uint8Array(),
137185
)
138186
const mime = imageMime(first)
139187
if (mime) {
140188
if (info.size > MAX_MEDIA_INGEST_BYTES)
141-
return yield* Effect.die(new MediaIngestLimitError(resource, MAX_MEDIA_INGEST_BYTES))
189+
return yield* Effect.fail(new MediaIngestLimitError({ resource, maximumBytes: MAX_MEDIA_INGEST_BYTES }))
142190
const chunks = [first]
143191
let total = first.length
144192
while (total <= MAX_MEDIA_INGEST_BYTES) {
145-
const chunk = yield* file
146-
.readAlloc(Math.min(64 * 1024, MAX_MEDIA_INGEST_BYTES + 1 - total))
147-
.pipe(Effect.orDie)
193+
const chunk = yield* file.readAlloc(Math.min(64 * 1024, MAX_MEDIA_INGEST_BYTES + 1 - total))
148194
if (Option.isNone(chunk)) break
149195
chunks.push(chunk.value)
150196
total += chunk.value.length
151197
}
152198
if (total > MAX_MEDIA_INGEST_BYTES)
153-
return yield* Effect.die(new MediaIngestLimitError(resource, MAX_MEDIA_INGEST_BYTES))
199+
return yield* Effect.fail(new MediaIngestLimitError({ resource, maximumBytes: MAX_MEDIA_INGEST_BYTES }))
154200
return {
155201
uri: pathToFileURL(real).href,
156202
name: path.basename(real),
@@ -162,19 +208,19 @@ export const read = Effect.fn("ReadTool.read")(function* (
162208
mime,
163209
}
164210
}
165-
if (startsWith(first, [0x25, 0x50, 0x44, 0x46]) || binary(resource, first))
166-
return yield* Effect.die(new BinaryFileError(resource))
211+
if (startsWith(first, [0x25, 0x50, 0x44, 0x46]) || extensions.has(path.extname(resource).toLowerCase()))
212+
return yield* Effect.fail(new BinaryFileError({ resource }))
167213
const paged = info.size > MAX_READ_BYTES || page.offset !== undefined || page.limit !== undefined
168214
if (!paged) {
215+
if (binary(resource, first)) return yield* Effect.fail(new BinaryFileError({ resource }))
169216
const decoder = new TextDecoder("utf-8", { fatal: true })
170-
const text = [yield* Effect.sync(() => decoder.decode(first, { stream: true }))]
217+
const text = [yield* decodeUtf8(resource, decoder, first)]
171218
while (true) {
172-
const chunk = yield* file.readAlloc(64 * 1024).pipe(Effect.orDie)
219+
const chunk = yield* file.readAlloc(64 * 1024)
173220
if (Option.isNone(chunk)) break
174-
if (chunk.value.includes(0)) return yield* Effect.die(new BinaryFileError(resource))
175-
text.push(yield* Effect.sync(() => decoder.decode(chunk.value, { stream: true })))
221+
text.push(yield* decodeChunk(resource, decoder, chunk.value))
176222
}
177-
text.push(yield* Effect.sync(() => decoder.decode()))
223+
text.push(yield* decodeUtf8(resource, decoder))
178224
return {
179225
uri: pathToFileURL(real).href,
180226
name: path.basename(real),
@@ -191,34 +237,29 @@ export const read = Effect.fn("ReadTool.read")(function* (
191237
let discard = false
192238
let line = 1
193239
let bytes = 0
194-
let found = false
195-
let truncated = false
196240
let next: number | undefined
197241
const append = (input: string) => {
198242
if (line < offset) {
199243
line++
200-
return
244+
return true
201245
}
202246
if (lines.length >= limit || bytes >= MAX_READ_BYTES) {
203-
truncated = true
204-
next ??= line++
205-
return
247+
next = line
248+
return false
206249
}
207-
found = true
208250
const text = input.length > MAX_LINE_LENGTH ? input.slice(0, MAX_LINE_LENGTH) + MAX_LINE_SUFFIX : input
209251
const size = Buffer.byteLength(text, "utf-8") + (lines.length > 0 ? 1 : 0)
210252
if (bytes + size > MAX_READ_BYTES) {
211-
truncated = true
212-
next ??= line++
213-
return
253+
next = line
254+
return false
214255
}
215256
lines.push(text)
216257
bytes += size
217258
line++
259+
return true
218260
}
219-
const consume = (chunk: Uint8Array) => {
220-
if (chunk.includes(0)) throw new BinaryFileError(resource)
221-
let text = decoder.decode(chunk, { stream: true })
261+
const consume = (input: string) => {
262+
let text = input
222263
while (true) {
223264
const index = text.indexOf("\n")
224265
if (index === -1) {
@@ -235,34 +276,53 @@ export const read = Effect.fn("ReadTool.read")(function* (
235276
pending = ""
236277
discard = false
237278
text = text.slice(index + 1)
238-
append(current.endsWith("\r") ? current.slice(0, -1) : current)
279+
if (!append(current.endsWith("\r") ? current.slice(0, -1) : current)) return false
239280
}
281+
return true
240282
}
241-
yield* Effect.sync(() => consume(first))
242-
while (true) {
243-
const chunk = yield* file.readAlloc(64 * 1024).pipe(Effect.orDie)
283+
const consumeChunk = Effect.fnUntraced(function* (chunk: Uint8Array) {
284+
let start = 0
285+
while (start < chunk.length) {
286+
if (lines.length >= limit || bytes >= MAX_READ_BYTES) {
287+
next = line
288+
return false
289+
}
290+
const newline = chunk.indexOf(10, start)
291+
const end = newline === -1 ? chunk.length : newline + 1
292+
const segment = chunk.subarray(start, end)
293+
if (binary(resource, segment)) return yield* Effect.fail(new BinaryFileError({ resource }))
294+
if (!consume(yield* decodeUtf8(resource, decoder, segment))) return false
295+
start = end
296+
}
297+
return true
298+
})
299+
let done = !(yield* consumeChunk(first))
300+
while (!done) {
301+
const chunk = yield* file.readAlloc(64 * 1024)
244302
if (Option.isNone(chunk)) break
245-
yield* Effect.sync(() => consume(chunk.value))
303+
done = !(yield* consumeChunk(chunk.value))
304+
}
305+
if (!done) {
306+
const tail = yield* decodeUtf8(resource, decoder)
307+
if (!discard) pending += tail
308+
if (pending) append(pending.endsWith("\r") ? pending.slice(0, -1) : pending)
246309
}
247-
const tail = yield* Effect.sync(() => decoder.decode())
248-
if (!discard) pending += tail
249-
if (pending) append(pending.endsWith("\r") ? pending.slice(0, -1) : pending)
250-
if (!found && offset !== 1) return yield* Effect.die(new Error(`Offset ${offset} is out of range`))
310+
if (lines.length === 0 && offset !== 1) return yield* Effect.fail(new OffsetOutOfRangeError({ offset }))
251311
return new TextPage({
252312
type: "text-page",
253313
content: lines.join("\n"),
254314
mime: FSUtil.mimeType(real),
255315
offset,
256-
truncated,
316+
truncated: next !== undefined,
257317
...(next === undefined ? {} : { next }),
258318
})
259319
}),
260320
)
261321
})
262322

263323
export const list = Effect.fn("ReadTool.list")(function* (fs: FSUtil.Interface, input: string, page: PageInput = {}) {
264-
const real = yield* fs.realPath(input).pipe(Effect.orDie)
265-
const items = yield* fs.readDirectoryEntries(real).pipe(Effect.orDie)
324+
const real = yield* fs.realPath(input)
325+
const items = yield* fs.readDirectoryEntries(real)
266326
const offset = page.offset ?? 1
267327
const limit = Math.min(page.limit ?? MAX_READ_LINES, MAX_READ_LINES)
268328
const entries = yield* Effect.forEach(

packages/core/src/tool/read.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ export const layer = Layer.effectDiscard(
8383
.pipe(Effect.catchTag("Image.ResizerUnavailableError", () => Effect.succeed(content)))
8484
}
8585
if ("encoding" in content && content.encoding === "base64")
86-
return yield* Effect.fail(new ReadToolFileSystem.BinaryFileError(resource))
86+
return yield* Effect.fail(new ReadToolFileSystem.BinaryFileError({ resource }))
8787
return content
8888
}).pipe(
8989
Effect.mapError((error) => {

0 commit comments

Comments
 (0)