snapshot: keep files larger than 1 GB out of snapshots

Review changes folded into this patch:
- list paths with `git ls-files --full-name` and run `git rm` from the worktree
  root, so paths stay worktree-relative when Instance.directory is a
  subdirectory of Instance.worktree
- write escaped, root-anchored patterns to info/exclude instead of raw file
  names, so glob characters in a name are matched literally and a same-named
  file in another directory is not excluded
- run `git rm --cached` with `-f` and `--literal-pathspecs`, and log a warning
  when it fails instead of discarding the result
- document the new helpers and add a test for glob characters in file names

Note: line breaks and indentation were reconstructed from a whitespace-collapsed
copy of the patch; use `git apply --ignore-whitespace` if context lines do not
match. The post-image ids on the `index` lines are those of the submitted patch.

diff --git a/packages/opencode/src/snapshot/index.ts b/packages/opencode/src/snapshot/index.ts
index b3c8a905c25..7a48bb19d5a 100644
--- a/packages/opencode/src/snapshot/index.ts
+++ b/packages/opencode/src/snapshot/index.ts
@@ -12,6 +12,7 @@ export namespace Snapshot {
   const log = Log.create({ service: "snapshot" })
   const hour = 60 * 60 * 1000
   const prune = "7.days"
+  const sizeThreshold = 1 * 1024 * 1024 * 1024 // 1 GB
 
   export function init() {
     Scheduler.register({
@@ -22,6 +23,133 @@ export namespace Snapshot {
     })
   }
 
+  /**
+   * Stages the working tree into the snapshot index while keeping files larger
+   * than `sizeThreshold` out of it.
+   *
+   * New paths are first recorded with `git add -N` (an index entry without
+   * content), oversized paths are then dropped from the index and excluded, and
+   * only afterwards does the real `git add .` hash file contents.
+   *
+   * @returns the shell output of the last git command that ran; failures are
+   * logged rather than thrown.
+   */
+  async function gitAddFiltered() {
+    const gitEnv = {
+      ...process.env,
+      GIT_DIR: gitdir(),
+      GIT_WORK_TREE: Instance.worktree,
+    }
+
+    const intent = await $`git add -N .`.cwd(Instance.directory).env(gitEnv).quiet().nothrow()
+    if (intent.exitCode !== 0) {
+      log.warn("git add -N failed", { exitCode: intent.exitCode, stderr: intent.stderr.toString() })
+      return intent
+    }
+
+    const listed = await listStagedFiles(gitEnv)
+    if ("output" in listed) return listed.output
+
+    const largeFiles = await findLargeFiles(listed.files)
+    await unstageAndExclude(gitEnv, largeFiles)
+
+    const addOutput = await $`git add .`.cwd(Instance.directory).env(gitEnv).quiet().nothrow()
+    if (addOutput.exitCode !== 0) {
+      log.warn("git add failed", { exitCode: addOutput.exitCode, stderr: addOutput.stderr.toString() })
+    }
+    return addOutput
+  }
+
+  /**
+   * Lists every path that belongs in the snapshot (tracked plus untracked, minus
+   * ignored), NUL-separated so unusual file names survive intact.
+   *
+   * `--full-name` keeps paths relative to the worktree root even when
+   * `Instance.directory` is a subdirectory of it; the size check and the exclude
+   * file both work with root-relative paths.
+   *
+   * @returns `{ files }` on success, or `{ output }` holding the failed command.
+   */
+  async function listStagedFiles(env: NodeJS.ProcessEnv) {
+    const output = await $`git ls-files -z --full-name --cached --others --exclude-standard`
+      .cwd(Instance.directory)
+      .env(env)
+      .quiet()
+      .nothrow()
+    if (output.exitCode !== 0) {
+      log.warn("git ls-files failed", { exitCode: output.exitCode, stderr: output.stderr.toString() })
+      return { output }
+    }
+    return { files: output.stdout.toString().split("\0").filter(Boolean) }
+  }
+
+  /**
+   * Returns the subset of `files` (paths relative to `Instance.worktree`) whose
+   * size on disk exceeds `sizeThreshold`. Paths that cannot be stat'ed, e.g.
+   * files deleted from disk but still in the index, are treated as not large.
+   */
+  async function findLargeFiles(files: string[]) {
+    const checks = await Promise.all(
+      files.map(async (file) => {
+        const full = path.join(Instance.worktree, file)
+        const stat = await fs.stat(full).catch(() => null)
+        return { file, large: stat ? stat.size > sizeThreshold : false }
+      }),
+    )
+    return checks.filter((item) => item.large).map((item) => item.file)
+  }
+
+  /**
+   * Turns a worktree-relative path into a gitignore pattern that matches only
+   * that path: glob metacharacters and trailing spaces are backslash-escaped
+   * and a leading `/` anchors the pattern to the worktree root, so a same-named
+   * file in another directory is not excluded by accident.
+   */
+  function excludePattern(file: string) {
+    const escaped = file.replace(/[\\*?\[]/g, "\\$&").replace(/ +$/, (spaces) => "\\ ".repeat(spaces.length))
+    return `/${escaped}`
+  }
+
+  /**
+   * Drops `files` from the snapshot index and records them in the snapshot
+   * repository's `info/exclude` so later `git add` runs skip them. The files on
+   * disk are never touched.
+   *
+   * @param env environment carrying `GIT_DIR` / `GIT_WORK_TREE` for the snapshot repo
+   * @param files worktree-relative paths as printed by `git ls-files --full-name`
+   */
+  async function unstageAndExclude(env: NodeJS.ProcessEnv, files: string[]) {
+    if (files.length === 0) return
+    log.info("removing large files from snapshot", { files })
+    // --literal-pathspecs: names such as "*.bin" must not be expanded as globs.
+    // -f: without it git refuses to unstage an entry whose content differs from
+    // both the file on disk and HEAD, which can happen when an already
+    // snapshotted file grows past the threshold. With --cached only the index changes.
+    const proc = Bun.spawn(
+      ["git", "--literal-pathspecs", "rm", "--cached", "-f", "--ignore-unmatch", "--", ...files],
+      {
+        // Paths are relative to the worktree root, so resolve them from there.
+        cwd: Instance.worktree,
+        env,
+        stdout: "ignore",
+        stderr: "pipe",
+      },
+    )
+    const [exitCode, stderr] = await Promise.all([proc.exited, new Response(proc.stderr).text()])
+    if (exitCode !== 0) {
+      log.warn("git rm --cached failed", { exitCode, stderr })
+    }
+
+    const exclude = path.join(gitdir(), "info", "exclude")
+    await fs.mkdir(path.dirname(exclude), { recursive: true })
+    const current = await fs.readFile(exclude, "utf8").catch(() => "")
+    const existing = new Set(current.split("\n").filter(Boolean))
+    const added = [...new Set(files.map(excludePattern))].filter((pattern) => !existing.has(pattern))
+    if (added.length === 0) return
+    const base = current.length === 0 || current.endsWith("\n") ? current : `${current}\n`
+    await fs.writeFile(exclude, `${base}${added.join("\n")}\n`)
+  }
+
   export async function cleanup() {
     if (Instance.project.vcs !== "git") return
     const cfg = await Config.get()
@@ -65,7 +193,7 @@ export namespace Snapshot {
       await $`git --git-dir ${git} config core.autocrlf false`.quiet().nothrow()
       log.info("initialized")
     }
-    await $`git --git-dir ${git} --work-tree ${Instance.worktree} add .`.quiet().cwd(Instance.directory).nothrow()
+    await gitAddFiltered()
     const hash = await $`git --git-dir ${git} --work-tree ${Instance.worktree} write-tree`
       .quiet()
       .cwd(Instance.directory)
@@ -83,7 +211,7 @@ export namespace Snapshot {
 
   export async function patch(hash: string): Promise {
     const git = gitdir()
-    await $`git --git-dir ${git} --work-tree ${Instance.worktree} add .`.quiet().cwd(Instance.directory).nothrow()
+    await gitAddFiltered()
     const result =
       await $`git -c core.autocrlf=false -c core.quotepath=false --git-dir ${git} --work-tree ${Instance.worktree} diff --no-ext-diff --name-only ${hash} -- .`
         .quiet()
@@ -161,7 +289,7 @@ export namespace Snapshot {
 
   export async function diff(hash: string) {
     const git = gitdir()
-    await $`git --git-dir ${git} --work-tree ${Instance.worktree} add .`.quiet().cwd(Instance.directory).nothrow()
+    await gitAddFiltered()
     const result =
       await $`git -c core.autocrlf=false -c core.quotepath=false --git-dir ${git} --work-tree ${Instance.worktree} diff --no-ext-diff ${hash} -- .`
         .quiet()
diff --git a/packages/opencode/test/snapshot/snapshot.test.ts b/packages/opencode/test/snapshot/snapshot.test.ts
index 091469ec761..f6c8d356d48 100644
--- a/packages/opencode/test/snapshot/snapshot.test.ts
+++ b/packages/opencode/test/snapshot/snapshot.test.ts
@@ -1,5 +1,6 @@
 import { test, expect } from "bun:test"
 import { $ } from "bun"
+import fs from "fs/promises"
 import { Snapshot } from "../../src/snapshot"
 import { Instance } from "../../src/project/instance"
 import { tmpdir } from "../fixture/fixture"
@@ -23,6 +24,11 @@ async function bootstrap() {
   })
 }
 
+async function writeLarge(file: string) {
+  await Bun.write(file, "")
+  await fs.truncate(file, 1024 * 1024 * 1024 + 1)
+}
+
 test("tracks deleted files correctly", async () => {
   await using tmp = await bootstrap()
   await Instance.provide({
@@ -164,6 +170,123 @@ test("large file handling", async () => {
   })
 })
 
+test("patch ignores files larger than snapshot threshold", async () => {
+  await using tmp = await bootstrap()
+  await Instance.provide({
+    directory: tmp.path,
+    fn: async () => {
+      const before = await Snapshot.track()
+      expect(before).toBeTruthy()
+
+      const big = `${tmp.path}/big.bin`
+      const small = `${tmp.path}/small.txt`
+
+      await writeLarge(big)
+      await Bun.write(small, "small")
+
+      const patch = await Snapshot.patch(before!)
+      expect(patch.files).toContain(small)
+      expect(patch.files).not.toContain(big)
+      expect(await Bun.file(big).exists()).toBe(true)
+    },
+  })
+})
+
+test("patch ignores large files with spaces and leading dashes", async () => {
+  await using tmp = await bootstrap()
+  await Instance.provide({
+    directory: tmp.path,
+    fn: async () => {
+      const before = await Snapshot.track()
+      expect(before).toBeTruthy()
+
+      const spaced = `${tmp.path}/big file.bin`
+      const dashed = `${tmp.path}/-big.bin`
+      const small = `${tmp.path}/small.txt`
+
+      await writeLarge(spaced)
+      await writeLarge(dashed)
+      await Bun.write(small, "small")
+
+      const patch = await Snapshot.patch(before!)
+      expect(patch.files).toContain(small)
+      expect(patch.files).not.toContain(spaced)
+      expect(patch.files).not.toContain(dashed)
+      expect(await Bun.file(spaced).exists()).toBe(true)
+      expect(await Bun.file(dashed).exists()).toBe(true)
+    },
+  })
+})
+
+test("patch ignores large files with glob characters in the name", async () => {
+  await using tmp = await bootstrap()
+  await Instance.provide({
+    directory: tmp.path,
+    fn: async () => {
+      const before = await Snapshot.track()
+      expect(before).toBeTruthy()
+
+      const globbed = `${tmp.path}/big[1].bin`
+      const small = `${tmp.path}/small.txt`
+
+      await writeLarge(globbed)
+      await Bun.write(small, "small")
+
+      const patch = await Snapshot.patch(before!)
+      expect(patch.files).toContain(small)
+      expect(patch.files).not.toContain(globbed)
+      expect(await Bun.file(globbed).exists()).toBe(true)
+    },
+  })
+})
+
+test("diff ignores files larger than snapshot threshold", async () => {
+  await using tmp = await bootstrap()
+  await Instance.provide({
+    directory: tmp.path,
+    fn: async () => {
+      const before = await Snapshot.track()
+      expect(before).toBeTruthy()
+
+      const big = `${tmp.path}/big.bin`
+      const small = `${tmp.path}/small.txt`
+
+      await writeLarge(big)
+      await Bun.write(small, "small")
+
+      const diff = await Snapshot.diff(before!)
+      expect(diff).toContain("small.txt")
+      expect(diff).not.toContain("big.bin")
+      expect(await Bun.file(big).exists()).toBe(true)
+    },
+  })
+})
+
+test("diffFull ignores files larger than snapshot threshold", async () => {
+  await using tmp = await bootstrap()
+  await Instance.provide({
+    directory: tmp.path,
+    fn: async () => {
+      const before = await Snapshot.track()
+      expect(before).toBeTruthy()
+
+      const big = `${tmp.path}/big.bin`
+      const small = `${tmp.path}/small.txt`
+
+      await writeLarge(big)
+      await Bun.write(small, "small")
+
+      const after = await Snapshot.track()
+      expect(after).toBeTruthy()
+
+      const diffs = await Snapshot.diffFull(before!, after!)
+      expect(diffs.length).toBe(1)
+      expect(diffs[0].file).toBe("small.txt")
+      expect(await Bun.file(big).exists()).toBe(true)
+    },
+  })
+})
+
 test("nested directory revert", async () => {
   await using tmp = await bootstrap()
   await Instance.provide({