Skip to content

Commit d2310b3

Browse files
authored
Merge pull request #897 from entireio/soph/binary-check-ci
check for binaries in PR diff
2 parents 5b06490 + f2b7ba8 commit d2310b3

File tree

3 files changed

+304
-0
lines changed

3 files changed

+304
-0
lines changed
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
# Runs scripts/check-pr-binaries.sh on every pull request and fails the job
# when the script reports a binary file larger than MAX_BINARY_SIZE_BYTES.
name: PR Binary Size

on:
  pull_request:

permissions:
  contents: read

jobs:
  binary-size:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
        with:
          # Full history: the script walks every commit between base and head.
          fetch-depth: 0
          # The check only reads local git objects, and the script it runs comes
          # from the pull request itself, so do not persist the token for it.
          persist-credentials: false

      - name: Check for oversized binary files
        env:
          BASE_SHA: ${{ github.event.pull_request.base.sha }}
          HEAD_SHA: ${{ github.event.pull_request.head.sha }}
          # 1 MiB; matches the script's built-in default.
          MAX_BINARY_SIZE_BYTES: "1048576"
        run: bash scripts/check-pr-binaries.sh "${BASE_SHA}" "${HEAD_SHA}"
Lines changed: 210 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,210 @@
1+
package cli
2+
3+
import (
4+
"bytes"
5+
"context"
6+
"os"
7+
"os/exec"
8+
"path/filepath"
9+
"runtime"
10+
"testing"
11+
12+
"github.com/entireio/cli/cmd/entire/cli/testutil"
13+
"github.com/stretchr/testify/require"
14+
)
15+
16+
func TestCheckPRBinaries_AddThenDeleteStillFails(t *testing.T) {
17+
t.Parallel()
18+
19+
repoDir := t.TempDir()
20+
testutil.InitRepo(t, repoDir)
21+
testutil.WriteFile(t, repoDir, "README.md", "init\n")
22+
testutil.GitAdd(t, repoDir, "README.md")
23+
testutil.GitCommit(t, repoDir, "init")
24+
baseSHA := testutil.GetHeadHash(t, repoDir)
25+
26+
largeBinary := bytes.Repeat([]byte{0}, 1048577)
27+
err := os.WriteFile(filepath.Join(repoDir, "oversized.bin"), largeBinary, 0o644)
28+
require.NoError(t, err)
29+
testutil.GitAdd(t, repoDir, "oversized.bin")
30+
testutil.GitCommit(t, repoDir, "add oversized binary")
31+
32+
headWithBinary := testutil.GetHeadHash(t, repoDir)
33+
output, err := runBinaryCheckScript(t, repoDir, baseSHA, headWithBinary)
34+
require.Error(t, err)
35+
require.Contains(t, output, "oversized.bin")
36+
37+
runGitCommand(t, repoDir, "rm", "oversized.bin")
38+
testutil.GitCommit(t, repoDir, "delete oversized binary")
39+
40+
output, err = runBinaryCheckScript(t, repoDir, baseSHA, testutil.GetHeadHash(t, repoDir))
41+
require.Error(t, err)
42+
require.Contains(t, output, "oversized.bin")
43+
}
44+
45+
func TestCheckPRBinaries_MergeCommitStillFails(t *testing.T) {
46+
t.Parallel()
47+
48+
repoDir := t.TempDir()
49+
testutil.InitRepo(t, repoDir)
50+
testutil.WriteFile(t, repoDir, "README.md", "init\n")
51+
testutil.GitAdd(t, repoDir, "README.md")
52+
testutil.GitCommit(t, repoDir, "init")
53+
baseSHA := testutil.GetHeadHash(t, repoDir)
54+
55+
runGitCommand(t, repoDir, "checkout", "-b", "feature")
56+
testutil.WriteFile(t, repoDir, "feature.txt", "feature\n")
57+
testutil.GitAdd(t, repoDir, "feature.txt")
58+
testutil.GitCommit(t, repoDir, "feature change")
59+
60+
runGitCommand(t, repoDir, "checkout", "-b", "side", baseSHA)
61+
largeBinary := bytes.Repeat([]byte{0}, 1048577)
62+
err := os.WriteFile(filepath.Join(repoDir, "oversized.bin"), largeBinary, 0o644)
63+
require.NoError(t, err)
64+
testutil.GitAdd(t, repoDir, "oversized.bin")
65+
testutil.GitCommit(t, repoDir, "add oversized binary")
66+
67+
runGitCommand(t, repoDir, "checkout", "feature")
68+
runGitCommand(t, repoDir, "merge", "--no-ff", "side", "-m", "merge side")
69+
70+
output, err := runBinaryCheckScript(t, repoDir, baseSHA, testutil.GetHeadHash(t, repoDir))
71+
require.Error(t, err)
72+
require.Contains(t, output, "oversized.bin")
73+
}
74+
75+
func TestCheckPRBinaries_SmallBinaryPasses(t *testing.T) {
76+
t.Parallel()
77+
78+
repoDir := t.TempDir()
79+
testutil.InitRepo(t, repoDir)
80+
testutil.WriteFile(t, repoDir, "README.md", "init\n")
81+
testutil.GitAdd(t, repoDir, "README.md")
82+
testutil.GitCommit(t, repoDir, "init")
83+
baseSHA := testutil.GetHeadHash(t, repoDir)
84+
85+
writeBinaryFile(t, repoDir, "small.bin", bytes.Repeat([]byte{0}, 1024))
86+
testutil.GitAdd(t, repoDir, "small.bin")
87+
testutil.GitCommit(t, repoDir, "add small binary")
88+
89+
output, err := runBinaryCheckScript(t, repoDir, baseSHA, testutil.GetHeadHash(t, repoDir))
90+
require.NoError(t, err)
91+
require.Contains(t, output, "No oversized binary files found.")
92+
}
93+
94+
func TestCheckPRBinaries_ModifiedOversizedBinaryFails(t *testing.T) {
95+
t.Parallel()
96+
97+
repoDir := t.TempDir()
98+
testutil.InitRepo(t, repoDir)
99+
testutil.WriteFile(t, repoDir, "README.md", "init\n")
100+
testutil.GitAdd(t, repoDir, "README.md")
101+
writeBinaryFile(t, repoDir, "asset.bin", bytes.Repeat([]byte{0}, 1024))
102+
testutil.GitAdd(t, repoDir, "asset.bin")
103+
testutil.GitCommit(t, repoDir, "init")
104+
baseSHA := testutil.GetHeadHash(t, repoDir)
105+
106+
writeBinaryFile(t, repoDir, "asset.bin", bytes.Repeat([]byte{1}, 1048577))
107+
testutil.GitAdd(t, repoDir, "asset.bin")
108+
testutil.GitCommit(t, repoDir, "grow binary")
109+
110+
output, err := runBinaryCheckScript(t, repoDir, baseSHA, testutil.GetHeadHash(t, repoDir))
111+
require.Error(t, err)
112+
require.Contains(t, output, "asset.bin")
113+
}
114+
115+
func TestCheckPRBinaries_LargeTextFilePasses(t *testing.T) {
116+
t.Parallel()
117+
118+
repoDir := t.TempDir()
119+
testutil.InitRepo(t, repoDir)
120+
testutil.WriteFile(t, repoDir, "README.md", "init\n")
121+
testutil.GitAdd(t, repoDir, "README.md")
122+
testutil.GitCommit(t, repoDir, "init")
123+
baseSHA := testutil.GetHeadHash(t, repoDir)
124+
125+
largeText := bytes.Repeat([]byte("a"), 1048577)
126+
err := os.WriteFile(filepath.Join(repoDir, "large.txt"), largeText, 0o644)
127+
require.NoError(t, err)
128+
testutil.GitAdd(t, repoDir, "large.txt")
129+
testutil.GitCommit(t, repoDir, "add large text file")
130+
131+
output, err := runBinaryCheckScript(t, repoDir, baseSHA, testutil.GetHeadHash(t, repoDir))
132+
require.NoError(t, err)
133+
require.Contains(t, output, "No oversized binary files found.")
134+
}
135+
136+
func TestCheckPRBinaries_CustomThreshold(t *testing.T) {
137+
t.Parallel()
138+
139+
repoDir := t.TempDir()
140+
testutil.InitRepo(t, repoDir)
141+
testutil.WriteFile(t, repoDir, "README.md", "init\n")
142+
testutil.GitAdd(t, repoDir, "README.md")
143+
testutil.GitCommit(t, repoDir, "init")
144+
baseSHA := testutil.GetHeadHash(t, repoDir)
145+
146+
writeBinaryFile(t, repoDir, "large.bin", bytes.Repeat([]byte{0}, 1572864))
147+
testutil.GitAdd(t, repoDir, "large.bin")
148+
testutil.GitCommit(t, repoDir, "add medium binary")
149+
150+
output, err := runBinaryCheckScriptWithEnv(t, repoDir, baseSHA, testutil.GetHeadHash(t, repoDir), map[string]string{
151+
"MAX_BINARY_SIZE_BYTES": "2097152",
152+
})
153+
require.NoError(t, err)
154+
require.Contains(t, output, "No oversized binary files found.")
155+
}
156+
157+
func TestCheckPRBinaries_InvalidThresholdFails(t *testing.T) {
158+
t.Parallel()
159+
160+
repoDir := t.TempDir()
161+
testutil.InitRepo(t, repoDir)
162+
testutil.WriteFile(t, repoDir, "README.md", "init\n")
163+
testutil.GitAdd(t, repoDir, "README.md")
164+
testutil.GitCommit(t, repoDir, "init")
165+
166+
output, err := runBinaryCheckScriptWithEnv(t, repoDir, testutil.GetHeadHash(t, repoDir), "HEAD", map[string]string{
167+
"MAX_BINARY_SIZE_BYTES": "not-a-number",
168+
})
169+
require.Error(t, err)
170+
require.Contains(t, output, "MAX_BINARY_SIZE_BYTES must be an integer")
171+
}
172+
173+
func runBinaryCheckScript(t *testing.T, repoDir, baseSHA, headSHA string) (string, error) {
174+
t.Helper()
175+
176+
return runBinaryCheckScriptWithEnv(t, repoDir, baseSHA, headSHA, nil)
177+
}
178+
179+
func runBinaryCheckScriptWithEnv(t *testing.T, repoDir, baseSHA, headSHA string, extraEnv map[string]string) (string, error) {
180+
t.Helper()
181+
182+
_, filename, _, ok := runtime.Caller(0)
183+
require.True(t, ok)
184+
185+
scriptPath := filepath.Join(filepath.Dir(filename), "..", "..", "..", "scripts", "check-pr-binaries.sh")
186+
cmd := exec.CommandContext(context.Background(), "bash", scriptPath, baseSHA, headSHA)
187+
cmd.Dir = repoDir
188+
cmd.Env = os.Environ()
189+
for key, value := range extraEnv {
190+
cmd.Env = append(cmd.Env, key+"="+value)
191+
}
192+
output, err := cmd.CombinedOutput()
193+
return string(output), err
194+
}
195+
196+
func writeBinaryFile(t *testing.T, repoDir, path string, content []byte) {
197+
t.Helper()
198+
199+
err := os.WriteFile(filepath.Join(repoDir, path), content, 0o644)
200+
require.NoError(t, err)
201+
}
202+
203+
func runGitCommand(t *testing.T, repoDir string, args ...string) {
204+
t.Helper()
205+
206+
cmd := exec.CommandContext(context.Background(), "git", args...)
207+
cmd.Dir = repoDir
208+
output, err := cmd.CombinedOutput()
209+
require.NoErrorf(t, err, "git %v failed: %s", args, output)
210+
}

scripts/check-pr-binaries.sh

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
#!/usr/bin/env bash
# Fails when any commit in <base>..<head> adds or modifies a binary file whose
# blob is larger than MAX_BINARY_SIZE_BYTES (default: 1048576).
#
# Usage: check-pr-binaries.sh [base-sha] [head-ref]
#   base-sha  falls back to $GITHUB_BASE_SHA when the argument is omitted
#   head-ref  defaults to HEAD
#
# Each commit in the range is inspected on its own, so a binary that is added
# and later deleted inside the range is still reported.
set -euo pipefail

log() {
  echo "$@"
}

# Print a GitHub Actions error annotation on stderr and abort.
fail() {
  echo "::error::$1" >&2
  exit 1
}

base_sha="${1:-${GITHUB_BASE_SHA:-}}"
head_ref="${2:-HEAD}"
max_binary_size_bytes="${MAX_BINARY_SIZE_BYTES:-1048576}"

if [[ -z "${base_sha}" ]]; then
  fail "Missing base SHA. Pass it as the first argument or set GITHUB_BASE_SHA."
fi

if ! [[ "${max_binary_size_bytes}" =~ ^[0-9]+$ ]]; then
  fail "MAX_BINARY_SIZE_BYTES must be an integer, got: ${max_binary_size_bytes}"
fi
# Force base 10 so a value with leading zeros (e.g. "0100" or "08") is not
# interpreted as octal by the arithmetic comparison below.
max_binary_size_bytes=$((10#${max_binary_size_bytes}))

if ! git rev-parse --verify "${base_sha}^{commit}" >/dev/null 2>&1; then
  fail "Base commit not found locally: ${base_sha}"
fi

if ! git rev-parse --verify "${head_ref}^{commit}" >/dev/null 2>&1; then
  fail "Head commit not found locally: ${head_ref}"
fi

# Two-dot range: commits reachable from head but not from base. The same
# string is logged and passed to rev-list so the message matches what is walked.
range="${base_sha}..${head_ref}"

log "Checking binary files in ${range}"
log "Maximum allowed binary size: ${max_binary_size_bytes} bytes"

# Entries have the form "<path>:<size>".
violations=()

while IFS= read -r commit; do
  [[ -z "${commit}" ]] && continue

  # -z makes git emit NUL-terminated records with the path unquoted; without
  # it, paths containing non-ASCII bytes, quotes or tabs are C-quoted and
  # cannot be handed to `git cat-file` as-is.
  while IFS=$'\t' read -r -d '' added deleted path; do
    [[ -z "${path}" ]] && continue

    # In git numstat output, binary diffs use "-" for added/deleted counts.
    if [[ "${added}" != "-" || "${deleted}" != "-" ]]; then
      continue
    fi

    blob_size="$(git cat-file -s "${commit}:${path}")"
    if (( blob_size <= max_binary_size_bytes )); then
      continue
    fi

    violations+=("${path}:${blob_size}")
  done < <(git diff-tree -z -m --root --no-commit-id --diff-filter=AM -r --numstat "${commit}")
done < <(git rev-list --reverse "${range}")

if (( ${#violations[@]} == 0 )); then
  log "No oversized binary files found."
  exit 0
fi

log "Oversized binary files detected:"
for violation in "${violations[@]}"; do
  # Split on the LAST colon: the size never contains one, but the path may.
  path="${violation%:*}"
  size="${violation##*:}"
  echo "  - ${path} (${size} bytes)"
done

fail "Pull request includes binary files larger than ${max_binary_size_bytes} bytes."

0 commit comments

Comments
 (0)