Skip to content

Commit 53bcdf7

Browse files
authored
Fix EOF error for some files without final newline (#27)
If a file was an exact multiple of 1024 bytes (the size of an internal buffer) and was missing a final newline, the LineReaderAt implementation would drop the last line, leading to an unexpected EOF error on apply. In addition to fixing the bug, slightly change the behavior of ReadLinesAt to reflect how it is actually used: 1. Clarify that the return value n includes all lines instead of only lines with a final newline. This was already true except in the case of the bug fixed by this commit. 2. Only return io.EOF if fewer lines are read than requested. The previous implementation also returned io.EOF if the last line was missing a final newline, but this was confusing and didn't really serve a purpose. This is technically a breaking change for external implementations but an implementation that exactly followed the "spec" was already broken in certain edge cases.
1 parent b575654 commit 53bcdf7

File tree

3 files changed

+77
-28
lines changed

3 files changed

+77
-28
lines changed

gitdiff/apply.go

+1-4
Original file line numberDiff line numberDiff line change
@@ -231,10 +231,7 @@ func (a *Applier) ApplyTextFragment(dst io.Writer, f *TextFragment) error {
231231

232232
preimage := make([][]byte, fragEnd-start)
233233
n, err := a.lineSrc.ReadLinesAt(preimage, start)
234-
switch {
235-
case err == nil:
236-
case err == io.EOF && n == len(preimage): // last line of frag has no newline character
237-
default:
234+
if err != nil {
238235
return applyError(err, lineNum(start+int64(n)))
239236
}
240237

gitdiff/io.go

+22-22
Original file line numberDiff line numberDiff line change
@@ -5,21 +5,23 @@ import (
55
"io"
66
)
77

8+
const (
9+
byteBufferSize = 32 * 1024 // from io.Copy
10+
lineBufferSize = 32
11+
indexBufferSize = 1024
12+
)
13+
814
// LineReaderAt is the interface that wraps the ReadLinesAt method.
915
//
10-
// ReadLinesAt reads len(lines) into lines starting at line offset in the
11-
// input source. It returns number of full lines read (0 <= n <= len(lines))
12-
// and any error encountered. Line numbers are zero-indexed.
16+
// ReadLinesAt reads len(lines) lines into lines starting at line offset. It returns
17+
// the number of lines read (0 <= n <= len(lines)) and any error encountered.
18+
// Line numbers are zero-indexed.
1319
//
1420
// If n < len(lines), ReadLinesAt returns a non-nil error explaining why more
1521
// lines were not returned.
1622
//
17-
// Each full line includes the line ending character(s). If the last line of
18-
// the input does not have a line ending character, ReadLinesAt returns the
19-
// content of the line and io.EOF.
20-
//
21-
// If the content of the input source changes after the first call to
22-
// ReadLinesAt, the behavior of future calls is undefined.
23+
// Lines read by ReadLinesAt include the newline character. The last line does
24+
// not have a final newline character if the input ends without one.
2325
type LineReaderAt interface {
2426
ReadLinesAt(lines [][]byte, offset int64) (n int, err error)
2527
}
@@ -65,7 +67,7 @@ func (r *lineReaderAt) ReadLinesAt(lines [][]byte, offset int64) (n int, err err
6567
lines[n] = buf[start:end]
6668
}
6769

68-
if n < count || buf[len(buf)-1] != '\n' {
70+
if n < count {
6971
return n, io.EOF
7072
}
7173
return n, nil
@@ -75,13 +77,9 @@ func (r *lineReaderAt) ReadLinesAt(lines [][]byte, offset int64) (n int, err err
7577
// for line or a read returns io.EOF. It returns an error if and only if there
7678
// is an error reading data.
7779
func (r *lineReaderAt) indexTo(line int64) error {
78-
var buf [1024]byte
79-
80-
var offset int64
81-
if len(r.index) > 0 {
82-
offset = r.index[len(r.index)-1]
83-
}
80+
var buf [indexBufferSize]byte
8481

82+
offset := r.lastOffset()
8583
for int64(len(r.index)) < line {
8684
n, err := r.r.ReadAt(buf[:], offset)
8785
if err != nil && err != io.EOF {
@@ -94,7 +92,7 @@ func (r *lineReaderAt) indexTo(line int64) error {
9492
}
9593
}
9694
if err == io.EOF {
97-
if n > 0 && buf[n-1] != '\n' {
95+
if offset > r.lastOffset() {
9896
r.index = append(r.index, offset)
9997
}
10098
r.eof = true
@@ -104,6 +102,13 @@ func (r *lineReaderAt) indexTo(line int64) error {
104102
return nil
105103
}
106104

105+
func (r *lineReaderAt) lastOffset() int64 {
106+
if n := len(r.index); n > 0 {
107+
return r.index[n-1]
108+
}
109+
return 0
110+
}
111+
107112
// readBytes reads the bytes of the n lines starting at line and returns the
108113
// bytes and the offset of the first byte in the underlying source.
109114
func (r *lineReaderAt) readBytes(line, n int64) (b []byte, offset int64, err error) {
@@ -147,11 +152,6 @@ func isLen(r io.ReaderAt, n int64) (bool, error) {
147152
return false, err
148153
}
149154

150-
const (
151-
byteBufferSize = 32 * 1024 // from io.Copy
152-
lineBufferSize = 32
153-
)
154-
155155
// copyFrom writes bytes starting from offset off in src to dst stopping at the
156156
// end of src or at the first error. copyFrom returns the number of bytes
157157
// written and any error.

gitdiff/io_test.go

+54-2
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@ import (
99
)
1010

1111
func TestLineReaderAt(t *testing.T) {
12+
const lineTemplate = "generated test line %d\n"
13+
1214
tests := map[string]struct {
1315
InputLines int
1416
Offset int64
@@ -42,6 +44,11 @@ func TestLineReaderAt(t *testing.T) {
4244
Offset: 2,
4345
Count: 0,
4446
},
47+
"readAllLines": {
48+
InputLines: 64,
49+
Offset: 0,
50+
Count: 64,
51+
},
4552
"readThroughEOF": {
4653
InputLines: 16,
4754
Offset: 12,
@@ -71,8 +78,6 @@ func TestLineReaderAt(t *testing.T) {
7178
},
7279
}
7380

74-
const lineTemplate = "generated test line %d\n"
75-
7681
for name, test := range tests {
7782
t.Run(name, func(t *testing.T) {
7883
var input bytes.Buffer
@@ -114,6 +119,53 @@ func TestLineReaderAt(t *testing.T) {
114119
}
115120
})
116121
}
122+
123+
newlineTests := map[string]struct {
124+
InputSize int
125+
}{
126+
"readLinesNoFinalNewline": {
127+
InputSize: indexBufferSize + indexBufferSize/2,
128+
},
129+
"readLinesNoFinalNewlineBufferMultiple": {
130+
InputSize: 4 * indexBufferSize,
131+
},
132+
}
133+
134+
for name, test := range newlineTests {
135+
t.Run(name, func(t *testing.T) {
136+
input := bytes.Repeat([]byte("0"), test.InputSize)
137+
138+
var output [][]byte
139+
for i := 0; i < len(input); i++ {
140+
last := i
141+
i += rand.Intn(80)
142+
if i < len(input)-1 { // last character of input must not be a newline
143+
input[i] = '\n'
144+
output = append(output, input[last:i+1])
145+
} else {
146+
output = append(output, input[last:])
147+
}
148+
}
149+
150+
r := &lineReaderAt{r: bytes.NewReader(input)}
151+
lines := make([][]byte, len(output))
152+
153+
n, err := r.ReadLinesAt(lines, 0)
154+
if err != nil {
155+
t.Fatalf("unexpected error reading reading lines: %v", err)
156+
}
157+
158+
if n != len(output) {
159+
t.Fatalf("incorrect number of lines read: expected %d, actual %d", len(output), n)
160+
}
161+
162+
for i, line := range lines {
163+
if !bytes.Equal(output[i], line) {
164+
t.Errorf("incorrect content in line %d:\nexpected: %q\nactual: %q", i, output[i], line)
165+
}
166+
}
167+
})
168+
}
117169
}
118170

119171
func TestCopyFrom(t *testing.T) {

0 commit comments

Comments
 (0)