Skip to content

Commit 8584cd5

Browse files
authored
Add String() methods to parsed types (#48)
This enables clients to move back and forth between parsed objects and text patches. The generated patches are semantically equal to the parsed object and should re-parse to the same object, but may not be byte-for-byte identical to the original input. In my testing, formatted text patches are usually identical to the input, but there may be cases where this is not true. Binary patches always differ. This is because Go's 'compress/flate' package ends streams with an empty block, whereas Git's C implementation adds the end-of-stream flag to the last non-empty block. Since the streams will always be different for this reason, I chose to also enable default compression (the test patches I generated with Git used no compression). The main tests for this feature involve parsing, formatting, and then re-parsing a patch to make sure we get equal objects. Formatting is handled by a new internal formatter type, which allows writing all data to the same stream. This isn't exposed publicly right now, but will be useful if there's a need for more flexible formatting functions in the future, like formatting to a user-provided io.Writer.
1 parent 9e0997e commit 8584cd5

20 files changed

+746
-4
lines changed

.golangci.yml

+6
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,12 @@ issues:
1919
exclude-use-default: false
2020

2121
linters-settings:
22+
errcheck:
23+
exclude-functions:
24+
- (*github.com/bluekeyes/go-gitdiff/gitdiff.formatter).Write
25+
- (*github.com/bluekeyes/go-gitdiff/gitdiff.formatter).WriteString
26+
- (*github.com/bluekeyes/go-gitdiff/gitdiff.formatter).WriteByte
27+
- fmt.Fprintf(*github.com/bluekeyes/go-gitdiff/gitdiff.formatter)
2228
goimports:
2329
local-prefixes: github.com/bluekeyes/go-gitdiff
2430
revive:

gitdiff/base85.go

+41-2
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,8 @@ func init() {
1919
}
2020

2121
// base85Decode decodes Base85-encoded data from src into dst. It uses the
22-
// alphabet defined by base85.c in the Git source tree, which appears to be
23-
// unique. src must contain at least len(dst) bytes of encoded data.
22+
// alphabet defined by base85.c in the Git source tree. src must contain at
23+
// least len(dst) bytes of encoded data.
2424
func base85Decode(dst, src []byte) error {
2525
var v uint32
2626
var n, ndst int
@@ -50,3 +50,42 @@ func base85Decode(dst, src []byte) error {
5050
}
5151
return nil
5252
}
53+
54+
// base85Encode encodes src in Base85, writing the result to dst. It uses the
55+
// alphabet defined by base85.c in the Git source tree.
56+
func base85Encode(dst, src []byte) {
57+
var di, si int
58+
59+
encode := func(v uint32) {
60+
dst[di+0] = b85Alpha[(v/(85*85*85*85))%85]
61+
dst[di+1] = b85Alpha[(v/(85*85*85))%85]
62+
dst[di+2] = b85Alpha[(v/(85*85))%85]
63+
dst[di+3] = b85Alpha[(v/85)%85]
64+
dst[di+4] = b85Alpha[v%85]
65+
}
66+
67+
n := (len(src) / 4) * 4
68+
for si < n {
69+
encode(uint32(src[si+0])<<24 | uint32(src[si+1])<<16 | uint32(src[si+2])<<8 | uint32(src[si+3]))
70+
si += 4
71+
di += 5
72+
}
73+
74+
var v uint32
75+
switch len(src) - si {
76+
case 3:
77+
v |= uint32(src[si+2]) << 8
78+
fallthrough
79+
case 2:
80+
v |= uint32(src[si+1]) << 16
81+
fallthrough
82+
case 1:
83+
v |= uint32(src[si+0]) << 24
84+
encode(v)
85+
}
86+
}
87+
88+
// base85Len reports how many bytes of Base85 text are produced when n bytes
// of data are encoded: the input is counted in groups of four bytes, rounding
// a partial final group up, and each group yields five output characters.
func base85Len(n int) int {
	groups := (n + 3) / 4
	return groups * 5
}

gitdiff/base85_test.go

+58
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package gitdiff
22

33
import (
4+
"bytes"
45
"testing"
56
)
67

@@ -58,3 +59,60 @@ func TestBase85Decode(t *testing.T) {
5859
})
5960
}
6061
}
62+
63+
func TestBase85Encode(t *testing.T) {
64+
tests := map[string]struct {
65+
Input []byte
66+
Output string
67+
}{
68+
"zeroBytes": {
69+
Input: []byte{},
70+
Output: "",
71+
},
72+
"twoBytes": {
73+
Input: []byte{0xCA, 0xFE},
74+
Output: "%KiWV",
75+
},
76+
"fourBytes": {
77+
Input: []byte{0x0, 0x0, 0xCA, 0xFE},
78+
Output: "007GV",
79+
},
80+
"sixBytes": {
81+
Input: []byte{0x0, 0x0, 0xCA, 0xFE, 0xCA, 0xFE},
82+
Output: "007GV%KiWV",
83+
},
84+
}
85+
86+
for name, test := range tests {
87+
t.Run(name, func(t *testing.T) {
88+
dst := make([]byte, len(test.Output))
89+
base85Encode(dst, test.Input)
90+
for i, b := range test.Output {
91+
if dst[i] != byte(b) {
92+
t.Errorf("incorrect character at index %d: expected '%c', actual '%c'", i, b, dst[i])
93+
}
94+
}
95+
})
96+
}
97+
}
98+
99+
func FuzzBase85Roundtrip(f *testing.F) {
100+
f.Add([]byte{0x2b, 0x0d})
101+
f.Add([]byte{0xbc, 0xb4, 0x3f})
102+
f.Add([]byte{0xfa, 0x62, 0x05, 0x83, 0x24, 0x39, 0xd5, 0x25})
103+
f.Add([]byte{0x31, 0x59, 0x02, 0xa0, 0x61, 0x12, 0xd9, 0x43, 0xb8, 0x23, 0x1a, 0xb4, 0x02, 0xae, 0xfa, 0xcc, 0x22, 0xad, 0x41, 0xb9, 0xb8})
104+
105+
f.Fuzz(func(t *testing.T, in []byte) {
106+
n := len(in)
107+
dst := make([]byte, base85Len(n))
108+
out := make([]byte, n)
109+
110+
base85Encode(dst, in)
111+
if err := base85Decode(out, dst); err != nil {
112+
t.Fatalf("unexpected error decoding base85 data: %v", err)
113+
}
114+
if !bytes.Equal(in, out) {
115+
t.Errorf("decoded data differed from input data:\n input: %x\n output: %x\nencoding: %s\n", in, out, string(dst))
116+
}
117+
})
118+
}

0 commit comments

Comments
 (0)