Skip to content

Commit

Permalink
Merge pull request #28 from pjbgf/optimisations
Browse files Browse the repository at this point in the history
goasm: Implement amd64 optimised code
  • Loading branch information
pjbgf authored Feb 25, 2023
2 parents 3b4a158 + 95a7df9 commit a2b84bc
Show file tree
Hide file tree
Showing 15 changed files with 2,797 additions and 61 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,5 +29,7 @@ jobs:
uses: actions/setup-go@6edd4406fa81c3da01a34fa6f6343087c207a568 # v3.5.0
with:
go-version: ${{ matrix.go-version }}
- name: Run Verify
run: make verify
- name: Run Cross Build
run: make cross-build
8 changes: 8 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -30,3 +30,11 @@ build-nocgo:

# Run cross-compilation to ensure the supported architectures still build.
cross-build: build-arm build-arm64 build-nocgo

# generate runs the sha1cdblock_amd64_asm.go generator to write
# sha1cdblock_amd64.s, then uses sed to insert a legacy
# "// +build !noasm,gc,amd64" line after every line of that file that
# matches "&<whitespace>amd64".
generate:
go run sha1cdblock_amd64_asm.go -out sha1cdblock_amd64.s
sed -i 's;&\samd64;&\n// +build !noasm,gc,amd64;g' sha1cdblock_amd64.s

# verify first runs generate, then fails if the working tree differs from
# what is committed (git diff --exit-code returns non-zero on any
# difference), and finally runs go vet over all packages.
verify: generate
git diff --exit-code
go vet ./...
9 changes: 8 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
module github.com/pjbgf/sha1cd

go 1.15
go 1.19

require (
github.com/mmcloughlin/avo v0.5.0 // indirect
golang.org/x/mod v0.6.0 // indirect
golang.org/x/sys v0.1.0 // indirect
golang.org/x/tools v0.2.0 // indirect
)
37 changes: 37 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
github.com/mmcloughlin/avo v0.5.0 h1:nAco9/aI9Lg2kiuROBY6BhCI/z0t5jEvJfjWbL8qXLU=
github.com/mmcloughlin/avo v0.5.0/go.mod h1:ChHFdoV7ql95Wi7vuq2YT1bwCJqiWdZrQ1im3VujLYM=
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
golang.org/x/arch v0.1.0/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
golang.org/x/crypto v0.1.0/go.mod h1:RecgLatLF4+eUMCP1PoPZQb+cVrJcOPbHkTkbkB9sbw=
golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
golang.org/x/mod v0.6.0 h1:b9gGHsz9/HhJ3HF5DHQytPpuwocVTChQJK3AvoLRD5I=
golang.org/x/mod v0.6.0/go.mod h1:4mET923SAdbXp2ki8ey+zGs1SLqsuM2Y0uvdZR/fUNI=
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
golang.org/x/net v0.1.0/go.mod h1:Cx3nUiGt4eDBEyega/BKRp+/AlGL8hYe7U9odMt2Cco=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.1.0 h1:kunALQeHf1/185U1i0GOB/fy1IPRDDpuoOOqRReG57U=
golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
golang.org/x/term v0.1.0/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
golang.org/x/text v0.4.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
golang.org/x/tools v0.2.0 h1:G6AHpWxTMGY1KyEYoAQ5WTtIekUUvDNjan3ugu60JvE=
golang.org/x/tools v0.2.0/go.mod h1:y4OqIKeOV/fWJetJ8bXPU1sEVniLMIyDAZWeHdV+NTA=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
rsc.io/pdf v0.1.1/go.mod h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4=
4 changes: 4 additions & 0 deletions internal/const.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,10 @@ const (
// SHA1 processes the input data in chunks. Each chunk contains 64 bytes.
Chunk = 64

// PreStepState is the number of pre-step compression states to store.
// Currently 3 pre-step compression states are required: 0, 58, 65.
PreStepState = 3

Magic = "shacd\x01"
MarshaledSize = len(Magic) + 5*4 + Chunk + 8
)
18 changes: 15 additions & 3 deletions sha1cd.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,8 @@ type digest struct {
len uint64

// col defines whether a collision has been found.
col bool
col bool
blockFunc func(dig *digest, p []byte)
}

func (d *digest) MarshalBinary() ([]byte, error) {
Expand Down Expand Up @@ -127,6 +128,17 @@ func (d *digest) Reset() {
func New() hash.Hash {
	// Select the package's block function up front; NewGeneric picks
	// blockGeneric instead.
	d := &digest{blockFunc: block}
	d.Reset()
	return d
}

// NewGeneric returns a hash.Hash that behaves like the one returned by New,
// except that blocks are always processed by the Go generic implementation,
// avoiding any processor-specific optimizations.
func NewGeneric() hash.Hash {
	d := &digest{blockFunc: blockGeneric}
	d.Reset()
	return d
}
Expand All @@ -146,14 +158,14 @@ func (d *digest) Write(p []byte) (nn int, err error) {
n := copy(d.x[d.nx:], p)
d.nx += n
if d.nx == shared.Chunk {
block(d, d.x[:])
d.blockFunc(d, d.x[:])
d.nx = 0
}
p = p[n:]
}
if len(p) >= shared.Chunk {
n := len(p) &^ (shared.Chunk - 1)
block(d, p[:n])
d.blockFunc(d, p[:n])
p = p[n:]
}
if len(p) > 0 {
Expand Down
15 changes: 12 additions & 3 deletions sha1cd_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -193,16 +193,25 @@ func TestLargeHashes(t *testing.T) {
}

func TestAllocations(t *testing.T) {
t.Run("generic", func(t *testing.T) {
testAllocations(NewGeneric(), t)
})

t.Run("native", func(t *testing.T) {
testAllocations(New(), t)
})
}

func testAllocations(h hash.Hash, t *testing.T) {
in := []byte("hello, world!")
out := make([]byte, 0, Size)
h := New()
n := int(testing.AllocsPerRun(10, func() {
h.Reset()
h.Write(in)
out = h.Sum(out[:0])
}))

if n > 0 {
t.Errorf("allocs = %d, want < 1", n)
if n > 2 {
t.Errorf("allocs = %d, want < 3", n)
}
}
50 changes: 50 additions & 0 deletions sha1cdblock_amd64.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
//go:build !noasm && gc && amd64
// +build !noasm,gc,amd64

package sha1cd

import (
"math"
"unsafe"

shared "github.com/pjbgf/sha1cd/internal"
)

// sliceHeader describes a run of bytes handed by value to blockAMD64.
// block fills base with the address of the first byte of the chunk and
// sets len and cap to the chunk size.
//
// NOTE(review): the field order looks like it mirrors the layout of a Go
// slice header, and the assembly presumably relies on it — confirm before
// reordering or retyping any field.
type sliceHeader struct {
base uintptr
len int
cap int
}

// blockAMD64 hashes the message p into the current state in dig.
//
// The declaration has no Go body; the implementation presumably lives in
// the generated sha1cdblock_amd64.s (confirm).
//
// Both m1 and cs are used to store intermediate results which are used by
// the collision detection logic: block passes them to checkCollision after
// each call.
//
//go:noescape
func blockAMD64(dig *digest, p sliceHeader, m1 []uint32, cs [][5]uint32)

// block feeds p to blockAMD64 one chunk (shared.Chunk bytes) at a time,
// updating dig, and runs checkCollision after every chunk. Trailing bytes
// that do not fill a whole chunk are left unprocessed.
func block(dig *digest, p []byte) {
	var (
		m1 [shared.Rounds]uint32
		cs [shared.PreStepState][shared.WordBuffers]uint32
	)

	for ; len(p) >= shared.Chunk; p = p[shared.Chunk:] {
		// Hand over a single chunk per call, because the collision
		// detection works on a block by block basis.
		n := math.Min(float64(len(p)), float64(shared.Chunk))
		chunk := sliceHeader{
			base: uintptr(unsafe.Pointer(&p[0])),
			len:  int(n),
			cap:  shared.Chunk,
		}

		blockAMD64(dig, chunk, m1[:], cs[:])
		if !checkCollision(m1, cs, dig.h) {
			continue
		}

		// A collision was detected: record it on the digest and run the
		// same chunk through blockAMD64 two more times.
		dig.col = true
		blockAMD64(dig, chunk, m1[:], cs[:])
		blockAMD64(dig, chunk, m1[:], cs[:])
	}
}
Loading

0 comments on commit a2b84bc

Please sign in to comment.