Skip to content

Commit 6fa78e8

Browse files
authored
quickselect: implement several optimizations (#6)
This commit implements a few optimizations for bottlenecks that showed up in profiles: 1. Use math/rand/v2, which doesn't use a global package-level mutex. 2. Roll our own heap that avoids tons of spurious `interface{}` allocations. 3. Reduce calls to data.Len() in loops.
1 parent ed8402a commit 6fa78e8

File tree

2 files changed

+44
-46
lines changed

2 files changed

+44
-46
lines changed

quickselect.go

+38-40
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,8 @@ heap implementations).
99
package quickselect
1010

1111
import (
12-
"container/heap"
13-
"errors"
1412
"fmt"
15-
"math/rand"
13+
"math/rand/v2"
1614
)
1715

1816
const (
@@ -152,7 +150,7 @@ func randomizedSelectionFinding(data Interface, low, high, k int) {
152150
return
153151
}
154152

155-
pivotIndex = rand.Intn(high+1-low) + low
153+
pivotIndex = rand.IntN(high+1-low) + low
156154
pivotIndex = partition(data, low, high, pivotIndex)
157155

158156
if k < pivotIndex {
@@ -186,7 +184,8 @@ func naiveSelectionFinding(data Interface, k int) {
186184
}
187185
resetLargestIndex(smallestIndices, data)
188186

189-
for i := k; i < data.Len(); i++ {
187+
length := data.Len()
188+
for i := k; i < length; i++ {
190189
if data.Less(i, smallestIndices[k-1]) {
191190
smallestIndices[k-1] = i
192191
resetLargestIndex(smallestIndices, data)
@@ -239,27 +238,30 @@ func partition(data Interface, low, high, pivotIndex int) int {
239238
return partitionIndex
240239
}
241240

242-
type dataHeap struct {
243-
heapIndices []int
244-
data Interface
245-
}
246-
247-
func (h dataHeap) Len() int { return len(h.heapIndices) }
248-
func (h dataHeap) Less(i, j int) bool { return h.data.Less(h.heapIndices[j], h.heapIndices[i]) }
249-
func (h dataHeap) Swap(i, j int) {
250-
h.heapIndices[i], h.heapIndices[j] = h.heapIndices[j], h.heapIndices[i]
251-
}
252-
253-
func (h *dataHeap) Push(x interface{}) {
254-
h.heapIndices = append(h.heapIndices, x.(int))
241+
func heapInit(data Interface, heap []int) {
242+
// Heapify process
243+
n := len(heap)
244+
for i := n/2 - 1; i >= 0; i-- {
245+
heapDown(data, heap, i, n)
246+
}
255247
}
256248

257-
func (h *dataHeap) Pop() interface{} {
258-
old := h.heapIndices
259-
n := len(old)
260-
x := old[n-1]
261-
h.heapIndices = old[0 : n-1]
262-
return x
249+
func heapDown(data Interface, heap []int, i, n int) {
250+
for {
251+
j1 := 2*i + 1
252+
if j1 >= n || j1 < 0 { // j1 < 0 after int overflow
253+
break
254+
}
255+
j := j1 // left child
256+
if j2 := j1 + 1; j2 < n && data.Less(heap[j1], heap[j2]) {
257+
j = j2 // right child
258+
}
259+
if !data.Less(heap[i], heap[j]) {
260+
break
261+
}
262+
heap[i], heap[j] = heap[j], heap[i]
263+
i = j
264+
}
263265
}
264266

265267
/*
@@ -268,26 +270,23 @@ It keeps a max-heap of the smallest k elements seen so far as we iterate over
268270
all of the elements. It adds a new element and pops the largest element.
269271
*/
270272
func heapSelectionFinding(data Interface, k int) {
271-
heapIndices := make([]int, k)
273+
heap := make([]int, k)
272274
for i := 0; i < k; i++ {
273-
heapIndices[i] = i
275+
heap[i] = i
274276
}
277+
heapInit(data, heap)
275278

276-
h := &dataHeap{heapIndices, data}
277-
heap.Init(h)
278-
var currentHeapMax int
279-
for i := k; i < data.Len(); i++ {
280-
currentHeapMax = h.heapIndices[0]
281-
282-
if data.Less(i, currentHeapMax) {
283-
heap.Push(h, i)
284-
heap.Pop(h)
279+
length := data.Len()
280+
for i := k; i < length; i++ {
281+
if data.Less(i, heap[0]) {
282+
heap[0] = i
283+
heapDown(data, heap, 0, k)
285284
}
286285
}
287286

288-
insertionSort(IntSlice(h.heapIndices), 0, len(h.heapIndices))
289-
for i := 0; i < len(h.heapIndices); i++ {
290-
data.Swap(i, h.heapIndices[i])
287+
insertionSort(IntSlice(heap), 0, k)
288+
for i := 0; i < k; i++ {
289+
data.Swap(i, heap[i])
291290
}
292291
}
293292

@@ -306,8 +305,7 @@ method will raise an error.
306305
func QuickSelect(data Interface, k int) error {
307306
length := data.Len()
308307
if k < 1 || k > length {
309-
message := fmt.Sprintf("The specified index '%d' is outside of the data's range of indices [0,%d)", k, length)
310-
return errors.New(message)
308+
return fmt.Errorf("The specified index '%d' is outside of the data's range of indices [0,%d)", k, length)
311309
}
312310

313311
kRatio := float64(k) / float64(length)

quickselect_test.go

+6-6
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ func TestQuickSelectWithSimpleArray(t *testing.T) {
3131
smallestK := fixture.Array[:5]
3232
expectedK := []int{2, 3, 4, 5, 6}
3333
if !hasSameElements(smallestK, expectedK) {
34-
t.Errorf("Expected smallest K elements to be '%s', but got '%s'", expectedK, smallestK)
34+
t.Errorf("Expected smallest K elements to be '%v', but got '%v'", expectedK, smallestK)
3535
}
3636
}
3737

@@ -45,7 +45,7 @@ func TestQuickSelectWithRepeatedElements(t *testing.T) {
4545
smallestK := fixture.Array[:5]
4646
expectedK := []int{2, 2, 2, 3, 3}
4747
if !hasSameElements(smallestK, expectedK) {
48-
t.Errorf("Expected smallest K elements to be '%s', but got '%s'", expectedK, smallestK)
48+
t.Errorf("Expected smallest K elements to be '%v', but got '%v'", expectedK, smallestK)
4949
}
5050
}
5151

@@ -85,7 +85,7 @@ func TestIntSliceQuickSelect(t *testing.T) {
8585

8686
resultK := fixture.Array[:4]
8787
if !hasSameElements(resultK, fixture.ExpectedK) {
88-
t.Errorf("Expected smallest K elements to be '%s', but got '%s'", fixture.ExpectedK, resultK)
88+
t.Errorf("Expected smallest K elements to be '%v', but got '%v'", fixture.ExpectedK, resultK)
8989
}
9090
}
9191
}
@@ -131,7 +131,7 @@ func TestNaiveSelectionFinding(t *testing.T) {
131131

132132
resultK := fixture.Array[:4]
133133
if !hasSameElements(resultK, fixture.ExpectedK) {
134-
t.Errorf("Expected smallest K elements to be '%s', but got '%s'", fixture.ExpectedK, resultK)
134+
t.Errorf("Expected smallest K elements to be '%v', but got '%v'", fixture.ExpectedK, resultK)
135135
}
136136
}
137137
}
@@ -153,7 +153,7 @@ func TestHeapSelectionFinding(t *testing.T) {
153153

154154
resultK := fixture.Array[:4]
155155
if !hasSameElements(resultK, fixture.ExpectedK) {
156-
t.Errorf("Expected smallest K elements to be '%s', but got '%s'", fixture.ExpectedK, resultK)
156+
t.Errorf("Expected smallest K elements to be '%v', but got '%v'", fixture.ExpectedK, resultK)
157157
}
158158
}
159159
}
@@ -176,7 +176,7 @@ func TestFloat64SliceQuickSelect(t *testing.T) {
176176

177177
resultK := fixture.Array[:4]
178178
if !hasSameElementsFloat64(resultK, fixture.ExpectedK) {
179-
t.Errorf("Expected smallest K elements to be '%s', but got '%s'", fixture.ExpectedK, resultK)
179+
t.Errorf("Expected smallest K elements to be '%v', but got '%v'", fixture.ExpectedK, resultK)
180180
}
181181
}
182182
}

0 commit comments

Comments
 (0)