From 353a2a53f5d66d848d90afe142eb0106f5ab8293 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Povilas=20Balzaravi=C4=8Dius?= Date: Fri, 16 Jun 2023 12:40:46 +0300 Subject: [PATCH] [ci-matrix] Distribute unknown packages between shards. Currently all new packages which are not in timing files are assigned to a single bucket with shortest duration. This happened because duration of "unknown" packages is set to 0 but in reality it might be longer. Also empty packages with no tests have 0s duration but there is little overhead to run those tests. To avoid all such packages adding into a single bucket, change how those are distributed between shards. If package run duration is lower than threshold - assign it by using hash function instead of minimal bucket. --- cmd/tool/matrix/matrix.go | 41 ++++++++++++++++++++++++++++++++-- cmd/tool/matrix/matrix_test.go | 23 ++++++++++--------- 2 files changed, 51 insertions(+), 13 deletions(-) diff --git a/cmd/tool/matrix/matrix.go b/cmd/tool/matrix/matrix.go index a41752cf..d6b176fa 100644 --- a/cmd/tool/matrix/matrix.go +++ b/cmd/tool/matrix/matrix.go @@ -2,6 +2,7 @@ package matrix import ( "bufio" + "crypto/md5" "encoding/json" "fmt" "io" @@ -9,6 +10,7 @@ import ( "os" "path/filepath" "sort" + "strconv" "strings" "time" @@ -187,14 +189,32 @@ func closeFiles(files []*os.File) { } } +// consistentThreshold sets a duration of package test run under which packages +// are assigned to buckets in a consistent way instead of using a bucket of +// minium total duration. This helps to avoid situation when many short-running +// or unknown packages are assigned to a single bucket. +const consistentThreshold = 5 * time.Millisecond + func bucketPackages(timing map[string]time.Duration, packages []string, n uint) []bucket { sort.SliceStable(packages, func(i, j int) bool { return timing[packages[i]] >= timing[packages[j]] }) - buckets := make([]bucket, n) + var ( + buckets = make([]bucket, n) + allBucketsUsed = false + ) for _, pkg := range packages { - i := minBucket(buckets) + var i int + if !allBucketsUsed { + allBucketsUsed = isAllBucketsUsed(buckets) + } + if timing[pkg] > consistentThreshold || !allBucketsUsed { + i = minBucket(buckets) + } else { + i = consistentBucket(pkg, n) + } + buckets[i].Total += timing[pkg] buckets[i].Packages = append(buckets[i].Packages, pkg) log.Debugf("adding %v (%v) to bucket %v with total %v", @@ -218,6 +238,23 @@ func minBucket(buckets []bucket) int { return n } +func isAllBucketsUsed(buckets []bucket) bool { + for _, b := range buckets { + if b.Total == 0 { + return false + } + } + return true +} + +func consistentBucket(pkg string, n uint) int { + h := md5.New() + io.WriteString(h, pkg) + hash := fmt.Sprintf("%x", h.Sum(nil)) + decimal, _ := strconv.ParseUint(hash[:10], 16, 64) + return int(decimal) % int(n) +} + type bucket struct { Total time.Duration Packages []string diff --git a/cmd/tool/matrix/matrix_test.go b/cmd/tool/matrix/matrix_test.go index f4a9b3cc..f3508d17 100644 --- a/cmd/tool/matrix/matrix_test.go +++ b/cmd/tool/matrix/matrix_test.go @@ -67,6 +67,7 @@ func TestBucketPackages(t *testing.T) { } run := func(t *testing.T, tc testCase) { + t.Helper() buckets := bucketPackages(timing, packages, tc.n) assert.DeepEqual(t, buckets, tc.expected) } @@ -75,25 +76,25 @@ func TestBucketPackages(t *testing.T) { { n: 2, expected: []bucket{ - 0: {Total: 4440 * ms, Packages: []string{"four", "two", "one", "five"}}, - 1: {Total: 4406 * ms, Packages: []string{"three", "six", "new2", "new1"}}, + 0: {Total: 4440 * ms, Packages: []string{"four", "two", "one", "five", "new2", "new1"}}, + 1: {Total: 4406 * ms, Packages: []string{"three", "six"}}, }, }, { n: 3, expected: []bucket{ 0: {Total: 4000 * ms, Packages: []string{"four"}}, - 1: {Total: 3800 * ms, Packages: []string{"three"}}, - 2: {Total: 1046 * ms, Packages: []string{"six", "two", "one", "five", "new1", "new2"}}, + 1: {Total: 3800 * ms, Packages: []string{"three", "new1"}}, + 2: {Total: 1046 * ms, Packages: []string{"six", "two", "one", "five", "new2"}}, }, }, { n: 4, expected: []bucket{ - 0: {Total: 4000 * ms, Packages: []string{"four"}}, + 0: {Total: 4000 * ms, Packages: []string{"four", "new1"}}, 1: {Total: 3800 * ms, Packages: []string{"three"}}, - 2: {Total: 606 * ms, Packages: []string{"six"}}, - 3: {Total: 440 * ms, Packages: []string{"two", "one", "five", "new2", "new1"}}, + 2: {Total: 606 * ms, Packages: []string{"six", "new2"}}, + 3: {Total: 440 * ms, Packages: []string{"two", "one", "five"}}, }, }, { @@ -232,16 +233,16 @@ var expectedMatrix = `{ "packages": "pkg2" }, { - "description": "1 - pkg1", + "description": "1 - pkg1 and 1 others", "estimatedRuntime": "4s", "id": 1, - "packages": "pkg1" + "packages": "pkg1 other" }, { - "description": "2 - pkg0 and 1 others", + "description": "2 - pkg0", "estimatedRuntime": "2s", "id": 2, - "packages": "pkg0 other" + "packages": "pkg0" } ] }`