Changes from all commits
Commits
58 commits
2de12d8
feat(shard distributor): add shard key helpers and metrics state
AndreasHolt Oct 19, 2025
5d95067
feat(shard distributor): persist shard metrics in etcd store
AndreasHolt Oct 19, 2025
6e57536
fix(shard distributor): update LastMoveTime in the case where a shard…
AndreasHolt Oct 19, 2025
595d320
test(shard distributor): add tests for shard metrics
AndreasHolt Oct 19, 2025
d9ba54d
fix(shard distributor): modify comment
AndreasHolt Oct 19, 2025
32d2ecd
fix(shard distributor): add atomic check to prevent metrics race
AndreasHolt Oct 19, 2025
b624a00
fix(shard distributor): apply shard metric updates in a second phase …
AndreasHolt Oct 19, 2025
aad7b2e
feat(shard distributor): move shard metric updates out of AssignShard…
AndreasHolt Oct 19, 2025
6360f8a
fix(shard distributor): keep NamespaceState revisions tied to assignm…
AndreasHolt Oct 20, 2025
1536d0a
refactor(shard distributor): use shard cache and clock for preparing …
AndreasHolt Oct 22, 2025
f316fbf
test(shard distributor): BuildShardPrefix, BuildShardKey, ParseShardKey
AndreasHolt Oct 22, 2025
4524da9
feat(shard distributor): simplify shard metrics updates
AndreasHolt Oct 23, 2025
126f725
refactor(shard distributor): ShardMetrics renamed to ShardStatistics.…
AndreasHolt Oct 24, 2025
cc53f68
test(shard distributor): small changes to shard key tests s.t. they l…
AndreasHolt Oct 25, 2025
733bbcb
fix(shard distributor): no longer check for key type ShardStatisticsK…
AndreasHolt Oct 25, 2025
6816b8e
refactor(shard distributor): found a place where I forgot to rename t…
AndreasHolt Oct 27, 2025
f97e0cf
fix(shard distributor): move non-exported helpers to end of file to f…
AndreasHolt Oct 27, 2025
513e88c
feat(shard distributor): clean up the shard statistics
AndreasHolt Oct 29, 2025
9833525
test(shard distributor): add test case for when shard stats are deleted
AndreasHolt Oct 29, 2025
0332fe5
fix(shard distributor): add mapping (new metric)
AndreasHolt Oct 29, 2025
d5a13d9
feat(shard distributor): retain shard stats while shards are within h…
AndreasHolt Oct 30, 2025
634bc02
feat: function to update shard statistics from heartbeat (currently n…
AndreasHolt Oct 27, 2025
812e854
test(shard distributor): add tests to verify statistics are updated a…
AndreasHolt Oct 27, 2025
b9813e7
feat(shard distributor): calculate smoothed load (ewma) using the Sha…
AndreasHolt Oct 27, 2025
dfb7448
fix(shard distributor): log invalid shard load
AndreasHolt Oct 27, 2025
36ec08f
chore: added logger warning and simplified ewma calculation
Theis-Mathiassen Nov 2, 2025
38a6e81
Merge branch 'master' into heartbeat-shard-statistics
Theis-Mathiassen Nov 6, 2025
af733e6
fix: remove duplicate test introduced in merge
AndreasHolt Nov 6, 2025
a52e86f
chore: consistent error checking, and rename function
Theis-Mathiassen Nov 11, 2025
abfc80e
chore: added decompress to unmarshal
Theis-Mathiassen Nov 11, 2025
df0feaf
feat(shard distributor): persist shard metrics in etcd store
AndreasHolt Oct 19, 2025
8546a26
feat(shard distributor): move shard metric updates out of AssignShard…
AndreasHolt Oct 19, 2025
dde87ef
fix(shard distributor): keep NamespaceState revisions tied to assignm…
AndreasHolt Oct 20, 2025
415e80c
refactor(shard distributor): use shard cache and clock for preparing …
AndreasHolt Oct 22, 2025
c67d5c3
feat(shard distributor): simplify shard metrics updates
AndreasHolt Oct 23, 2025
9ffcefb
refactor(shard distributor): ShardMetrics renamed to ShardStatistics.…
AndreasHolt Oct 24, 2025
cc769bf
refactor(shard distributor): found a place where I forgot to rename t…
AndreasHolt Oct 27, 2025
8c22663
fix(shard distributor): move non-exported helpers to end of file to f…
AndreasHolt Oct 27, 2025
5ac3c5d
test(shard distributor): add test case for when shard stats are deleted
AndreasHolt Oct 29, 2025
3973b82
feat(shard distributor): retain shard stats while shards are within h…
AndreasHolt Oct 30, 2025
3830d5e
feat: function to update shard statistics from heartbeat (currently n…
AndreasHolt Oct 27, 2025
443c0b1
test(shard distributor): add tests to verify statistics are updated a…
AndreasHolt Oct 27, 2025
9d159e7
feat(shard distributor): calculate smoothed load (ewma) using the Sha…
AndreasHolt Oct 27, 2025
18e63b7
fix(shard distributor): log invalid shard load
AndreasHolt Oct 27, 2025
e08a286
chore: added logger warning and simplified ewma calculation
Theis-Mathiassen Nov 2, 2025
08eb635
fix: remove duplicate test introduced in merge
AndreasHolt Nov 6, 2025
f63664a
chore: consistent error checking, and rename function
Theis-Mathiassen Nov 11, 2025
10e2ffa
chore: added decompress to unmarshal
Theis-Mathiassen Nov 11, 2025
8c6b0c8
chore: removed an old struct that appeared during rebase
Theis-Mathiassen Nov 11, 2025
158e030
feat(shard distributor): throttle shard-stat writes
AndreasHolt Nov 13, 2025
dd45ff0
Merge branch 'heartbeat-shard-statistics' of github.com:AndreasHolt/c…
AndreasHolt Nov 13, 2025
05e0d1d
fix(shard distributor): linter error
AndreasHolt Nov 13, 2025
e0779ec
feat(shard distributor): decouple shard stats write-throttling decisi…
AndreasHolt Nov 18, 2025
9546f24
Merge branch 'master' into heartbeat-shard-statistics
AndreasHolt Nov 19, 2025
db70702
fix(shard-distributor): inverted condition in shard stats cleanup loop
AndreasHolt Nov 19, 2025
481f9c6
chore(shard-distributor): did some formatting, and use current load i…
Theis-Mathiassen Nov 19, 2025
f754dd6
Merge branch 'master' into heartbeat-shard-statistics
AndreasHolt Dec 2, 2025
3366828
fix(shard distributor): decouple shard assignment from stats writes
AndreasHolt Dec 2, 2025
5 changes: 3 additions & 2 deletions common/config/config.go
@@ -658,8 +658,9 @@ type (
}

LeaderProcess struct {
Period time.Duration `yaml:"period"`
HeartbeatTTL time.Duration `yaml:"heartbeatTTL"`
Period time.Duration `yaml:"period"`
HeartbeatTTL time.Duration `yaml:"heartbeatTTL"`
ShardStatsTTL time.Duration `yaml:"shardStatsTTL"`
}
)

1 change: 1 addition & 0 deletions config/development.yaml
@@ -186,3 +186,4 @@ shardDistribution:
process:
period: 1s
heartbeatTTL: 2s
shardStatsTTL: 60s
9 changes: 7 additions & 2 deletions service/sharddistributor/config/config.go
@@ -79,8 +79,9 @@ type (
}

LeaderProcess struct {
Period time.Duration `yaml:"period"`
HeartbeatTTL time.Duration `yaml:"heartbeatTTL"`
Period time.Duration `yaml:"period"`
HeartbeatTTL time.Duration `yaml:"heartbeatTTL"`
ShardStatsTTL time.Duration `yaml:"shardStatsTTL"`
}
)

@@ -97,6 +98,10 @@ const (
MigrationModeONBOARDED = "onboarded"
)

const (
DefaultShardStatsTTL = time.Minute
)

// ConfigMode maps string migration mode values to types.MigrationMode
var ConfigMode = map[string]types.MigrationMode{
MigrationModeINVALID: types.MigrationModeINVALID,
15 changes: 10 additions & 5 deletions service/sharddistributor/leader/process/processor.go
@@ -77,12 +77,15 @@ func NewProcessorFactory(
timeSource clock.TimeSource,
cfg config.ShardDistribution,
) Factory {
if cfg.Process.Period == 0 {
if cfg.Process.Period <= 0 {
cfg.Process.Period = _defaultPeriod
}
if cfg.Process.HeartbeatTTL == 0 {
if cfg.Process.HeartbeatTTL <= 0 {
cfg.Process.HeartbeatTTL = _defaultHearbeatTTL
}
if cfg.Process.ShardStatsTTL <= 0 {
cfg.Process.ShardStatsTTL = config.DefaultShardStatsTTL
}

return &processorFactory{
logger: logger,
@@ -267,6 +270,7 @@ func (p *namespaceProcessor) identifyStaleExecutors(namespaceState *store.Namesp
func (p *namespaceProcessor) identifyStaleShardStats(namespaceState *store.NamespaceState) []string {
activeShards := make(map[string]struct{})
now := p.timeSource.Now().UTC()
shardStatsTTL := p.cfg.ShardStatsTTL

// 1. build set of active executors

@@ -278,7 +282,8 @@ func (p *namespaceProcessor) identifyStaleShardStats(namespaceState *store.Names
}

isActive := executor.Status == types.ExecutorStatusACTIVE
isNotStale := now.Sub(executor.LastHeartbeat) <= p.cfg.HeartbeatTTL
lastHeartbeat := executor.LastHeartbeat
isNotStale := !lastHeartbeat.IsZero() && now.Sub(lastHeartbeat) <= shardStatsTTL
if isActive && isNotStale {
for shardID := range assignedState.AssignedShards {
activeShards[shardID] = struct{}{}
@@ -303,8 +308,8 @@ func (p *namespaceProcessor) identifyStaleShardStats(namespaceState *store.Names
if _, ok := activeShards[shardID]; ok {
continue
}
recentUpdate := !stats.LastUpdateTime.IsZero() && now.Sub(stats.LastUpdateTime) <= p.cfg.HeartbeatTTL
recentMove := !stats.LastMoveTime.IsZero() && now.Sub(stats.LastMoveTime) <= p.cfg.HeartbeatTTL
recentUpdate := !stats.LastUpdateTime.IsZero() && now.Sub(stats.LastUpdateTime) <= shardStatsTTL
recentMove := !stats.LastMoveTime.IsZero() && now.Sub(stats.LastMoveTime) <= shardStatsTTL
if recentUpdate || recentMove {
// Preserve stats that have been updated recently to allow cooldown/load history to
// survive executor churn. These shards are likely awaiting reassignment,
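Taken together, the retention rule in identifyStaleShardStats keeps a shard's statistics while the shard is assigned to an active executor whose heartbeat is within shardStatsTTL, or while the stats themselves were updated or moved within that window. A minimal sketch of the predicate under simplified types (the helper name and flattened parameters are illustrative, not the actual store API):

package sketch

import "time"

// keepShardStats reports whether a shard's statistics should survive the cleanup pass.
// shardIsActive stands in for membership in the activeShards set built from ACTIVE,
// recently-heartbeating executors; the timestamps mirror the ShardStatistics fields.
func keepShardStats(now, lastUpdate, lastMove time.Time, shardIsActive bool, shardStatsTTL time.Duration) bool {
	if shardIsActive {
		return true
	}
	recentUpdate := !lastUpdate.IsZero() && now.Sub(lastUpdate) <= shardStatsTTL
	recentMove := !lastMove.IsZero() && now.Sub(lastMove) <= shardStatsTTL
	return recentUpdate || recentMove
}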
8 changes: 5 additions & 3 deletions service/sharddistributor/leader/process/processor_test.go
@@ -44,8 +44,9 @@ func setupProcessorTest(t *testing.T, namespaceType string) *testDependencies {
mockedClock,
config.ShardDistribution{
Process: config.LeaderProcess{
Period: time.Second,
HeartbeatTTL: time.Second,
Period: time.Second,
HeartbeatTTL: time.Second,
ShardStatsTTL: 10 * time.Second,
},
},
),
@@ -267,10 +268,11 @@ func TestCleanupStaleShardStats(t *testing.T) {
},
}

staleCutoff := now.Add(-11 * time.Second)
shardStats := map[string]store.ShardStatistics{
"shard-1": {SmoothedLoad: 1.0, LastUpdateTime: now, LastMoveTime: now},
"shard-2": {SmoothedLoad: 2.0, LastUpdateTime: now, LastMoveTime: now},
"shard-3": {SmoothedLoad: 3.0, LastUpdateTime: now.Add(-2 * time.Second), LastMoveTime: now.Add(-2 * time.Second)},
"shard-3": {SmoothedLoad: 3.0, LastUpdateTime: staleCutoff, LastMoveTime: staleCutoff},
}

namespaceState := &store.NamespaceState{
168 changes: 153 additions & 15 deletions service/sharddistributor/store/etcd/executorstore/etcdstore.go
@@ -8,6 +8,7 @@ import (
"encoding/json"
"errors"
"fmt"
"math"
"time"

clientv3 "go.etcd.io/etcd/client/v3"
@@ -32,14 +33,20 @@ var (
const deleteShardStatsBatchSize = 64

type executorStoreImpl struct {
client *clientv3.Client
prefix string
logger log.Logger
shardCache *shardcache.ShardToExecutorCache
timeSource clock.TimeSource
recordWriter *common.RecordWriter
client *clientv3.Client
prefix string
logger log.Logger
shardCache *shardcache.ShardToExecutorCache
timeSource clock.TimeSource
recordWriter *common.RecordWriter
maxStatsPersistInterval time.Duration // Max interval before we force a shard-stat persist.
}

// Constants for gating shard statistics writes to reduce etcd load.
const (
shardStatsEpsilon = 0.05
)

// shardStatisticsUpdate holds the staged statistics for a shard so we can write them
// to etcd after the main AssignShards transaction commits.
type shardStatisticsUpdate struct {
Expand Down Expand Up @@ -97,13 +104,19 @@ func NewStore(p ExecutorStoreParams) (store.Store, error) {
return nil, fmt.Errorf("create record writer: %w", err)
}

shardStatsTTL := p.Cfg.Process.ShardStatsTTL
if shardStatsTTL <= 0 {
shardStatsTTL = config.DefaultShardStatsTTL
}

store := &executorStoreImpl{
client: etcdClient,
prefix: etcdCfg.Prefix,
logger: p.Logger,
shardCache: shardCache,
timeSource: timeSource,
recordWriter: recordWriter,
client: etcdClient,
prefix: etcdCfg.Prefix,
logger: p.Logger,
shardCache: shardCache,
timeSource: timeSource,
recordWriter: recordWriter,
maxStatsPersistInterval: deriveStatsPersistInterval(shardStatsTTL),
}

p.Lifecycle.Append(fx.StartStopHook(store.Start, store.Stop))
@@ -165,9 +178,124 @@ func (s *executorStoreImpl) RecordHeartbeat(ctx context.Context, namespace, exec
if err != nil {
return fmt.Errorf("record heartbeat: %w", err)
}

s.recordShardStatistics(ctx, namespace, executorID, request.ReportedShards)
Contributor review comment: Let's discuss if this should happen synchronously when we record the heartbeat.


return nil
}

func deriveStatsPersistInterval(shardStatsTTL time.Duration) time.Duration {
if shardStatsTTL <= time.Second {
return time.Second
}
return shardStatsTTL - time.Second
}
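// Illustration (not part of this diff): with the default one-minute shardStatsTTL,
// deriveStatsPersistInterval returns 59s, so as long as heartbeats keep reporting a
// shard, its statistics are persisted at least once every ~59s even when the smoothed
// load moves by less than shardStatsEpsilon; for a TTL of 1s or less the 1s floor applies.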

func (s *executorStoreImpl) recordShardStatistics(ctx context.Context, namespace, executorID string, reported map[string]*types.ShardStatusReport) {
if len(reported) == 0 {
return
}

now := s.timeSource.Now().UTC()

for shardID, report := range reported {
if report == nil {
s.logger.Warn("empty report; skipping EWMA update",
tag.ShardNamespace(namespace),
tag.ShardExecutor(executorID),
tag.ShardKey(shardID),
)
continue
}

load := report.ShardLoad
if math.IsNaN(load) || math.IsInf(load, 0) {
s.logger.Warn(
"invalid shard load reported; skipping EWMA update",
tag.ShardNamespace(namespace),
tag.ShardExecutor(executorID),
tag.ShardKey(shardID),
)
continue
}

shardStatsKey := etcdkeys.BuildShardKey(s.prefix, namespace, shardID, etcdkeys.ShardStatisticsKey)

statsResp, err := s.client.Get(ctx, shardStatsKey)
Contributor review comment: We are performing a Get operation for each shard on every heartbeat, and I am not sure etcd can handle this.

if err != nil {
s.logger.Warn(
"failed to read shard statistics for heartbeat update",
tag.ShardNamespace(namespace),
tag.ShardExecutor(executorID),
tag.ShardKey(shardID),
tag.Error(err),
)
continue
}

var stats etcdtypes.ShardStatistics
if len(statsResp.Kvs) > 0 {
if err := common.DecompressAndUnmarshal(statsResp.Kvs[0].Value, &stats); err != nil {
s.logger.Warn(
"failed to unmarshal shard statistics for heartbeat update",
tag.ShardNamespace(namespace),
tag.ShardExecutor(executorID),
tag.ShardKey(shardID),
tag.Error(err),
)
continue
}
}

// Update smoothed load via EWMA.
prevSmoothed := stats.SmoothedLoad
prevUpdate := stats.LastUpdateTime.ToTime()
newSmoothed := ewmaSmoothedLoad(prevSmoothed, load, prevUpdate, now)

// Decide whether to persist this update. We always persist if this is the
// first observation (prevUpdate == 0). Otherwise, if the change is small
// and the previous persist is recent, skip the write to reduce etcd load.
shouldPersist := true
if !prevUpdate.IsZero() {
age := now.Sub(prevUpdate)
delta := math.Abs(newSmoothed - prevSmoothed)
if delta < shardStatsEpsilon && age < s.maxStatsPersistInterval {
shouldPersist = false
}
}

if !shouldPersist {
// Skip persisting, proceed to next shard.
continue
}

stats.SmoothedLoad = newSmoothed
stats.LastUpdateTime = etcdtypes.Time(now)

payload, err := json.Marshal(stats)
if err != nil {
s.logger.Warn(
"failed to marshal shard statistics after heartbeat",
tag.ShardNamespace(namespace),
tag.ShardExecutor(executorID),
tag.ShardKey(shardID),
tag.Error(err),
)
continue
}

if _, err := s.client.Put(ctx, shardStatsKey, string(payload)); err != nil {
s.logger.Warn(
"failed to persist shard statistics from heartbeat",
tag.ShardNamespace(namespace),
tag.ShardExecutor(executorID),
tag.ShardKey(shardID),
tag.Error(err),
)
}
}
}

// GetHeartbeat retrieves the last known heartbeat state for a single executor.
func (s *executorStoreImpl) GetHeartbeat(ctx context.Context, namespace string, executorID string) (*store.HeartbeatState, *store.AssignedState, error) {
// The prefix for all keys related to a single executor.
@@ -476,9 +604,7 @@ func (s *executorStoreImpl) AssignShard(ctx context.Context, namespace, shardID,
}

now := s.timeSource.Now().UTC()
statsModRevision := int64(0)
if len(statsResp.Kvs) > 0 {
statsModRevision = statsResp.Kvs[0].ModRevision
if err := common.DecompressAndUnmarshal(statsResp.Kvs[0].Value, &shardStats); err != nil {
return fmt.Errorf("parse shard statistics: %w", err)
}
@@ -543,7 +669,6 @@ func (s *executorStoreImpl) AssignShard(ctx context.Context, namespace, shardID,
comparisons = append(comparisons, clientv3.Compare(clientv3.ModRevision(statusKey), "=", statusModRev))
// b) Check that neither the assigned_state nor shard statistics were modified concurrently.
comparisons = append(comparisons, clientv3.Compare(clientv3.ModRevision(assignedState), "=", modRevision))
comparisons = append(comparisons, clientv3.Compare(clientv3.ModRevision(shardStatsKey), "=", statsModRevision))
// c) Check that the cache is up to date.
cmp, err := s.shardCache.GetExecutorModRevisionCmp(namespace)
if err != nil {
@@ -764,3 +889,16 @@ func (s *executorStoreImpl) applyShardStatisticsUpdates(ctx context.Context, nam
}
}
}

func ewmaSmoothedLoad(prev, current float64, lastUpdate, now time.Time) float64 {
const tau = 30 * time.Second // smaller = more responsive, larger = smoother
if lastUpdate.IsZero() || tau <= 0 {
return current
}
if now.Before(lastUpdate) {
return current
}
dt := now.Sub(lastUpdate)
alpha := 1 - math.Exp(-dt.Seconds()/tau.Seconds())
return (1-alpha)*prev + alpha*current
}
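For context on how the smoothing interacts with the write gate: with tau fixed at 30s and heartbeats arriving every second, alpha = 1 - exp(-1/30) ≈ 0.033, so a single report moves the smoothed load by only about 3% of the gap to the reported load. Small fluctuations therefore tend to stay under the 0.05 epsilon and are flushed only once maxStatsPersistInterval elapses. A standalone sketch of that decision path with illustrative inputs (the helper and numbers below are not part of the store code; they only mirror the EWMA and gating logic above):

package main

import (
	"fmt"
	"math"
	"time"
)

// ewma mirrors ewmaSmoothedLoad: alpha = 1 - exp(-dt/tau).
func ewma(prev, current float64, dt, tau time.Duration) float64 {
	alpha := 1 - math.Exp(-dt.Seconds()/tau.Seconds())
	return (1-alpha)*prev + alpha*current
}

func main() {
	const (
		tau     = 30 * time.Second
		epsilon = 0.05 // shardStatsEpsilon
	)
	maxStatsPersistInterval := 59 * time.Second // deriveStatsPersistInterval(time.Minute)

	prevSmoothed := 1.0 // last persisted smoothed load
	reportedLoad := 1.5 // load carried by the incoming heartbeat
	age := time.Second  // time since the last persisted update

	newSmoothed := ewma(prevSmoothed, reportedLoad, age, tau)
	delta := math.Abs(newSmoothed - prevSmoothed)

	// Same gate as recordShardStatistics: skip the write only when the change is
	// small and the previous persist is recent.
	shouldPersist := delta >= epsilon || age >= maxStatsPersistInterval
	fmt.Printf("newSmoothed=%.4f delta=%.4f persist=%v\n", newSmoothed, delta, shouldPersist)
	// Prints: newSmoothed=1.0164 delta=0.0164 persist=false
}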