diff --git a/service/sharddistributor/canary/pinger/canary_client_mock.go b/service/sharddistributor/canary/pinger/canary_client_mock.go
new file mode 100644
index 00000000000..8cbf9ce938c
--- /dev/null
+++ b/service/sharddistributor/canary/pinger/canary_client_mock.go
@@ -0,0 +1,64 @@
+// Code generated by MockGen. DO NOT EDIT.
+// Source: github.com/uber/cadence/.gen/proto/sharddistributor/v1 (interfaces: ShardDistributorExecutorCanaryAPIYARPCClient)
+//
+// Generated by this command:
+//
+//	mockgen -package pinger -destination canary_client_mock.go github.com/uber/cadence/.gen/proto/sharddistributor/v1 ShardDistributorExecutorCanaryAPIYARPCClient
+//
+
+// Package pinger is a generated GoMock package.
+package pinger
+
+import (
+	context "context"
+	reflect "reflect"
+
+	gomock "go.uber.org/mock/gomock"
+	yarpc "go.uber.org/yarpc"
+
+	sharddistributorv1 "github.com/uber/cadence/.gen/proto/sharddistributor/v1"
+)
+
+// MockShardDistributorExecutorCanaryAPIYARPCClient is a mock of ShardDistributorExecutorCanaryAPIYARPCClient interface.
+type MockShardDistributorExecutorCanaryAPIYARPCClient struct {
+	ctrl     *gomock.Controller
+	recorder *MockShardDistributorExecutorCanaryAPIYARPCClientMockRecorder
+	isgomock struct{}
+}
+
+// MockShardDistributorExecutorCanaryAPIYARPCClientMockRecorder is the mock recorder for MockShardDistributorExecutorCanaryAPIYARPCClient.
+type MockShardDistributorExecutorCanaryAPIYARPCClientMockRecorder struct {
+	mock *MockShardDistributorExecutorCanaryAPIYARPCClient
+}
+
+// NewMockShardDistributorExecutorCanaryAPIYARPCClient creates a new mock instance.
+func NewMockShardDistributorExecutorCanaryAPIYARPCClient(ctrl *gomock.Controller) *MockShardDistributorExecutorCanaryAPIYARPCClient {
+	mock := &MockShardDistributorExecutorCanaryAPIYARPCClient{ctrl: ctrl}
+	mock.recorder = &MockShardDistributorExecutorCanaryAPIYARPCClientMockRecorder{mock}
+	return mock
+}
+
+// EXPECT returns an object that allows the caller to indicate expected use.
+func (m *MockShardDistributorExecutorCanaryAPIYARPCClient) EXPECT() *MockShardDistributorExecutorCanaryAPIYARPCClientMockRecorder {
+	return m.recorder
+}
+
+// Ping mocks base method.
+func (m *MockShardDistributorExecutorCanaryAPIYARPCClient) Ping(arg0 context.Context, arg1 *sharddistributorv1.PingRequest, arg2 ...yarpc.CallOption) (*sharddistributorv1.PingResponse, error) {
+	m.ctrl.T.Helper()
+	varargs := []any{arg0, arg1}
+	for _, a := range arg2 {
+		varargs = append(varargs, a)
+	}
+	ret := m.ctrl.Call(m, "Ping", varargs...)
+	ret0, _ := ret[0].(*sharddistributorv1.PingResponse)
+	ret1, _ := ret[1].(error)
+	return ret0, ret1
+}
+
+// Ping indicates an expected call of Ping.
+func (mr *MockShardDistributorExecutorCanaryAPIYARPCClientMockRecorder) Ping(arg0, arg1 any, arg2 ...any) *gomock.Call {
+	mr.mock.ctrl.T.Helper()
+	varargs := append([]any{arg0, arg1}, arg2...)
+	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Ping", reflect.TypeOf((*MockShardDistributorExecutorCanaryAPIYARPCClient)(nil).Ping), varargs...)
+}
diff --git a/service/sharddistributor/canary/pinger/pinger.go b/service/sharddistributor/canary/pinger/pinger.go
new file mode 100644
index 00000000000..641fb8434ac
--- /dev/null
+++ b/service/sharddistributor/canary/pinger/pinger.go
@@ -0,0 +1,140 @@
+package pinger
+
+import (
+	"context"
+	"fmt"
+	"math/rand"
+	"sync"
+	"time"
+
+	"go.uber.org/fx"
+	"go.uber.org/yarpc"
+	"go.uber.org/zap"
+
+	sharddistributorv1 "github.com/uber/cadence/.gen/proto/sharddistributor/v1"
+	"github.com/uber/cadence/common/backoff"
+	"github.com/uber/cadence/common/clock"
+	"github.com/uber/cadence/service/sharddistributor/client/spectatorclient"
+)
+
+//go:generate mockgen -package $GOPACKAGE -destination canary_client_mock.go github.com/uber/cadence/.gen/proto/sharddistributor/v1 ShardDistributorExecutorCanaryAPIYARPCClient
+
+// Timing parameters of the ping loop.
+const (
+	pingInterval    = 1 * time.Second
+	pingJitterCoeff = 0.1 // 10% jitter
+	pingTimeout     = 5 * time.Second
+)
+
+// Pinger periodically pings the owner of a randomly chosen shard in a fixed
+// namespace through the canary API and logs the outcome of every ping.
+type Pinger struct {
+	logger       *zap.Logger
+	timeSource   clock.TimeSource
+	canaryClient sharddistributorv1.ShardDistributorExecutorCanaryAPIYARPCClient
+	namespace    string
+	numShards    int
+	ctx          context.Context
+	cancel       context.CancelFunc
+	wg           sync.WaitGroup
+}
+
+// Params are the fx-injected dependencies needed to create a Pinger.
+type Params struct {
+	fx.In
+
+	Logger       *zap.Logger
+	TimeSource   clock.TimeSource
+	CanaryClient sharddistributorv1.ShardDistributorExecutorCanaryAPIYARPCClient
+}
+
+// NewPinger creates a Pinger that pings shards 0..numShards-1 of namespace.
+// The returned Pinger is idle until Start is called.
+func NewPinger(params Params, namespace string, numShards int) *Pinger {
+	return &Pinger{
+		logger:       params.Logger,
+		timeSource:   params.TimeSource,
+		canaryClient: params.CanaryClient,
+		namespace:    namespace,
+		numShards:    numShards,
+	}
+}
+
+// Start launches the background ping loop and returns immediately.
+//
+// The loop runs on a context that keeps the values of ctx but is detached from
+// its cancellation, so the loop is stopped only by Stop. When the Pinger was
+// created with a non-positive number of shards nothing is started, because
+// picking a random shard would panic.
+func (p *Pinger) Start(ctx context.Context) {
+	if p.numShards <= 0 {
+		p.logger.Error("Canary pinger not started: number of shards must be positive", zap.String("namespace", p.namespace), zap.Int("num_shards", p.numShards))
+		return
+	}
+
+	p.logger.Info("Starting canary pinger", zap.String("namespace", p.namespace), zap.Int("num_shards", p.numShards))
+	p.ctx, p.cancel = context.WithCancel(context.WithoutCancel(ctx))
+	p.wg.Add(1)
+	go p.pingLoop()
+}
+
+// Stop cancels the ping loop, if one was started, and waits for it to exit.
+func (p *Pinger) Stop() {
+	if p.cancel != nil {
+		p.cancel()
+	}
+	p.wg.Wait()
+}
+
+// pingLoop pings a random shard on every tick until the Pinger's context is
+// cancelled. After each ping the ticker is re-armed with a newly jittered
+// interval.
+func (p *Pinger) pingLoop() {
+	defer p.wg.Done()
+
+	ticker := p.timeSource.NewTicker(backoff.JitDuration(pingInterval, pingJitterCoeff))
+	defer ticker.Stop()
+
+	for {
+		select {
+		case <-p.ctx.Done():
+			p.logger.Info("Pinger context done, stopping")
+			return
+		case <-ticker.Chan():
+			p.pingRandomShard()
+			ticker.Reset(backoff.JitDuration(pingInterval, pingJitterCoeff))
+		}
+	}
+}
+
+// pingRandomShard pings one randomly chosen shard of the namespace and logs
+// exactly one outcome: an error when the RPC fails, a warning when the
+// responding executor does not own the shard, or an info message on success.
+func (p *Pinger) pingRandomShard() {
+	shardNum := rand.Intn(p.numShards)
+	shardKey := fmt.Sprintf("%d", shardNum)
+
+	request := &sharddistributorv1.PingRequest{
+		ShardKey:  shardKey,
+		Namespace: p.namespace,
+	}
+
+	ctx, cancel := context.WithTimeout(p.ctx, pingTimeout)
+	defer cancel()
+
+	response, err := p.canaryClient.Ping(ctx, request, yarpc.WithShardKey(shardKey), yarpc.WithHeader(spectatorclient.NamespaceHeader, p.namespace))
+	if err != nil {
+		p.logger.Error("Failed to ping shard", zap.String("namespace", p.namespace), zap.String("shard_key", shardKey), zap.Error(err))
+		// The response is not meaningful on error, so skip the ownership check.
+		return
+	}
+
+	// Verify that the responding executor actually owns the shard.
+	if !response.GetOwnsShard() {
+		p.logger.Warn("Executor does not own shard", zap.String("namespace", p.namespace), zap.String("shard_key", shardKey), zap.String("executor_id", response.GetExecutorId()))
+		// Not a successful ping of the owner; do not log success as well.
+		return
+	}
+
+	p.logger.Info("Successfully pinged shard owner", zap.String("namespace", p.namespace), zap.String("shard_key", shardKey), zap.String("executor_id", response.GetExecutorId()))
+}
diff --git a/service/sharddistributor/canary/pinger/pinger_test.go b/service/sharddistributor/canary/pinger/pinger_test.go
new file mode 100644
index 00000000000..f6ca8e1fed8
--- /dev/null
+++ b/service/sharddistributor/canary/pinger/pinger_test.go
@@ -0,0 +1,125 @@
+package pinger
+
+import (
+	"context"
+	"errors"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	"go.uber.org/goleak"
+	"go.uber.org/mock/gomock"
+	"go.uber.org/zap"
+	"go.uber.org/zap/zaptest/observer"
+
+	sharddistributorv1 "github.com/uber/cadence/.gen/proto/sharddistributor/v1"
+	"github.com/uber/cadence/common/clock"
+)
+
+// TestPingerStartStop verifies that Start and Stop leave no goroutine behind.
+func TestPingerStartStop(t *testing.T) {
+	defer goleak.VerifyNone(t)
+
+	ctrl := gomock.NewController(t)
+	mockClient := NewMockShardDistributorExecutorCanaryAPIYARPCClient(ctrl)
+
+	pinger := NewPinger(Params{
+		Logger:       zap.NewNop(),
+		TimeSource:   clock.NewRealTimeSource(),
+		CanaryClient: mockClient,
+	}, "test-ns", 10)
+
+	pinger.Start(context.Background())
+	pinger.Stop()
+}
+
+// TestPingerStartWithoutShards verifies that Start refuses to launch the ping
+// loop when there are no shards to ping, and that Stop is still safe to call.
+func TestPingerStartWithoutShards(t *testing.T) {
+	defer goleak.VerifyNone(t)
+
+	ctrl := gomock.NewController(t)
+	mockClient := NewMockShardDistributorExecutorCanaryAPIYARPCClient(ctrl)
+	zapCore, logs := observer.New(zap.InfoLevel)
+
+	pinger := NewPinger(Params{
+		Logger:       zap.New(zapCore),
+		TimeSource:   clock.NewRealTimeSource(),
+		CanaryClient: mockClient,
+	}, "test-ns", 0)
+
+	pinger.Start(context.Background())
+	pinger.Stop()
+
+	assert.Equal(t, 1, logs.FilterMessage("Canary pinger not started: number of shards must be positive").Len())
+	assert.Equal(t, 1, logs.Len())
+}
+
+// TestPingerPingRandomShard verifies that every ping outcome produces exactly
+// one log entry, namely the one matching that outcome.
+func TestPingerPingRandomShard(t *testing.T) {
+	defer goleak.VerifyNone(t)
+
+	cases := []struct {
+		name            string
+		setupClientMock func(*MockShardDistributorExecutorCanaryAPIYARPCClient)
+		expectedLog     string
+	}{
+		{
+			name: "owns shard",
+			setupClientMock: func(mockClient *MockShardDistributorExecutorCanaryAPIYARPCClient) {
+				mockClient.EXPECT().Ping(gomock.Any(), gomock.Any(), gomock.Any()).
+					Return(&sharddistributorv1.PingResponse{
+						OwnsShard:  true,
+						ExecutorId: "127.0.0.1:7953",
+					}, nil)
+			},
+			expectedLog: "Successfully pinged shard owner",
+		},
+		{
+			name: "does not own shard",
+			setupClientMock: func(mockClient *MockShardDistributorExecutorCanaryAPIYARPCClient) {
+				mockClient.EXPECT().
+					Ping(gomock.Any(), gomock.Any(), gomock.Any()).
+					Return(&sharddistributorv1.PingResponse{
+						OwnsShard:  false,
+						ExecutorId: "127.0.0.1:7953",
+					}, nil)
+			},
+			expectedLog: "Executor does not own shard",
+		},
+		{
+			name: "RPC error",
+			setupClientMock: func(mockClient *MockShardDistributorExecutorCanaryAPIYARPCClient) {
+				mockClient.EXPECT().
+					Ping(gomock.Any(), gomock.Any(), gomock.Any()).
+					Return(nil, errors.New("network error"))
+			},
+			expectedLog: "Failed to ping shard",
+		},
+	}
+
+	for _, tt := range cases {
+		t.Run(tt.name, func(t *testing.T) {
+			ctrl := gomock.NewController(t)
+			mockClient := NewMockShardDistributorExecutorCanaryAPIYARPCClient(ctrl)
+			zapCore, logs := observer.New(zap.InfoLevel)
+			logger := zap.New(zapCore)
+
+			pinger := NewPinger(Params{
+				Logger:       logger,
+				TimeSource:   clock.NewRealTimeSource(),
+				CanaryClient: mockClient,
+			}, "test-ns", 10)
+			pinger.ctx = context.Background()
+
+			tt.setupClientMock(mockClient)
+
+			pinger.pingRandomShard()
+
+			assert.Equal(t, 1, logs.FilterMessage(tt.expectedLog).Len())
+			// Exactly one entry overall: a failed ping must not also be
+			// reported as a success.
+			assert.Equal(t, 1, logs.Len())
+		})
+	}
+}