Skip to content

Commit fd47dd9

Browse files
committed
Fix bug in redisshard initialization
1 parent 24f7ee9 commit fd47dd9

File tree

15 files changed

+234
-120
lines changed

15 files changed

+234
-120
lines changed

controllers/redisshard_controller.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,11 @@ func (r *RedisShardReconciler) setRedisRoles(ctx context.Context, key types.Name
112112
if err != nil {
113113
return &sharded.Shard{Name: key.Name}, &ctrl.Result{}, err
114114
}
115+
if pod.Status.PodIP == "" {
116+
log.Info("waiting for pod IP to be allocated")
117+
return &sharded.Shard{Name: key.Name}, &ctrl.Result{RequeueAfter: 5 * time.Second}, nil
118+
}
119+
115120
redisURLs[fmt.Sprintf("%s-%d", serviceName, i)] = fmt.Sprintf("redis://%s:%d", pod.Status.PodIP, 6379)
116121
if int(masterIndex) == i {
117122
masterHostPort = fmt.Sprintf("%s:%d", pod.Status.PodIP, 6379)

controllers/sentinel_controller.go

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,15 +31,19 @@ import (
3131
"github.com/3scale/saas-operator/pkg/redis/sharded"
3232
"github.com/go-logr/logr"
3333
grafanav1alpha1 "github.com/grafana-operator/grafana-operator/v4/api/integreatly/v1alpha1"
34+
"golang.org/x/time/rate"
3435
appsv1 "k8s.io/api/apps/v1"
3536
corev1 "k8s.io/api/core/v1"
3637
policyv1 "k8s.io/api/policy/v1"
3738
"k8s.io/apimachinery/pkg/api/equality"
3839
"k8s.io/apimachinery/pkg/types"
40+
"k8s.io/client-go/util/workqueue"
3941
"k8s.io/utils/pointer"
4042
ctrl "sigs.k8s.io/controller-runtime"
43+
"sigs.k8s.io/controller-runtime/pkg/controller"
4144
"sigs.k8s.io/controller-runtime/pkg/handler"
4245
"sigs.k8s.io/controller-runtime/pkg/log"
46+
"sigs.k8s.io/controller-runtime/pkg/ratelimiter"
4347
"sigs.k8s.io/controller-runtime/pkg/source"
4448
)
4549

@@ -171,7 +175,7 @@ func (r *SentinelReconciler) reconcileStatus(ctx context.Context, instance *saas
171175
masterError := &sharded.DiscoveryError_Master_SingleServerFailure{}
172176
slaveError := &sharded.DiscoveryError_Slave_SingleServerFailure{}
173177
if errors.As(merr, masterError) || errors.As(merr, slaveError) {
174-
log.Error(merr, "errors occurred during discovery")
178+
log.Error(merr, "DiscoveryError")
175179
}
176180

177181
shards := make(saasv1alpha1.MonitoredShards, len(cluster.Shards))
@@ -215,5 +219,19 @@ func (r *SentinelReconciler) SetupWithManager(mgr ctrl.Manager) error {
215219
Owns(&grafanav1alpha1.GrafanaDashboard{}).
216220
Owns(&corev1.ConfigMap{}).
217221
Watches(&source.Channel{Source: r.SentinelEvents.GetChannel()}, &handler.EnqueueRequestForObject{}).
222+
WithOptions(controller.Options{
223+
RateLimiter: AggressiveRateLimiter(),
224+
}).
218225
Complete(r)
219226
}
227+
228+
func AggressiveRateLimiter() ratelimiter.RateLimiter {
229+
// return workqueue.DefaultControllerRateLimiter()
230+
return workqueue.NewMaxOfRateLimiter(
231+
// First retries are more spaced that default
232+
// Max retry time is limited to 10 seconds
233+
workqueue.NewItemExponentialFailureRateLimiter(5*time.Millisecond, 10*time.Second),
234+
// 10 qps, 100 bucket size. This is only for retry speed and its only the overall factor (not per item)
235+
&workqueue.BucketRateLimiter{Limiter: rate.NewLimiter(rate.Limit(10), 100)},
236+
)
237+
}

controllers/twemproxyconfig_controller.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ import (
3939
"k8s.io/utils/pointer"
4040
ctrl "sigs.k8s.io/controller-runtime"
4141
"sigs.k8s.io/controller-runtime/pkg/client"
42+
"sigs.k8s.io/controller-runtime/pkg/controller"
4243
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
4344
"sigs.k8s.io/controller-runtime/pkg/handler"
4445
"sigs.k8s.io/controller-runtime/pkg/log"
@@ -82,10 +83,10 @@ func (r *TwemproxyConfigReconciler) Reconcile(ctx context.Context, req ctrl.Requ
8283
gen, err := twemproxyconfig.NewGenerator(
8384
ctx, instance, r.Client, r.Pool, logger.WithName("generator"),
8485
)
85-
8686
if err != nil {
8787
return ctrl.Result{}, err
8888
}
89+
8990
cm, err := gen.ConfigMap().Build(ctx, r.Client)
9091
if err != nil {
9192
return ctrl.Result{}, err
@@ -268,5 +269,8 @@ func (r *TwemproxyConfigReconciler) SetupWithManager(mgr ctrl.Manager) error {
268269
Owns(&corev1.ConfigMap{}).
269270
Owns(&grafanav1alpha1.GrafanaDashboard{}).
270271
Watches(&source.Channel{Source: r.SentinelEvents.GetChannel()}, &handler.EnqueueRequestForObject{}).
272+
WithOptions(controller.Options{
273+
RateLimiter: AggressiveRateLimiter(),
274+
}).
271275
Complete(r)
272276
}

pkg/generators/twemproxyconfig/generator.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,7 @@ func NewGenerator(ctx context.Context, instance *saasv1alpha1.TwemproxyConfig, c
114114
case true:
115115
merr := shardedCluster.SentinelDiscover(ctx, sharded.SlaveReadOnlyDiscoveryOpt)
116116
if merr != nil {
117-
log.Error(merr, "errors occurred during discovery")
117+
log.Error(merr, "DiscoveryError")
118118
// Only sentinel/master discovery errors should return.
119119
// Slave failures will just failover to the master without returning error (although it will be logged)
120120
sentinelError := &sharded.DiscoveryError_Sentinel_Failure{}

pkg/generators/twemproxyconfig/generator_test.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@ import (
1313
"github.com/3scale/saas-operator/pkg/util"
1414
"github.com/go-logr/logr"
1515
"github.com/go-test/deep"
16+
"github.com/google/go-cmp/cmp"
17+
"github.com/google/go-cmp/cmp/cmpopts"
1618
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
1719
"sigs.k8s.io/controller-runtime/pkg/client"
1820
)
@@ -634,7 +636,7 @@ func TestNewGenerator(t *testing.T) {
634636
return
635637
}
636638
deep.CompareUnexportedFields = true
637-
if diff := deep.Equal(got, tt.want); len(diff) != 0 {
639+
if diff := cmp.Diff(got, tt.want, cmp.AllowUnexported(Generator{}), cmpopts.IgnoreUnexported(twemproxy.Server{})); len(diff) != 0 {
638640
t.Errorf("NewGenerator() = diff %v", diff)
639641
}
640642
})

pkg/redis/client/fake_client.go

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,11 @@ func (fc *FakeClient) SentinelPing(ctx context.Context) error {
9393
return rsp.InjectError()
9494
}
9595

96+
func (fc *FakeClient) SentinelDo(ctx context.Context, args ...interface{}) (interface{}, error) {
97+
rsp := fc.pop()
98+
return rsp.InjectResponse(), rsp.InjectError()
99+
}
100+
96101
func (fc *FakeClient) RedisRole(ctx context.Context) (interface{}, error) {
97102
rsp := fc.pop()
98103
return rsp.InjectResponse(), rsp.InjectError()
@@ -125,6 +130,11 @@ func (fc *FakeClient) RedisDebugSleep(ctx context.Context, duration time.Duratio
125130
return nil
126131
}
127132

133+
func (fc *FakeClient) RedisDo(ctx context.Context, args ...interface{}) (interface{}, error) {
134+
rsp := fc.pop()
135+
return rsp.InjectResponse(), rsp.InjectError()
136+
}
137+
128138
func (fc *FakeClient) pop() (fakeRsp FakeResponse) {
129139
fakeRsp, fc.Responses = fc.Responses[0], fc.Responses[1:]
130140
return fakeRsp

pkg/redis/client/goredis_client.go

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,11 @@ func (c *GoRedisClient) SentinelPing(ctx context.Context) error {
113113
return err
114114
}
115115

116+
func (c *GoRedisClient) SentinelDo(ctx context.Context, args ...interface{}) (interface{}, error) {
117+
val, err := c.redis.Do(ctx, args...).Result()
118+
return val, err
119+
}
120+
116121
func (c *GoRedisClient) RedisRole(ctx context.Context) (interface{}, error) {
117122

118123
val, err := c.redis.Do(ctx, "role").Result()
@@ -142,3 +147,8 @@ func (c *GoRedisClient) RedisDebugSleep(ctx context.Context, duration time.Durat
142147
_, err := c.redis.Do(ctx, "debug", "sleep", fmt.Sprintf("%.1f", duration.Seconds())).Result()
143148
return err
144149
}
150+
151+
func (c *GoRedisClient) RedisDo(ctx context.Context, args ...interface{}) (interface{}, error) {
152+
val, err := c.redis.Do(ctx, args...).Result()
153+
return val, err
154+
}

pkg/redis/client/interface.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,12 +18,14 @@ type TestableInterface interface {
1818
SentinelSet(context.Context, string, string, string) error
1919
SentinelPSubscribe(context.Context, ...string) (<-chan *redis.Message, func() error)
2020
SentinelInfoCache(context.Context) (interface{}, error)
21+
SentinelDo(context.Context, ...interface{}) (interface{}, error)
2122
SentinelPing(ctx context.Context) error
2223
RedisRole(context.Context) (interface{}, error)
2324
RedisConfigGet(context.Context, string) ([]interface{}, error)
2425
RedisConfigSet(context.Context, string, string) error
2526
RedisSlaveOf(context.Context, string, string) error
2627
RedisDebugSleep(context.Context, time.Duration) error
28+
RedisDo(context.Context, ...interface{}) (interface{}, error)
2729
Close() error
2830
}
2931

pkg/redis/server/server.go

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,10 @@ func (srv *Server) CloseClient() error {
7070
return srv.client.Close()
7171
}
7272

73+
func (srv *Server) GetClient() client.TestableInterface {
74+
return srv.client
75+
}
76+
7377
func (srv *Server) GetHost() string {
7478
return srv.host
7579
}
@@ -170,7 +174,7 @@ func (srv *Server) SentinelInfoCache(ctx context.Context) (client.SentinelInfoCa
170174
// When sentinel is unable to reach the redis slave the info field can be nil
171175
// so we have to check this to avoid panics
172176
if server.([]interface{})[1] != nil {
173-
info := infoStringToMap(server.([]interface{})[1].(string))
177+
info := InfoStringToMap(server.([]interface{})[1].(string))
174178
result[shard][info["run_id"]] = client.RedisServerInfoCache{
175179
CacheAge: time.Duration(server.([]interface{})[0].(int64)) * time.Millisecond,
176180
Info: info,
@@ -248,7 +252,7 @@ func islice2imap(in interface{}) map[string]interface{} {
248252
return m
249253
}
250254

251-
func infoStringToMap(in string) map[string]string {
255+
func InfoStringToMap(in string) map[string]string {
252256

253257
m := map[string]string{}
254258
scanner := bufio.NewScanner(strings.NewReader(in))

pkg/redis/server/server_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1409,7 +1409,7 @@ func Test_infoStringToMap(t *testing.T) {
14091409
}
14101410
for _, tt := range tests {
14111411
t.Run(tt.name, func(t *testing.T) {
1412-
if got := infoStringToMap(tt.args.in); !reflect.DeepEqual(got, tt.want) {
1412+
if got := InfoStringToMap(tt.args.in); !reflect.DeepEqual(got, tt.want) {
14131413
t.Errorf("infoStringToMap() = %v, want %v", got, tt.want)
14141414
}
14151415
})

pkg/redis/sharded/redis_server.go

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,12 @@
11
package sharded
22

33
import (
4+
"context"
5+
"fmt"
6+
47
"github.com/3scale/saas-operator/pkg/redis/client"
58
redis "github.com/3scale/saas-operator/pkg/redis/server"
9+
"sigs.k8s.io/controller-runtime/pkg/log"
610
)
711

812
type RedisServer struct {
@@ -31,3 +35,82 @@ func NewRedisServerFromParams(srv *redis.Server, role client.Role, config map[st
3135
Config: config,
3236
}
3337
}
38+
39+
func (srv *RedisServer) InitMaster(ctx context.Context) (bool, error) {
40+
logger := log.FromContext(ctx, "function", "(*RedisServer).InitMaster")
41+
42+
role, slaveof, err := srv.RedisRole(ctx)
43+
if err != nil {
44+
return false, err
45+
}
46+
47+
switch role {
48+
case client.Slave:
49+
50+
if slaveof == "127.0.0.1" {
51+
// needs initialization
52+
if err := srv.RedisSlaveOf(ctx, "NO", "ONE"); err != nil {
53+
return false, err
54+
}
55+
logger.Info(fmt.Sprintf("configured %s|%s as master", srv.GetAlias(), srv.ID()))
56+
return true, nil
57+
58+
} else {
59+
srv.Role = client.Slave
60+
}
61+
62+
case client.Master:
63+
srv.Role = client.Master
64+
}
65+
66+
return false, nil
67+
}
68+
69+
func (srv *RedisServer) InitSlave(ctx context.Context, master *RedisServer) (bool, error) {
70+
71+
logger := log.FromContext(ctx, "function", "(*RedisServer).InitSlave")
72+
73+
role, slaveof, err := srv.RedisRole(ctx)
74+
if err != nil {
75+
return false, err
76+
}
77+
78+
switch role {
79+
case client.Slave:
80+
81+
// needs initialization
82+
if slaveof == "127.0.0.1" {
83+
// validate first that the master is ready
84+
role, _, err := master.RedisRole(ctx)
85+
if err != nil || role != client.Master {
86+
err := fmt.Errorf("shard master %s|%s is not ready", master.GetAlias(), master.ID())
87+
logger.Error(err, "slave init failed")
88+
return false, err
89+
90+
} else {
91+
// if master ok, init slave
92+
if err := srv.RedisSlaveOf(ctx, master.GetHost(), master.GetPort()); err != nil {
93+
return false, err
94+
}
95+
logger.Info(fmt.Sprintf("configured %s|%s as slave", srv.GetAlias(), srv.ID()))
96+
return true, nil
97+
}
98+
99+
} else {
100+
srv.Role = client.Slave
101+
// FOR DEBUGGING
102+
// val, err := srv.GetClient().RedisDo(ctx, "info", "replication")
103+
// if err != nil {
104+
// logger.Error(err, "unable to get info")
105+
// } else {
106+
// logger.Info("dump replication status", "Slave", srv.GetAlias())
107+
// logger.Info(fmt.Sprintf("%s", redis.InfoStringToMap(val.(string))))
108+
// }
109+
}
110+
111+
case client.Master:
112+
srv.Role = client.Master
113+
}
114+
115+
return false, nil
116+
}

pkg/redis/sharded/redis_shard.go

Lines changed: 25 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@ package sharded
33
import (
44
"context"
55
"fmt"
6-
"net"
76
"sort"
87
"strings"
98

@@ -216,44 +215,36 @@ func (shard *Shard) GetServerByID(hostport string) (*RedisServer, error) {
216215

217216
// Init initializes the shard if not already initialized
218217
func (shard *Shard) Init(ctx context.Context, masterHostPort string) ([]string, error) {
219-
logger := log.FromContext(ctx, "function", "(*Shard).Init")
220-
changed := []string{}
218+
merr := util.MultiError{}
219+
listChanged := []string{}
220+
var master *RedisServer
221221

222-
for idx, srv := range shard.Servers {
223-
role, slaveof, err := srv.RedisRole(ctx)
224-
if err != nil {
225-
return changed, err
222+
// Init the master
223+
for _, srv := range shard.Servers {
224+
if srv.ID() == masterHostPort {
225+
master = srv
226+
changed, err := master.InitMaster(ctx)
227+
if err != nil {
228+
return listChanged, append(merr, err)
229+
}
230+
if changed {
231+
listChanged = append(listChanged, master.ID())
232+
}
226233
}
234+
}
227235

228-
if role == client.Slave {
229-
230-
if slaveof == "127.0.0.1" {
231-
232-
if masterHostPort == srv.ID() {
233-
if err := srv.RedisSlaveOf(ctx, "NO", "ONE"); err != nil {
234-
return changed, err
235-
}
236-
logger.Info(fmt.Sprintf("configured %s as master", srv.ID()))
237-
changed = append(changed, srv.ID())
238-
} else {
239-
host, port, _ := net.SplitHostPort(masterHostPort)
240-
if err := srv.RedisSlaveOf(ctx, host, port); err != nil {
241-
return changed, err
242-
}
243-
logger.Info(fmt.Sprintf("configured %s as slave", srv.ID()))
244-
changed = append(changed, srv.ID())
245-
}
246-
247-
} else {
248-
shard.Servers[idx].Role = client.Slave
236+
// Init the slaves
237+
for _, srv := range shard.Servers {
238+
if srv.ID() != masterHostPort {
239+
changed, err := srv.InitSlave(ctx, master)
240+
if err != nil {
241+
merr = append(merr, err)
242+
}
243+
if changed {
244+
listChanged = append(listChanged, srv.ID())
249245
}
250-
251-
} else if role == client.Master {
252-
shard.Servers[idx].Role = client.Master
253-
} else {
254-
return changed, fmt.Errorf("unable to get role for server %s", srv.ID())
255246
}
256247
}
257248

258-
return changed, nil
249+
return listChanged, merr.ErrorOrNil()
259250
}

0 commit comments

Comments
 (0)