diff --git a/go.mod b/go.mod index 17ed5767d9..bfbf53bc96 100644 --- a/go.mod +++ b/go.mod @@ -189,7 +189,7 @@ replace ( k8s.io/api => k8s.io/api v0.29.2 k8s.io/apiextensions-apiserver => k8s.io/apiextensions-apiserver v0.29.2 k8s.io/apimachinery => k8s.io/apimachinery v0.29.2 - k8s.io/apiserver => github.com/openshift/kubernetes-apiserver v0.0.0-20240410114447-9e7c11c45dab // points to openshift-apiserver-4.16-kubernetes-1.29.2 + k8s.io/apiserver => github.com/openshift/kubernetes-apiserver v0.0.0-20250915121356-f80f5359033a // points to openshift-apiserver-4.17-kubernetes-1.29.2 k8s.io/cli-runtime => k8s.io/cli-runtime v0.29.2 k8s.io/client-go => k8s.io/client-go v0.29.2 k8s.io/cloud-provider => k8s.io/cloud-provider v0.29.2 diff --git a/go.sum b/go.sum index 25d370455b..45655bad3f 100644 --- a/go.sum +++ b/go.sum @@ -274,8 +274,8 @@ github.com/openshift/client-go v0.0.0-20240415214935-be70f772f157 h1:xbd4qHpyFnn github.com/openshift/client-go v0.0.0-20240415214935-be70f772f157/go.mod h1:Q3mt/X5xrxnR5R6BE7duF2ToLioRQJYnTYaaDS4QZTs= github.com/openshift/docker-distribution/v3 v3.0.0-20240215131201-6b2f5d2f1f43 h1:iFiveehT5yqHvAxdTwHGZLTxyxzMqP8bLcQKz6Y7NQQ= github.com/openshift/docker-distribution/v3 v3.0.0-20240215131201-6b2f5d2f1f43/go.mod h1:+fqBJ4vPYo4Uu1ZE4d+bUtTLRXfdSL3NvCZIZ9GHv58= -github.com/openshift/kubernetes-apiserver v0.0.0-20240410114447-9e7c11c45dab h1:hVEUWx+0XiMkstOLlQ5BiBZTnA7WWJJf80Dc2L2FLyM= -github.com/openshift/kubernetes-apiserver v0.0.0-20240410114447-9e7c11c45dab/go.mod h1:B0LieKVoyU7ykQvPFm7XSdIHaCHSzCzQWPFa5bqbeMQ= +github.com/openshift/kubernetes-apiserver v0.0.0-20250915121356-f80f5359033a h1:NBEp4AKf+RXdvcHAPJUFnhOg7dwnNDkLY2PuJceC3B4= +github.com/openshift/kubernetes-apiserver v0.0.0-20250915121356-f80f5359033a/go.mod h1:B0LieKVoyU7ykQvPFm7XSdIHaCHSzCzQWPFa5bqbeMQ= github.com/openshift/library-go v0.0.0-20240513090140-e22d25af5587 h1:eZ79/F6bhtqoY78KRRxp5G5yITTgVatMnj5J/cYDumI= github.com/openshift/library-go v0.0.0-20240513090140-e22d25af5587/go.mod h1:lFwyRj0XjUf25Da3Q00y+KuaxCWTJ6YzYPDX1+96nco= github.com/openshift/moby-moby v0.0.0-20190308215630-da810a85109d h1:fLITXDjxMSvUDjnXs/zljIWktbST9+Om8XbrmmM7T4I= diff --git a/vendor/k8s.io/apiserver/pkg/storage/etcd3/etcd3retry/retry_etcdclient.go b/vendor/k8s.io/apiserver/pkg/storage/etcd3/etcd3retry/retry_etcdclient.go index 12bd733f3d..b56fd2084c 100644 --- a/vendor/k8s.io/apiserver/pkg/storage/etcd3/etcd3retry/retry_etcdclient.go +++ b/vendor/k8s.io/apiserver/pkg/storage/etcd3/etcd3retry/retry_etcdclient.go @@ -2,6 +2,9 @@ package etcd3retry import ( "context" + "fmt" + "regexp" + "strings" "time" etcdrpc "go.etcd.io/etcd/api/v3/v3rpc/rpctypes" @@ -15,7 +18,7 @@ import ( "k8s.io/klog/v2" ) -var defaultRetry = wait.Backoff{ +var DefaultRetry = wait.Backoff{ Duration: 300 * time.Millisecond, Factor: 2, // double the timeout for every failure Jitter: 0.1, @@ -36,7 +39,7 @@ func NewRetryingEtcdStorage(delegate storage.Interface) storage.Interface { // in seconds (0 means forever). If no error is returned and out is not nil, out will be // set to the read value from database. func (c *retryClient) Create(ctx context.Context, key string, obj, out runtime.Object, ttl uint64) error { - return onError(ctx, defaultRetry, isRetriableEtcdError, func() error { + return OnError(ctx, DefaultRetry, IsRetriableErrorOnWrite, func() error { return c.Interface.Create(ctx, key, obj, out, ttl) }) } @@ -44,7 +47,7 @@ func (c *retryClient) Create(ctx context.Context, key string, obj, out runtime.O // Delete removes the specified key and returns the value that existed at that spot. // If key didn't exist, it will return NotFound storage error. func (c *retryClient) Delete(ctx context.Context, key string, out runtime.Object, preconditions *storage.Preconditions, validateDeletion storage.ValidateObjectFunc, cachedExistingObject runtime.Object) error { - return onError(ctx, defaultRetry, isRetriableEtcdError, func() error { + return OnError(ctx, DefaultRetry, IsRetriableErrorOnWrite, func() error { return c.Interface.Delete(ctx, key, out, preconditions, validateDeletion, cachedExistingObject) }) } @@ -58,7 +61,7 @@ func (c *retryClient) Delete(ctx context.Context, key string, out runtime.Object // and send it in an "ADDED" event, before watch starts. func (c *retryClient) Watch(ctx context.Context, key string, opts storage.ListOptions) (watch.Interface, error) { var ret watch.Interface - err := onError(ctx, defaultRetry, isRetriableEtcdError, func() error { + err := OnError(ctx, DefaultRetry, IsRetriableErrorOnRead, func() error { var innerErr error ret, innerErr = c.Interface.Watch(ctx, key, opts) return innerErr @@ -72,7 +75,7 @@ func (c *retryClient) Watch(ctx context.Context, key string, opts storage.ListOp // The returned contents may be delayed, but it is guaranteed that they will // match 'opts.ResourceVersion' according 'opts.ResourceVersionMatch'. func (c *retryClient) Get(ctx context.Context, key string, opts storage.GetOptions, objPtr runtime.Object) error { - return onError(ctx, defaultRetry, isRetriableEtcdError, func() error { + return OnError(ctx, DefaultRetry, IsRetriableErrorOnRead, func() error { return c.Interface.Get(ctx, key, opts, objPtr) }) } @@ -84,7 +87,7 @@ func (c *retryClient) Get(ctx context.Context, key string, opts storage.GetOptio // The returned contents may be delayed, but it is guaranteed that they will // match 'opts.ResourceVersion' according 'opts.ResourceVersionMatch'. func (c *retryClient) GetList(ctx context.Context, key string, opts storage.ListOptions, listObj runtime.Object) error { - return onError(ctx, defaultRetry, isRetriableEtcdError, func() error { + return OnError(ctx, DefaultRetry, IsRetriableErrorOnRead, func() error { return c.Interface.GetList(ctx, key, opts, listObj) }) } @@ -125,29 +128,71 @@ func (c *retryClient) GetList(ctx context.Context, key string, opts storage.List // ) func (c *retryClient) GuaranteedUpdate(ctx context.Context, key string, destination runtime.Object, ignoreNotFound bool, preconditions *storage.Preconditions, tryUpdate storage.UpdateFunc, cachedExistingObject runtime.Object) error { - return onError(ctx, defaultRetry, isRetriableEtcdError, func() error { + return OnError(ctx, DefaultRetry, IsRetriableErrorOnWrite, func() error { return c.Interface.GuaranteedUpdate(ctx, key, destination, ignoreNotFound, preconditions, tryUpdate, cachedExistingObject) }) } -// isRetriableEtcdError returns true if a retry should be attempted, otherwise false. -// errorLabel is set to a non-empty value that reflects the type of error encountered. -func isRetriableEtcdError(err error) (errorLabel string, retry bool) { - if err != nil { - if etcdError, ok := etcdrpc.Error(err).(etcdrpc.EtcdError); ok { - if etcdError.Code() == codes.Unavailable { - errorLabel = "Unavailable" - retry = true - } - } +// These errors are coming back from the k8s.io/apiserver storage.Interface, not directly from an +// etcd client. Classifying them can be fragile since the storage methods may not return etcd client +// errors directly. +var errorLabelsBySuffix = map[string]string{ + "etcdserver: leader changed": "LeaderChanged", + "etcdserver: no leader": "NoLeader", + "raft proposal dropped": "ProposalDropped", + + "etcdserver: request timed out": "Timeout", + "etcdserver: request timed out, possibly due to previous leader failure": "Timeout", + "etcdserver: request timed out, possible due to connection lost": "Timeout", + "etcdserver: request timed out, waiting for the applied index took too long": "Timeout", + "etcdserver: server stopped": "Stopped", +} + +var retriableWriteErrorSuffixes = func() *regexp.Regexp { + // This list should include only errors the caller is certain have no side effects. + suffixes := []string{ + "etcdserver: leader changed", + "etcdserver: no leader", + "raft proposal dropped", + } + return regexp.MustCompile(fmt.Sprintf(`(%s)$`, strings.Join(suffixes, `|`))) +}() + +// IsRetriableErrorOnWrite returns true if and only if a retry should be attempted when the provided +// error is returned from a write attempt. If the error is retriable, a non-empty string classifying +// the error is also returned. +func IsRetriableErrorOnWrite(err error) (string, bool) { + if suffix := retriableWriteErrorSuffixes.FindString(err.Error()); suffix != "" { + return errorLabelsBySuffix[suffix], true + } + return "", false +} + +var retriableReadErrorSuffixes = func() *regexp.Regexp { + var suffixes []string + for suffix := range errorLabelsBySuffix { + suffixes = append(suffixes, suffix) + } + return regexp.MustCompile(fmt.Sprintf(`(%s)$`, strings.Join(suffixes, `|`))) +}() + +// IsRetriableErrorOnRead returns true if and only if a retry should be attempted when the provided +// error is returned from a read attempt. If the error is retriable, a non-empty string classifying +// the error is also returned. +func IsRetriableErrorOnRead(err error) (string, bool) { + if suffix := retriableReadErrorSuffixes.FindString(err.Error()); suffix != "" { + return errorLabelsBySuffix[suffix], true } - return + if etcdError, ok := etcdrpc.Error(err).(etcdrpc.EtcdError); ok && etcdError.Code() == codes.Unavailable { + return "Unavailable", true + } + return "", false } -// onError allows the caller to retry fn in case the error returned by fn is retriable +// OnError allows the caller to retry fn in case the error returned by fn is retriable // according to the provided function. backoff defines the maximum retries and the wait // interval between two retries. -func onError(ctx context.Context, backoff wait.Backoff, retriable func(error) (string, bool), fn func() error) error { +func OnError(ctx context.Context, backoff wait.Backoff, retriable func(error) (string, bool), fn func() error) error { var lastErr error var lastErrLabel string var retry bool @@ -163,6 +208,9 @@ func onError(ctx context.Context, backoff wait.Backoff, retriable func(error) (s } lastErrLabel, retry = retriable(err) + if klog.V(6).Enabled() { + klog.V(6).InfoS("observed storage error", "err", err, "retriable", retry) + } if retry { lastErr = err retryCounter++ diff --git a/vendor/k8s.io/apiserver/pkg/storage/storagebackend/factory/etcd3.go b/vendor/k8s.io/apiserver/pkg/storage/storagebackend/factory/etcd3.go index fbb97147c3..9742a66f34 100644 --- a/vendor/k8s.io/apiserver/pkg/storage/storagebackend/factory/etcd3.go +++ b/vendor/k8s.io/apiserver/pkg/storage/storagebackend/factory/etcd3.go @@ -155,13 +155,13 @@ func newETCD3Check(c storagebackend.Config, timeout time.Duration, stopCh <-chan // retry in a loop in the background until we successfully create the client, storing the client or error encountered lock := sync.RWMutex{} - var prober *etcd3ProberMonitor + var prober *etcd3RetryingProberMonitor clientErr := fmt.Errorf("etcd client connection not yet established") go wait.PollImmediateUntil(time.Second, func() (bool, error) { lock.Lock() defer lock.Unlock() - newProber, err := newETCD3ProberMonitor(c) + newProber, err := newRetryingETCD3ProberMonitor(c) // Ensure that server is already not shutting down. select { case <-stopCh: diff --git a/vendor/k8s.io/apiserver/pkg/storage/storagebackend/factory/factory.go b/vendor/k8s.io/apiserver/pkg/storage/storagebackend/factory/factory.go index 2bf3727e8a..0967a84cbe 100644 --- a/vendor/k8s.io/apiserver/pkg/storage/storagebackend/factory/factory.go +++ b/vendor/k8s.io/apiserver/pkg/storage/storagebackend/factory/factory.go @@ -69,7 +69,7 @@ func CreateProber(c storagebackend.Config) (Prober, error) { case storagebackend.StorageTypeETCD2: return nil, fmt.Errorf("%s is no longer a supported storage backend", c.Type) case storagebackend.StorageTypeUnset, storagebackend.StorageTypeETCD3: - return newETCD3ProberMonitor(c) + return newRetryingETCD3ProberMonitor(c) default: return nil, fmt.Errorf("unknown storage type: %s", c.Type) } @@ -80,7 +80,7 @@ func CreateMonitor(c storagebackend.Config) (metrics.Monitor, error) { case storagebackend.StorageTypeETCD2: return nil, fmt.Errorf("%s is no longer a supported storage backend", c.Type) case storagebackend.StorageTypeUnset, storagebackend.StorageTypeETCD3: - return newETCD3ProberMonitor(c) + return newRetryingETCD3ProberMonitor(c) default: return nil, fmt.Errorf("unknown storage type: %s", c.Type) } diff --git a/vendor/k8s.io/apiserver/pkg/storage/storagebackend/factory/retry_etcdprobemonitor.go b/vendor/k8s.io/apiserver/pkg/storage/storagebackend/factory/retry_etcdprobemonitor.go new file mode 100644 index 0000000000..0e6c19b451 --- /dev/null +++ b/vendor/k8s.io/apiserver/pkg/storage/storagebackend/factory/retry_etcdprobemonitor.go @@ -0,0 +1,46 @@ +package factory + +import ( + "context" + + "k8s.io/apiserver/pkg/storage/etcd3/etcd3retry" + "k8s.io/apiserver/pkg/storage/etcd3/metrics" + "k8s.io/apiserver/pkg/storage/storagebackend" +) + +type proberMonitor interface { + Prober + metrics.Monitor +} + +type etcd3RetryingProberMonitor struct { + delegate proberMonitor +} + +func newRetryingETCD3ProberMonitor(c storagebackend.Config) (*etcd3RetryingProberMonitor, error) { + delegate, err := newETCD3ProberMonitor(c) + if err != nil { + return nil, err + } + return &etcd3RetryingProberMonitor{delegate: delegate}, nil +} + +func (t *etcd3RetryingProberMonitor) Probe(ctx context.Context) error { + return etcd3retry.OnError(ctx, etcd3retry.DefaultRetry, etcd3retry.IsRetriableErrorOnRead, func() error { + return t.delegate.Probe(ctx) + }) +} + +func (t *etcd3RetryingProberMonitor) Monitor(ctx context.Context) (metrics.StorageMetrics, error) { + var ret metrics.StorageMetrics + err := etcd3retry.OnError(ctx, etcd3retry.DefaultRetry, etcd3retry.IsRetriableErrorOnRead, func() error { + var innerErr error + ret, innerErr = t.delegate.Monitor(ctx) + return innerErr + }) + return ret, err +} + +func (t *etcd3RetryingProberMonitor) Close() error { + return t.delegate.Close() +} diff --git a/vendor/modules.txt b/vendor/modules.txt index ce677d8f59..0e39e051c1 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -1286,7 +1286,7 @@ k8s.io/apimachinery/pkg/watch k8s.io/apimachinery/third_party/forked/golang/json k8s.io/apimachinery/third_party/forked/golang/netutil k8s.io/apimachinery/third_party/forked/golang/reflect -# k8s.io/apiserver v0.29.2 => github.com/openshift/kubernetes-apiserver v0.0.0-20240410114447-9e7c11c45dab +# k8s.io/apiserver v0.29.2 => github.com/openshift/kubernetes-apiserver v0.0.0-20250915121356-f80f5359033a ## explicit; go 1.21 k8s.io/apiserver/pkg/admission k8s.io/apiserver/pkg/admission/cel @@ -2284,7 +2284,7 @@ sigs.k8s.io/yaml # k8s.io/api => k8s.io/api v0.29.2 # k8s.io/apiextensions-apiserver => k8s.io/apiextensions-apiserver v0.29.2 # k8s.io/apimachinery => k8s.io/apimachinery v0.29.2 -# k8s.io/apiserver => github.com/openshift/kubernetes-apiserver v0.0.0-20240410114447-9e7c11c45dab +# k8s.io/apiserver => github.com/openshift/kubernetes-apiserver v0.0.0-20250915121356-f80f5359033a # k8s.io/cli-runtime => k8s.io/cli-runtime v0.29.2 # k8s.io/client-go => k8s.io/client-go v0.29.2 # k8s.io/cloud-provider => k8s.io/cloud-provider v0.29.2