Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
77 changes: 76 additions & 1 deletion pkg/project/apiserver/registry/project/proxy/proxy.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,13 @@ package proxy
import (
"context"
"fmt"
"time"

kerrors "k8s.io/apimachinery/pkg/api/errors"
metainternal "k8s.io/apimachinery/pkg/apis/meta/internalversion"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/apimachinery/pkg/watch"
apirequest "k8s.io/apiserver/pkg/endpoints/request"
"k8s.io/apiserver/pkg/registry/rest"
Expand Down Expand Up @@ -208,11 +210,84 @@ func (s *REST) Update(ctx context.Context, name string, objInfo rest.UpdatedObje

var _ = rest.GracefulDeleter(&REST{})

// maxRetriesOnConflict is the maximum retry count for Delete calls which
// result in resource conflicts.
const maxRetriesOnConflict = 10

// maxDuration set max duration of delete retries. Deleting a project affects apiserver latency,
// so this should be kept as small as possible
const maxDuration = time.Second

// Delete deletes a Project specified by its name
func (s *REST) Delete(ctx context.Context, name string, objectFunc rest.ValidateObjectFunc, options *metav1.DeleteOptions) (runtime.Object, bool, error) {
var opts metav1.DeleteOptions
if options != nil {
opts = *options
}
return &metav1.Status{Status: metav1.StatusSuccess}, false, s.client.Delete(ctx, name, opts)
var lastErr error
err := wait.ExponentialBackoffWithContext(ctx, wait.Backoff{Steps: maxRetriesOnConflict, Duration: maxDuration}, func(ctx context.Context) (bool, error) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If I'm understanding the Godoc for Duration correctly, this means that we will sleep for one second between retries. That seems high to me. I bet it is a lot longer than a typical total latency of both namespace requests combined.

We can configure the other fields for exponential backoff so that initial retry is fairly fast.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, right, somehow I though "Duration" is max duration we're allowed to spend. I think wait.Backoff{Steps: maxRetriesOnConflict, Factor: 1/maxRetriesOnConflict, Cap: maxDuration, Duration: maxDuration/maxRetriesOnConflict} would make it "up to 1 second" and ensure it has several retries

var err error
if objectFunc != nil {
var obj runtime.Object
getOpts := metav1.GetOptions{}
if opts.Preconditions != nil && opts.Preconditions.ResourceVersion != nil {
getOpts.ResourceVersion = *opts.Preconditions.ResourceVersion
}
obj, err = s.Get(ctx, name, &getOpts)
if err != nil {
lastErr = fmt.Errorf("unable to get project: %w", err)
return false, nil
}
projectObj, ok := obj.(*projectapi.Project)
if !ok || projectObj == nil {
lastErr = fmt.Errorf("not a project: %#v", obj)
return false, nil
}
if opts.Preconditions == nil {
opts.Preconditions = &metav1.Preconditions{}
}
if options.Preconditions != nil {
// Throw an error if the UID or ResourceVersion preconditions do not match fetched object already
// This would avoid extra retries when user has provided invalid preconditions
if opts.Preconditions.UID != nil && projectObj.UID != *options.Preconditions.UID {
lastErr = fmt.Errorf("precondition UID %s does not match project UID %s", *opts.Preconditions.UID, projectObj.UID)
return false, nil
}
if opts.Preconditions.ResourceVersion != nil && projectObj.ResourceVersion != *options.Preconditions.ResourceVersion {
lastErr = fmt.Errorf("precondition RV %s does not match project RV %s", *opts.Preconditions.ResourceVersion, projectObj.ResourceVersion)
return false, nil
}
}
// Make sure the object hasn't changed between Get and Delete - pass UID and RV to delete options
// unless Precondition is already set
if opts.Preconditions.UID == nil {
opts.Preconditions.UID = &projectObj.UID
}
if opts.Preconditions.ResourceVersion == nil {
opts.Preconditions.ResourceVersion = &projectObj.ResourceVersion
}
Comment on lines +263 to +268
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We don't want to retry conflicts that are caused by client-provided preconditions (they are probably doomed unless the request changes).

If we might have propagated one precondition from the request, and added a second precondition here, it becomes hard to robustly determine which precondition caused a conflict. One way to solve this might be to inspect the fresh namespace returned from Get and enforce any client-provided preconditions immediately. After that, we know that both preconditions passed to the namespace Delete came from this code and that a retry might succeed with a newer UID/RV.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Check that fetched project matches RV and/or UID before proceeding with delete


if err := objectFunc(ctx, obj); err != nil {
lastErr = fmt.Errorf("validation func failed: %w", err)
return false, nil
}
}
err = s.client.Delete(ctx, name, opts)
switch {
case err == nil:
return true, nil
case kerrors.IsConflict(err):
lastErr = err
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add tests showing that retry happens on conflict, no retry happens on non-conflict, and one where retries are exhausted please?

return false, nil
default:
return false, err
}
})
if err != nil {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ExponentialBackoffWithContext [..] immediately returns an error if the condition returns an error

I don't understand how this helped. Isn't the doc for this function telling me that it would return immediately if there were a delete conflict?

if wait.Interrupted(err) {
err = lastErr
}
return &metav1.Status{Status: metav1.StatusFailure}, false, err
}
return &metav1.Status{Status: metav1.StatusSuccess}, false, nil
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What happens when err != nil && !wait.Interrupted(err)?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right, missed that part - we should replace err with lastErr is its an interrupt error

}
218 changes: 214 additions & 4 deletions pkg/project/apiserver/registry/project/proxy/proxy_test.go
Original file line number Diff line number Diff line change
@@ -1,17 +1,23 @@
package proxy

import (
"context"
"errors"
"fmt"
"strings"
"testing"

corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/errors"
kerrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/runtime"
ktypes "k8s.io/apimachinery/pkg/types"
"k8s.io/apiserver/pkg/authentication/user"
apirequest "k8s.io/apiserver/pkg/endpoints/request"
"k8s.io/apiserver/pkg/registry/rest"
"k8s.io/client-go/kubernetes/fake"
ktesting "k8s.io/client-go/testing"

oapi "github.com/openshift/openshift-apiserver/pkg/api"
projectapi "github.com/openshift/openshift-apiserver/pkg/project/apis/project"
Expand Down Expand Up @@ -79,7 +85,7 @@ func TestCreateInvalidProject(t *testing.T) {
Annotations: map[string]string{oapi.OpenShiftDisplayName: "h\t\ni"},
},
}, rest.ValidateAllObjectFunc, &metav1.CreateOptions{})
if !errors.IsInvalid(err) {
if !kerrors.IsInvalid(err) {
t.Errorf("Expected 'invalid' error, got %v", err)
}
}
Expand All @@ -101,6 +107,27 @@ func TestCreateProjectOK(t *testing.T) {
}
}

func TestCreateProjectValidation(t *testing.T) {
mockClient := &fake.Clientset{}
storage := NewREST(mockClient.CoreV1().Namespaces(), &mockLister{}, nil, nil)

validationCalled := false
validationFunc := func(ctx context.Context, obj runtime.Object) error {
validationCalled = true
return nil
}

_, err := storage.Create(apirequest.NewContext(), &projectapi.Project{
ObjectMeta: metav1.ObjectMeta{Name: "foo"},
}, validationFunc, &metav1.CreateOptions{})
if err != nil {
t.Errorf("Unexpected non-nil error: %#v", err)
}
if !validationCalled {
t.Errorf("Expected validation function to be called")
}
}

func TestGetProjectOK(t *testing.T) {
mockClient := fake.NewSimpleClientset(&corev1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: "foo"}})
storage := NewREST(mockClient.CoreV1().Namespaces(), &mockLister{}, nil, nil)
Expand Down Expand Up @@ -136,9 +163,192 @@ func TestDeleteProject(t *testing.T) {
t.Errorf("Expected status=success, got: %#v", status)
}
if len(mockClient.Actions()) != 1 {
t.Errorf("Expected client action for delete")
t.Errorf("Expected client action for delete, got %v", mockClient.Actions())
}
if !mockClient.Actions()[0].Matches("delete", "namespaces") {
t.Errorf("Expected call to delete-namespace")
t.Errorf("Expected call to delete-namespace, got %#v", mockClient.Actions()[0])
}
}

func TestDeleteProjectValidation(t *testing.T) {
mockClient := &fake.Clientset{}
storage := REST{
client: mockClient.CoreV1().Namespaces(),
}
validationCalled := false
validationFunc := func(ctx context.Context, obj runtime.Object) error {
validationCalled = true
return nil
}

storage.Delete(apirequest.NewContext(), "foo", validationFunc, &metav1.DeleteOptions{})
if !validationCalled {
t.Errorf("Expected validation function to be called")
}
}

func TestDeleteProjectValidationRetries(t *testing.T) {
mockClient := &fake.Clientset{}
storage := REST{
client: mockClient.CoreV1().Namespaces(),
}
maxRetries := 3
validationRetries := 0
validationFunc := func(ctx context.Context, obj runtime.Object) error {
if validationRetries < maxRetries {
validationRetries++
return fmt.Errorf("not there yet")
}
return nil
}
obj, _, err := storage.Delete(apirequest.NewContext(), "foo", validationFunc, &metav1.DeleteOptions{})
if obj == nil {
t.Error("Unexpected nil obj")
}
if err != nil {
t.Errorf("Unexpected non-nil error: %#v", err)
}
status, ok := obj.(*metav1.Status)
if !ok {
t.Errorf("Expected status type, got: %#v", obj)
}
if status.Status != metav1.StatusSuccess {
t.Errorf("Expected status=success, got: %#v", status)
}
if len(mockClient.Actions()) != maxRetries+2 {
t.Errorf("Expected client action for get, got %v", mockClient.Actions())
}
for i := range len(mockClient.Actions()) - 1 {
if !mockClient.Actions()[i].Matches("get", "namespaces") {
t.Errorf("Expected call #%d to get-namespace, got %#v", i, mockClient.Actions()[i])
}
}
if !mockClient.Actions()[len(mockClient.Actions())-1].Matches("delete", "namespaces") {
t.Errorf("Expected call #%d to delete-namespace, got %#v", len(mockClient.Actions())-1, mockClient.Actions()[len(mockClient.Actions())-1])
}
if validationRetries != maxRetries {
t.Errorf("Expected validation function to be retried %d times, got %d", maxRetries, validationRetries)
}
}

func TestDeleteProjectValidationError(t *testing.T) {
mockClient := &fake.Clientset{}
storage := REST{
client: mockClient.CoreV1().Namespaces(),
}
validationError := fmt.Errorf("faulty function")

validationFunc := func(ctx context.Context, obj runtime.Object) error {
return validationError
}
obj, _, err := storage.Delete(apirequest.NewContext(), "foo", validationFunc, &metav1.DeleteOptions{})
if obj == nil {
t.Error("Unexpected nil obj")
}
status, ok := obj.(*metav1.Status)
if !ok {
t.Errorf("Expected status type, got: %#v", obj)
}
if status.Status != metav1.StatusFailure {
t.Errorf("Expected status=failure, got: %#v", status)
}
if err == nil {
t.Errorf("Expected error but got nil")
}
if errors.Unwrap(err) != validationError {
t.Errorf("Unexpected error: %#v", errors.Unwrap(err))
}
}

func TestDeleteProjectValidationPreconditionUID(t *testing.T) {
uid := ktypes.UID("first-uid")
resourceVersion := "10"
meta := metav1.ObjectMeta{Name: "foo", UID: uid, ResourceVersion: resourceVersion}
namespaceList := corev1.NamespaceList{
Items: []corev1.Namespace{
{
ObjectMeta: meta,
},
},
}

tests := []struct {
testName string
uid, resourceVersion string
}{
{
testName: "all unset",
},
{
testName: "uid set",
uid: "first-uid",
},
{
testName: "resourceVersion set",
resourceVersion: "10",
},
{
testName: "both set",
uid: "first-uid",
resourceVersion: "10",
},
}
for _, test := range tests {
t.Run(test.testName, func(t *testing.T) {
mockClient := fake.NewSimpleClientset(&namespaceList)
storage := REST{
client: mockClient.CoreV1().Namespaces(),
}
validationFunc := func(ctx context.Context, obj runtime.Object) error {
return nil
}

expectedUID := uid
expectedRV := resourceVersion
deleteOpts := &metav1.DeleteOptions{}
if len(test.uid) > 0 {
expectedUID = ktypes.UID(test.uid)
if deleteOpts.Preconditions == nil {
deleteOpts.Preconditions = &metav1.Preconditions{}
}
deleteOpts.Preconditions.UID = &expectedUID
}
if len(test.resourceVersion) > 0 {
expectedRV = test.resourceVersion
if deleteOpts.Preconditions == nil {
deleteOpts.Preconditions = &metav1.Preconditions{}
}
deleteOpts.Preconditions.ResourceVersion = &expectedRV
}

storage.Delete(apirequest.NewContext(), "foo", validationFunc, deleteOpts)
if len(mockClient.Actions()) != 2 {
t.Errorf("Expected client action for get and delete, got %v", mockClient.Actions())
}
if !mockClient.Actions()[0].Matches("get", "namespaces") {
t.Errorf("Expected call to get-namespace, got %#v", mockClient.Actions()[0])
}
lastAction := mockClient.Actions()[1]
if !lastAction.Matches("delete", "namespaces") {
t.Errorf("Expected call to delete-namespace, got %#v", mockClient.Actions()[1])
}
deleteAction := lastAction.(ktesting.DeleteActionImpl)
preconditions := deleteAction.DeleteOptions.Preconditions
if preconditions == nil {
t.Fatalf("Expected DeleteOptions precondition to be non-nil")
}
if preconditions.UID == nil {
t.Fatalf("Expected DeleteOptions precondition UID to be non-nil")
}
if *preconditions.UID != expectedUID {
t.Errorf("Expected DeleteOptions precondition UID to %#v, got %#v", expectedUID, *preconditions.UID)
}
if preconditions.ResourceVersion == nil {
t.Fatalf("Expected DeleteOptions precondition ResourceVersion to be non-nil")
}
if *preconditions.ResourceVersion != expectedRV {
t.Errorf("Expected DeleteOptions precondition ResourceVersion to %#v, got %#v", expectedRV, *preconditions.ResourceVersion)
}
})
}
}