Skip to content

Commit

Permalink
Available CRDs
Browse files Browse the repository at this point in the history
Reasons for this enhancement:
- A controller cannot set up a watch for a CRD that is not installed on
 the cluster; trying to set up such a watch will panic the operator
- There is no way that we are aware of to add a watch later without
 running into client cache issues

How does the enhancement work around the issue:
- On startup, detect which CRDs are available (out of a fixed list) and
 skip watches for the ones that are not available
- At the start of each reconcile iteration, revalidate which CRDs are now
 available. If a CRD of interest is now available, exit the operator with a
  known exit code (42)
- Have the pod command detect the exit code and, if it is the known
 exit code (42), restart the process

This process will guarantee that the pod does restart when a new CRD of
 interest becomes available. This in turn helps to avoid the following
  issue:
- Pod will not get into CrashLoopBackOff state
- There will be no chance that the pod becomes unschedulable after a
 restart because of missing resources

Signed-off-by: raaizik <[email protected]>
Co-Authored-By: Rewant Soni <[email protected]>
  • Loading branch information
raaizik and rewantsoni committed Aug 1, 2024
1 parent bb238df commit 68ed6dc
Show file tree
Hide file tree
Showing 5 changed files with 111 additions and 7 deletions.
5 changes: 3 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,10 @@ COPY --from=builder workspace/provider-api /usr/local/bin/provider-api
COPY --from=builder workspace/onboarding-validation-keys-gen /usr/local/bin/onboarding-validation-keys-gen
COPY --from=builder workspace/metrics/deploy/*rules*.yaml /ocs-prometheus-rules/
COPY --from=builder workspace/ux-backend-server /usr/local/bin/ux-backend-server
COPY --from=builder workspace/hack/crdavail.sh /usr/local/bin/crdavail

RUN chmod +x /usr/local/bin/ocs-operator /usr/local/bin/provider-api
RUN chmod +x /usr/local/bin/ocs-operator /usr/local/bin/provider-api /usr/local/bin/crdavail

USER operator

ENTRYPOINT ["/usr/local/bin/ocs-operator"]
ENTRYPOINT ["/usr/local/bin/crdavail"]
73 changes: 73 additions & 0 deletions controllers/crd/crd_controller.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
package crd

import (
"context"
"os"
"reflect"

"github.com/go-logr/logr"
"github.com/red-hat-storage/ocs-operator/v4/controllers/util"
apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
"k8s.io/klog/v2"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/builder"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/event"
"sigs.k8s.io/controller-runtime/pkg/predicate"
"sigs.k8s.io/controller-runtime/pkg/reconcile"
)

// CustomResourceDefinitionReconciler reconciles a CustomResourceDefinition object.
// It carries a snapshot of CRD availability (AvailableCrds) that Reconcile
// compares against a fresh lookup.
// nolint:revive
type CustomResourceDefinitionReconciler struct {
// Client is the client passed to util.MapCRDAvailability to look up CRDs.
Client client.Client
// ctx is set by Reconcile to the context of the call being processed.
ctx context.Context
// Log is the logger used by Reconcile and by the event predicates.
Log logr.Logger
// AvailableCrds maps a CRD name to whether that CRD was found when the
// snapshot was taken; it is the baseline Reconcile compares against.
AvailableCrds map[string]bool
}

// Reconcile recomputes the availability of every CRD named in util.CRDList and
// compares the result with the snapshot held in r.AvailableCrds.
//
// If the lookup fails, the error is returned so the request is retried. If the
// fresh map differs from the snapshot, the process terminates with exit code 42
// so that the pod's wrapper command can restart the operator and let it set up
// watches for the new set of CRDs. If nothing changed, an empty result is
// returned.
func (r *CustomResourceDefinitionReconciler) Reconcile(ctx context.Context, request reconcile.Request) (reconcile.Result, error) {
	// Exit status reserved for "restart the operator process".
	// NOTE(review): the wrapper that checks for this value (hack/crdavail.sh)
	// is not visible here — confirm it matches.
	const restartExitCode = 42

	r.ctx = ctx
	r.Log.Info("Reconciling CustomResourceDefinition.", "CRD", klog.KRef(request.Namespace, request.Name))

	availableCrds, err := util.MapCRDAvailability(ctx, r.Client, util.CRDList...)
	if err != nil {
		return reconcile.Result{}, err
	}
	if !reflect.DeepEqual(availableCrds, r.AvailableCrds) {
		r.Log.Info("CustomResourceDefinitions created/deleted. Restarting process.")
		// A panic would end the process with Go's generic crash status (or be
		// swallowed if the manager recovers panics), so exit explicitly with
		// the agreed restart code instead.
		os.Exit(restartExitCode)
	}
	return reconcile.Result{}, nil
}

// SetupWithManager registers r with mgr as a controller for
// CustomResourceDefinition objects and returns any error from building it.
//
// Events are filtered so that only availability changes of tracked CRDs reach
// Reconcile:
//   - Create passes only for a CRD that is a key in r.AvailableCrds and is
//     currently recorded as unavailable.
//   - Delete passes only for a CRD that is a key in r.AvailableCrds and is
//     currently recorded as available.
//   - Update and Generic events are always dropped.
func (r *CustomResourceDefinitionReconciler) SetupWithManager(mgr ctrl.Manager) error {
	crdPredicate := predicate.Funcs{
		CreateFunc: func(e event.TypedCreateEvent[client.Object]) bool {
			crdAvailable, keyExist := r.AvailableCrds[e.Object.GetName()]
			if keyExist && !crdAvailable {
				// logr does not interpolate format verbs; pass the name as a
				// key/value pair instead.
				r.Log.Info("CustomResourceDefinition was Created.", "CRD", e.Object.GetName())
				return true
			}
			return false
		},
		DeleteFunc: func(e event.TypedDeleteEvent[client.Object]) bool {
			crdAvailable, keyExist := r.AvailableCrds[e.Object.GetName()]
			if keyExist && crdAvailable {
				r.Log.Info("CustomResourceDefinition was Deleted.", "CRD", e.Object.GetName())
				return true
			}
			return false
		},
		UpdateFunc: func(e event.TypedUpdateEvent[client.Object]) bool {
			return false
		},
		GenericFunc: func(e event.TypedGenericEvent[client.Object]) bool {
			return false
		},
	}
	return ctrl.NewControllerManagedBy(mgr).
		For(&apiextensionsv1.CustomResourceDefinition{}, builder.WithPredicates(crdPredicate)).
		Complete(r)
}
16 changes: 16 additions & 0 deletions controllers/util/k8sutil.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
ocsv1 "github.com/red-hat-storage/ocs-operator/api/v4/v1"
corev1 "k8s.io/api/core/v1"
storagev1 "k8s.io/api/storage/v1"
apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/types"
"sigs.k8s.io/controller-runtime/pkg/client"
Expand Down Expand Up @@ -44,6 +45,8 @@ const (
OwnerUIDIndexName = "ownerUID"
)

// CRDList holds the names of the CustomResourceDefinitions whose availability
// is checked through MapCRDAvailability.
var CRDList []string

// GetWatchNamespace returns the namespace the operator should be watching for changes
func GetWatchNamespace() (string, error) {
ns, found := os.LookupEnv(WatchNamespaceEnvVar)
Expand Down Expand Up @@ -149,3 +152,16 @@ func GenerateNameForNonResilientCephBlockPoolSC(initData *ocsv1.StorageCluster)
}
return fmt.Sprintf("%s-ceph-non-resilient-rbd", initData.Name)
}

// MapCRDAvailability reports, for each name in crdNames, whether a
// CustomResourceDefinition with that name can be fetched through clnt.
//
// The returned map has one entry per requested name: true when the CRD was
// found, false when the lookup reported NotFound. Any other lookup error stops
// the scan and is returned wrapped (with the offending CRD name), together
// with a nil map.
func MapCRDAvailability(ctx context.Context, clnt client.Client, crdNames ...string) (map[string]bool, error) {
	crdExist := make(map[string]bool, len(crdNames))
	for _, crdName := range crdNames {
		crd := &apiextensionsv1.CustomResourceDefinition{}
		crd.Name = crdName
		if err := clnt.Get(ctx, client.ObjectKeyFromObject(crd), crd); client.IgnoreNotFound(err) != nil {
			return nil, fmt.Errorf("error getting CRD %q: %w", crdName, err)
		}
		// Availability is derived from the UID: this relies on Get leaving
		// crd.UID empty when the object was not found.
		crdExist[crdName] = crd.UID != ""
	}
	return crdExist, nil
}
Original file line number Diff line number Diff line change
Expand Up @@ -620,11 +620,8 @@ spec:
name: ocs-operator
spec:
containers:
- args:
- --enable-leader-election
- --health-probe-bind-address=:8081
command:
- ocs-operator
- command:
- crdavail
env:
- name: WATCH_NAMESPACE
valueFrom:
Expand Down
17 changes: 17 additions & 0 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"context"
"flag"
"fmt"
"github.com/red-hat-storage/ocs-operator/v4/controllers/crd"
"os"
"runtime"

Expand Down Expand Up @@ -252,6 +253,22 @@ func main() {
os.Exit(1)
}

availCrds, err := util.MapCRDAvailability(context.Background(), apiClient, util.CRDList...)
if err != nil {
setupLog.Error(err, "Unable to get CRD")
os.Exit(1)
}
if len(util.CRDList) > 0 {
if err = (&crd.CustomResourceDefinitionReconciler{
Client: mgr.GetClient(),
Log: ctrl.Log.WithName("controllers").WithName("CustomResourceDefinitionReconciler"),
AvailableCrds: availCrds,
}).SetupWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create controller", "controller", "CustomResourceDefinitionReconciler")
os.Exit(1)
}
}

// Set OperatorCondition Upgradeable to True
// We have to at least default the condition to True or
// OLM will use the Readiness condition via our readiness probe instead:
Expand Down

0 comments on commit 68ed6dc

Please sign in to comment.