Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CNF-15754: nodegroups refactor #1142

Closed
wants to merge 10 commits into from
54 changes: 19 additions & 35 deletions controllers/numaresourcesoperator_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ import (
"github.com/openshift-kni/numaresources-operator/pkg/hash"
"github.com/openshift-kni/numaresources-operator/pkg/images"
"github.com/openshift-kni/numaresources-operator/pkg/loglevel"
"github.com/openshift-kni/numaresources-operator/pkg/nodegroups"
"github.com/openshift-kni/numaresources-operator/pkg/objectnames"
apistate "github.com/openshift-kni/numaresources-operator/pkg/objectstate/api"
rtestate "github.com/openshift-kni/numaresources-operator/pkg/objectstate/rte"
Expand All @@ -82,15 +83,16 @@ type poolDaemonSet struct {
// NUMAResourcesOperatorReconciler reconciles a NUMAResourcesOperator object
type NUMAResourcesOperatorReconciler struct {
client.Client
Scheme *runtime.Scheme
Platform platform.Platform
APIManifests apimanifests.Manifests
RTEManifests rtestate.Manifests
Namespace string
Images images.Data
ImagePullPolicy corev1.PullPolicy
Recorder record.EventRecorder
ForwardMCPConds bool
Scheme *runtime.Scheme
Platform platform.Platform
APIManifests apimanifests.Manifests
RTEManifests rtestate.Manifests
Namespace string
Images images.Data
ImagePullPolicy corev1.PullPolicy
Recorder record.EventRecorder
ForwardMCPConds bool
NodeGroupsManager nodegroups.Manager
}

// TODO: narrow down
Expand Down Expand Up @@ -150,22 +152,19 @@ func (r *NUMAResourcesOperatorReconciler) Reconcile(ctx context.Context, req ctr
return ctrl.Result{}, nil
}

if err := validation.NodeGroups(instance.Spec.NodeGroups, r.Platform); err != nil {
return r.degradeStatus(ctx, instance, validation.NodeGroupsError, err)
if err := r.NodeGroupsManager.Validate(instance.Spec.NodeGroups); err != nil {
return r.degradeStatus(ctx, instance, nodegroups.ValidationError, err)
}

trees, err := getTreesByNodeGroup(ctx, r.Client, instance.Spec.NodeGroups, r.Platform)
trees, err := r.NodeGroupsManager.FetchTrees(ctx, r.Client, instance.Spec.NodeGroups)
if err != nil {
return r.degradeStatus(ctx, instance, validation.NodeGroupsError, err)
return r.degradeStatus(ctx, instance, nodegroups.ValidationError, err)
}

var multiMCPsErr error
if r.Platform == platform.OpenShift {
multiMCPsErr = validation.MultipleMCPsPerTree(instance.Annotations, trees)
multiMCPsErr := validation.MultipleMCPsPerTree(instance.Annotations, trees)

if err := validation.MachineConfigPoolDuplicates(trees); err != nil {
return r.degradeStatus(ctx, instance, validation.NodeGroupsError, err)
}
if err := validation.MachineConfigPoolDuplicates(trees); err != nil {
return r.degradeStatus(ctx, instance, nodegroups.ValidationError, err)
}

for idx := range trees {
Expand All @@ -178,7 +177,7 @@ func (r *NUMAResourcesOperatorReconciler) Reconcile(ctx context.Context, req ctr
step := r.reconcileResource(ctx, instance, trees)

if step.Done() && multiMCPsErr != nil {
return r.degradeStatus(ctx, instance, validation.NodeGroupsError, multiMCPsErr)
return r.degradeStatus(ctx, instance, nodegroups.ValidationError, multiMCPsErr)
}

if !status.IsUpdatedNUMAResourcesOperator(curStatus, &instance.Status) {
Expand Down Expand Up @@ -765,18 +764,3 @@ func isDaemonSetReady(ds *appsv1.DaemonSet) bool {
}
return ds.Status.DesiredNumberScheduled > 0 && ds.Status.DesiredNumberScheduled == ds.Status.NumberReady
}

func getTreesByNodeGroup(ctx context.Context, cli client.Client, nodeGroups []nropv1.NodeGroup, platf platform.Platform) ([]nodegroupv1.Tree, error) {
switch platf {
case platform.OpenShift:
mcps := &machineconfigv1.MachineConfigPoolList{}
if err := cli.List(ctx, mcps); err != nil {
return nil, err
}
return nodegroupv1.FindTreesOpenshift(mcps, nodeGroups)
case platform.HyperShift:
return nodegroupv1.FindTreesHypershift(nodeGroups), nil
default:
return nil, fmt.Errorf("unsupported platform")
}
}
30 changes: 17 additions & 13 deletions controllers/numaresourcesoperator_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,10 @@ import (
testobjs "github.com/openshift-kni/numaresources-operator/internal/objects"
"github.com/openshift-kni/numaresources-operator/pkg/images"
rtemetricsmanifests "github.com/openshift-kni/numaresources-operator/pkg/metrics/manifests/monitor"
"github.com/openshift-kni/numaresources-operator/pkg/nodegroups"
"github.com/openshift-kni/numaresources-operator/pkg/objectnames"
"github.com/openshift-kni/numaresources-operator/pkg/objectstate/rte"
"github.com/openshift-kni/numaresources-operator/pkg/status"
"github.com/openshift-kni/numaresources-operator/pkg/validation"
)

const (
Expand All @@ -76,7 +76,10 @@ func NewFakeNUMAResourcesOperatorReconciler(plat platform.Platform, platVersion
return nil, err
}
recorder := record.NewFakeRecorder(bufferSize)

manager, err := nodegroups.NewForPlatform(plat)
if err != nil {
return nil, err
}
return &NUMAResourcesOperatorReconciler{
Client: fakeClient,
Scheme: scheme.Scheme,
Expand All @@ -90,7 +93,8 @@ func NewFakeNUMAResourcesOperatorReconciler(plat platform.Platform, platVersion
Images: images.Data{
Builtin: testImageSpec,
},
Recorder: recorder,
Recorder: recorder,
NodeGroupsManager: manager,
}, nil
}

Expand Down Expand Up @@ -125,7 +129,7 @@ var _ = Describe("Test NUMAResourcesOperator Reconcile", func() {
It("should update the CR condition to degraded", func() {
ng := nropv1.NodeGroup{}
nro := testobjs.NewNUMAResourcesOperator(objectnames.DefaultNUMAResourcesOperatorCrName, ng)
verifyDegradedCondition(nro, validation.NodeGroupsError, platf)
verifyDegradedCondition(nro, nodegroups.ValidationError, platf)
})
})

Expand All @@ -136,7 +140,7 @@ var _ = Describe("Test NUMAResourcesOperator Reconcile", func() {
PoolName: &pn,
}
nro := testobjs.NewNUMAResourcesOperator(objectnames.DefaultNUMAResourcesOperatorCrName, ng)
verifyDegradedCondition(nro, validation.NodeGroupsError, platf)
verifyDegradedCondition(nro, nodegroups.ValidationError, platf)
})
})

Expand All @@ -150,7 +154,7 @@ var _ = Describe("Test NUMAResourcesOperator Reconcile", func() {
PoolName: &pn,
}
nro := testobjs.NewNUMAResourcesOperator(objectnames.DefaultNUMAResourcesOperatorCrName, ng)
verifyDegradedCondition(nro, validation.NodeGroupsError, platf)
verifyDegradedCondition(nro, nodegroups.ValidationError, platf)
})
})

Expand Down Expand Up @@ -178,7 +182,7 @@ var _ = Describe("Test NUMAResourcesOperator Reconcile", func() {
Expect(reconciler.Client.Get(context.TODO(), key, nro)).ToNot(HaveOccurred())
degradedCondition := getConditionByType(nro.Status.Conditions, status.ConditionDegraded)
Expect(degradedCondition.Status).To(Equal(metav1.ConditionTrue))
Expect(degradedCondition.Reason).To(Equal(validation.NodeGroupsError))
Expect(degradedCondition.Reason).To(Equal(nodegroups.ValidationError))
})
})

Expand Down Expand Up @@ -412,7 +416,7 @@ var _ = Describe("Test NUMAResourcesOperator Reconcile", func() {
nro.Spec.NodeGroups[0] = *ng1WithNodeSelector
return reconciler.Client.Update(context.TODO(), nro)
}).WithPolling(1 * time.Second).WithTimeout(30 * time.Second).ShouldNot(HaveOccurred())
verifyDegradedCondition(nro, validation.NodeGroupsError, platf)
verifyDegradedCondition(nro, nodegroups.ValidationError, platf)
})
})
When("pause annotation enabled", func() {
Expand Down Expand Up @@ -1066,15 +1070,15 @@ var _ = Describe("Test NUMAResourcesOperator Reconcile", func() {
MachineConfigPoolSelector: &metav1.LabelSelector{MatchLabels: map[string]string{"test": "test"}},
}
nro := testobjs.NewNUMAResourcesOperator(objectnames.DefaultNUMAResourcesOperatorCrName, ng)
verifyDegradedCondition(nro, validation.NodeGroupsError, platform.OpenShift)
verifyDegradedCondition(nro, nodegroups.ValidationError, platform.OpenShift)
})
It("should update the CR condition to degraded when PoolName set", func() {
pn := "pn-1"
ng := nropv1.NodeGroup{
PoolName: &pn,
}
nro := testobjs.NewNUMAResourcesOperator(objectnames.DefaultNUMAResourcesOperatorCrName, ng)
verifyDegradedCondition(nro, validation.NodeGroupsError, platform.OpenShift)
verifyDegradedCondition(nro, nodegroups.ValidationError, platform.OpenShift)
})
})

Expand Down Expand Up @@ -1109,7 +1113,7 @@ var _ = Describe("Test NUMAResourcesOperator Reconcile", func() {
Expect(reconciler.Client.Get(context.TODO(), key, nro)).ToNot(HaveOccurred())
degradedCondition := getConditionByType(nro.Status.Conditions, status.ConditionDegraded)
Expect(degradedCondition.Status).To(Equal(metav1.ConditionTrue))
Expect(degradedCondition.Reason).To(Equal(validation.NodeGroupsError))
Expect(degradedCondition.Reason).To(Equal(nodegroups.ValidationError))
})
})

Expand Down Expand Up @@ -1170,7 +1174,7 @@ var _ = Describe("Test NUMAResourcesOperator Reconcile", func() {
Expect(reconciler.Client.Get(context.TODO(), key, nro)).ToNot(HaveOccurred())
degradedCondition := getConditionByType(nro.Status.Conditions, status.ConditionDegraded)
Expect(degradedCondition.Status).To(Equal(metav1.ConditionTrue))
Expect(degradedCondition.Reason).To(Equal(validation.NodeGroupsError))
Expect(degradedCondition.Reason).To(Equal(nodegroups.ValidationError))
})
It("should create objects and CR will be in Available condition when annotation is enabled - legacy", func() {
mcpName1 := "test1"
Expand Down Expand Up @@ -1297,7 +1301,7 @@ var _ = Describe("Test NUMAResourcesOperator Reconcile", func() {
Expect(reconciler.Client.Get(context.TODO(), key, nro)).ToNot(HaveOccurred())
degradedCondition := getConditionByType(nro.Status.Conditions, status.ConditionDegraded)
Expect(degradedCondition.Status).To(Equal(metav1.ConditionTrue))
Expect(degradedCondition.Reason).To(Equal(validation.NodeGroupsError))
Expect(degradedCondition.Reason).To(Equal(nodegroups.ValidationError))
})
})

Expand Down
17 changes: 12 additions & 5 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ import (
"github.com/openshift-kni/numaresources-operator/pkg/hash"
"github.com/openshift-kni/numaresources-operator/pkg/images"
rtemetricsmanifests "github.com/openshift-kni/numaresources-operator/pkg/metrics/manifests/monitor"
"github.com/openshift-kni/numaresources-operator/pkg/nodegroups"
"github.com/openshift-kni/numaresources-operator/pkg/numaresourcesscheduler/controlplane"
schedmanifests "github.com/openshift-kni/numaresources-operator/pkg/numaresourcesscheduler/manifests/sched"
rtestate "github.com/openshift-kni/numaresources-operator/pkg/objectstate/rte"
Expand Down Expand Up @@ -282,6 +283,11 @@ func main() {
os.Exit(1)
}

nodeGroupsManager, err := nodegroups.NewForPlatform(clusterPlatform)
if err != nil {
klog.ErrorS(err, "unable to create nodegroups manager")
os.Exit(2)
}
if err = (&controllers.NUMAResourcesOperatorReconciler{
Client: mgr.GetClient(),
Scheme: mgr.GetScheme(),
Expand All @@ -291,11 +297,12 @@ func main() {
Core: rteManifestsRendered,
Metrics: rteMetricsManifests,
},
Platform: clusterPlatform,
Images: imgs,
ImagePullPolicy: pullPolicy,
Namespace: namespace,
ForwardMCPConds: params.enableMCPCondsForward,
Platform: clusterPlatform,
Images: imgs,
ImagePullPolicy: pullPolicy,
Namespace: namespace,
ForwardMCPConds: params.enableMCPCondsForward,
NodeGroupsManager: nodeGroupsManager,
}).SetupWithManager(mgr); err != nil {
klog.ErrorS(err, "unable to create controller", "controller", "NUMAResourcesOperator")
os.Exit(1)
Expand Down
70 changes: 70 additions & 0 deletions pkg/nodegroups/hypershift.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* Copyright 2025 Red Hat, Inc.
*/

package nodegroups

import (
"context"
"fmt"

"sigs.k8s.io/controller-runtime/pkg/client"

nropv1 "github.com/openshift-kni/numaresources-operator/api/numaresourcesoperator/v1"
nodegroupv1 "github.com/openshift-kni/numaresources-operator/api/numaresourcesoperator/v1/helper/nodegroup"
)

var _ Manager = &HyperShiftManager{}

type HyperShiftManager struct{}

func NewHyperShiftManager() *HyperShiftManager {
return &HyperShiftManager{}
}

func (hsm *HyperShiftManager) Validate(nodeGroups []nropv1.NodeGroup) error {
if err := validateFields(nodeGroups); err != nil {
return err
}
if err := validatePoolName(nodeGroups); err != nil {
return err
}
if err := duplicatesByPoolName(nodeGroups); err != nil {
return err
}
return nil
}

func (hsm *HyperShiftManager) FetchTrees(ctx context.Context, cli client.Client, nodeGroups []nropv1.NodeGroup) ([]nodegroupv1.Tree, error) {
// node groups are validated by the controller before getting to this phase, so for sure, all node groups will be valid at this point.
// a valid node group in HyperShift can be referred by PoolName only.
result := make([]nodegroupv1.Tree, len(nodeGroups))
for i := range nodeGroups {
result[i] = nodegroupv1.Tree{NodeGroup: &nodeGroups[i]}
}
return result, nil
}

func validateFields(nodeGroups []nropv1.NodeGroup) error {
for idx, nodeGroup := range nodeGroups {
if nodeGroup.MachineConfigPoolSelector != nil {
return fmt.Errorf("node group %d specifies MachineConfigPoolSelector on Hypershift platform; Should specify PoolName only", idx)
}
if nodeGroup.PoolName == nil {
return fmt.Errorf("node group %d must specify PoolName on Hypershift platform", idx)
}
}
return nil
}
Loading
Loading