Skip to content

Commit

Permalink
Merge pull request #1154 from Tal-or/nodegroups_refactor2
Browse files Browse the repository at this point in the history
CNF-15754: validation refinement
  • Loading branch information
openshift-merge-bot[bot] authored Jan 20, 2025
2 parents 1c0a380 + 4e5a5ca commit 9bb8de8
Show file tree
Hide file tree
Showing 3 changed files with 61 additions and 38 deletions.
14 changes: 5 additions & 9 deletions controllers/numaresourcesoperator_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -159,13 +159,9 @@ func (r *NUMAResourcesOperatorReconciler) Reconcile(ctx context.Context, req ctr
return r.degradeStatus(ctx, instance, validation.NodeGroupsError, err)
}

var multiMCPsErr error
if r.Platform == platform.OpenShift {
multiMCPsErr = validation.MultipleMCPsPerTree(instance.Annotations, trees)

if err := validation.MachineConfigPoolDuplicates(trees); err != nil {
return r.degradeStatus(ctx, instance, validation.NodeGroupsError, err)
}
tolerable, err := validation.NodeGroupsTree(instance, trees, r.Platform)
if err != nil {
return r.degradeStatus(ctx, instance, validation.NodeGroupsError, err)
}

for idx := range trees {
Expand All @@ -177,8 +173,8 @@ func (r *NUMAResourcesOperatorReconciler) Reconcile(ctx context.Context, req ctr

step := r.reconcileResource(ctx, instance, trees)

if step.Done() && multiMCPsErr != nil {
return r.degradeStatus(ctx, instance, validation.NodeGroupsError, multiMCPsErr)
if step.Done() && tolerable != nil {
return r.degradeStatus(ctx, instance, tolerable.Reason, tolerable.Error)
}

if !status.IsUpdatedNUMAResourcesOperator(curStatus, &instance.Status) {
Expand Down
80 changes: 54 additions & 26 deletions pkg/validation/validation.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,18 @@ const (
NodeGroupsError = "ValidationErrorUnderNodeGroups"
)

// MachineConfigPoolDuplicates validates selected MCPs for duplicates
func MachineConfigPoolDuplicates(trees []nodegroupv1.Tree) error {
// TolerableError represents an error which should not cause the validation
// to fail, this is more akin to just an error to pop in the logs or in the
// object Conditions or Status, perhaps degraded.
type TolerableError struct {
// Error is an error that is worth reporting but still should not cause the validation to fail
Error error
// Reason is a human-oriented message to provide context for the error
Reason string
}

// MCPsDuplicates validates selected MCPs for duplicates
func MCPsDuplicates(trees []nodegroupv1.Tree) error {
duplicates := map[string]int{}
for _, tree := range trees {
for _, mcp := range tree.MachineConfigPools {
Expand All @@ -53,27 +63,41 @@ func MachineConfigPoolDuplicates(trees []nodegroupv1.Tree) error {
return duplicateErrors
}

type nodeGroupsValidatorFunc func(nodeGroups []nropv1.NodeGroup) error
func MultipleMCPsPerTree(annot map[string]string, trees []nodegroupv1.Tree) error {
multiMCPsPerTree := annotations.IsMultiplePoolsPerTreeEnabled(annot)
if multiMCPsPerTree {
return nil
}

// NodeGroups validates the node groups for nil values and duplicates.
func NodeGroups(nodeGroups []nropv1.NodeGroup, platf platform.Platform) error {
// platform-specific validations
if platf == platform.HyperShift {
if err := nodeGroupForHypershift(nodeGroups); err != nil {
return err
var err error
for _, tree := range trees {
if len(tree.MachineConfigPools) > 1 {
err = errors.Join(err, fmt.Errorf("found multiple pools matches for node group %v but expected one. Pools found %v", &tree.NodeGroup, tree.MachineConfigPools))
}
}
return err
}

type nodeGroupsValidatorFunc func(nodeGroups []nropv1.NodeGroup) error

// NodeGroups validates the node groups for nil values and duplicates.
func NodeGroups(nodeGroups []nropv1.NodeGroup, platf platform.Platform) error {
// platform-agnostic validation.
validatorFuncs := []nodeGroupsValidatorFunc{
nodeGroupsSpecifier,
nodeGroupsDuplicatesByMCPSelector,
nodeGroupsValidPoolName,
nodeGroupsDuplicatesByPoolName,
nodeGroupsValidMachineConfigPoolSelector,
nodeGroupsAnnotations,
}

// platform-specific validations
if platf == platform.HyperShift {
validatorFuncs = append(validatorFuncs, nodeGroupForHypershift)
}
if platf == platform.OpenShift {
validatorFuncs = append(validatorFuncs, nodeGroupsDuplicatesByMCPSelector, nodeGroupsValidMachineConfigPoolSelector)
}

for _, validatorFunc := range validatorFuncs {
if err := validatorFunc(nodeGroups); err != nil {
return err
Expand All @@ -82,6 +106,25 @@ func NodeGroups(nodeGroups []nropv1.NodeGroup, platf platform.Platform) error {
return nil
}

func NodeGroupsTree(instance *nropv1.NUMAResourcesOperator, trees []nodegroupv1.Tree, platf platform.Platform) (*TolerableError, error) {
if platf == platform.HyperShift {
// nothing to do for now
return nil, nil
}

var tolErr *TolerableError
multiMCPsErr := MultipleMCPsPerTree(instance.Annotations, trees)
if multiMCPsErr != nil {
tolErr = &TolerableError{
Reason: NodeGroupsError,
Error: multiMCPsErr,
}
}

err := MCPsDuplicates(trees)
return tolErr, err
}

func nodeGroupForHypershift(nodeGroups []nropv1.NodeGroup) error {
for idx, nodeGroup := range nodeGroups {
if nodeGroup.MachineConfigPoolSelector != nil {
Expand Down Expand Up @@ -194,18 +237,3 @@ func nodeGroupsAnnotations(nodeGroups []nropv1.NodeGroup) error {
}
return err
}

func MultipleMCPsPerTree(annot map[string]string, trees []nodegroupv1.Tree) error {
multiMCPsPerTree := annotations.IsMultiplePoolsPerTreeEnabled(annot)
if multiMCPsPerTree {
return nil
}

var err error
for _, tree := range trees {
if len(tree.MachineConfigPools) > 1 {
err = errors.Join(err, fmt.Errorf("found multiple pools matches for node group %v but expected one. Pools found %+v", &tree.NodeGroup, tree.MachineConfigPools))
}
}
return err
}
5 changes: 2 additions & 3 deletions pkg/validation/validation_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ func TestMachineConfigPoolDuplicates(t *testing.T) {

for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
err := MachineConfigPoolDuplicates(tc.trees)
err := MCPsDuplicates(tc.trees)
if err == nil && tc.expectedError {
t.Errorf("expected error, succeeded")
}
Expand Down Expand Up @@ -220,7 +220,6 @@ func TestNodeGroupsSanity(t *testing.T) {
"test": "test",
},
},
PoolName: &poolName,
},
},
expectedError: true,
Expand All @@ -235,7 +234,7 @@ func TestNodeGroupsSanity(t *testing.T) {
},
},
expectedError: true,
expectedErrorMessage: "must specify PoolName on Hypershift platform",
expectedErrorMessage: "node group 0 missing any pool specifier",
platf: platform.HyperShift,
},
{
Expand Down

0 comments on commit 9bb8de8

Please sign in to comment.