Skip to content

Commit

Permalink
e2e: rewrite test 47674
Browse files Browse the repository at this point in the history
The previous version was fragile and maintained several times already.
This fact means that the test should be rewritten and once the known bugs
are fixed, new tests will be added to cover the e2e full scenario.

The test goal is to verify that adjustments to NROP CR are done as
expected and reflected in the RTE daemonsets. Previously, this was done
via creating a new MCP for the test and associating to it one of the
worker nodes that were part of the initial NROP node groups. This was
causing a reboot when running on a cluster that is:
a. < 4.18 or
b. >= 4.18 and has the custom RTE selinux policy enabled

In the new version of the test, we keep checking the same functionality
while completely avoiding reboot. That is achieved by creating the new MCP,
keeping it empty (no nodes are associated to it) and appending it as a
second node group's selector. Then checks on the daemonsets are
performed and the only cleanup would be deleting the test mcp.

Signed-off-by: Shereen Haj <[email protected]>
  • Loading branch information
shajmakh committed Mar 11, 2025
1 parent eaf4e44 commit e51970d
Show file tree
Hide file tree
Showing 2 changed files with 103 additions and 1 deletion.
3 changes: 2 additions & 1 deletion internal/api/features/_topics.json
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
"ngpoolname",
"nganns",
"metrics",
"nrtcrdanns",
"tmp_b52958"
]
}
101 changes: 101 additions & 0 deletions test/e2e/serial/tests/configuration.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ import (
nrtv1alpha2 "github.com/k8stopologyawareschedwg/noderesourcetopology-api/pkg/apis/topology/v1alpha2"

configv1 "github.com/openshift/api/config/v1"
operatorv1 "github.com/openshift/api/operator/v1"
perfprof "github.com/openshift/cluster-node-tuning-operator/pkg/apis/performanceprofile/v2"
machineconfigv1 "github.com/openshift/machine-config-operator/pkg/apis/machineconfiguration.openshift.io/v1"

Expand All @@ -60,6 +61,7 @@ import (
e2ereslist "github.com/openshift-kni/numaresources-operator/internal/resourcelist"
"github.com/openshift-kni/numaresources-operator/internal/wait"
"github.com/openshift-kni/numaresources-operator/pkg/kubeletconfig"
"github.com/openshift-kni/numaresources-operator/pkg/objectnames"
"github.com/openshift-kni/numaresources-operator/pkg/status"
"github.com/openshift-kni/numaresources-operator/pkg/validation"
rteconfig "github.com/openshift-kni/numaresources-operator/rte/pkg/config"
Expand Down Expand Up @@ -138,6 +140,105 @@ var _ = Describe("[serial][disruptive] numaresources configuration management",
Context("cluster has at least one suitable node", func() {
timeout := 5 * time.Minute

// [test_id:47674] Verify that edits to the NUMAResourcesOperator CR
// (node groups, exporter image, log level) are reflected in the RTE
// daemonsets. A fresh MCP is created but intentionally kept empty
// (zero machine count) so the test never causes a node reboot; cleanup
// reverts the CR spec and then deletes the test MCP (defers run LIFO,
// so the node-group reference is dropped before the MCP goes away).
It("[test_id:47674][images][tier2] should be able to modify the configurable values under the NUMAResourcesOperator CR", Label("images", "tier2"), Label("feature:tmp_b52958"), func(ctx context.Context) {
	var nroOperObj nropv1.NUMAResourcesOperator
	nroKey := objects.NROObjectKey()
	Expect(fxt.Client.Get(ctx, nroKey, &nroOperObj)).To(Succeed())
	// snapshot the spec so cleanup can restore it verbatim
	initialNroOperObj := nroOperObj.DeepCopy()

	testMCP := objects.TestMCP()
	By(fmt.Sprintf("creating new MCP: %q", testMCP.Name))
	// we must have this label in order to match other machine configs that are necessary for proper functionality
	testMCP.Labels = map[string]string{"machineconfiguration.openshift.io/role": roleMCPTest}
	testMCP.Spec.MachineConfigSelector = &metav1.LabelSelector{
		MatchExpressions: []metav1.LabelSelectorRequirement{
			{
				Key:      "machineconfiguration.openshift.io/role",
				Operator: metav1.LabelSelectorOpIn,
				Values:   []string{depnodes.RoleWorker, roleMCPTest},
			},
		},
	}
	testMCP.Spec.NodeSelector = &metav1.LabelSelector{
		MatchLabels: map[string]string{getLabelRoleMCPTest(): ""},
	}

	// use the spec's ctx consistently (not context.TODO()) so the
	// calls honor the suite's cancellation and deadlines
	Expect(fxt.Client.Create(ctx, testMCP)).To(Succeed())
	defer func() {
		By(fmt.Sprintf("CLEANUP: deleting mcp: %q", testMCP.Name))
		Expect(fxt.Client.Delete(ctx, testMCP)).To(Succeed())

		err := wait.With(fxt.Client).
			Interval(configuration.MachineConfigPoolUpdateInterval).
			Timeout(configuration.MachineConfigPoolUpdateTimeout).
			ForMachineConfigPoolDeleted(ctx, testMCP)
		Expect(err).ToNot(HaveOccurred())
	}()
	// keep the mcp with zero machine count intentionally to avoid reboots

	// the new node group selects the empty MCP, so a new RTE daemonset
	// must appear without any node ever being reassigned/rebooted
	testNG := nropv1.NodeGroup{
		MachineConfigPoolSelector: &metav1.LabelSelector{
			MatchLabels: testMCP.Labels,
		},
	}

	By(fmt.Sprintf("modifying the NUMAResourcesOperator nodeGroups field to include new group: %q labels %q", testMCP.Name, testMCP.Labels))
	// pick whichever log level differs from the current one so the
	// change is always observable on the daemonset containers
	newLogLevel := operatorv1.Trace
	Eventually(func(g Gomega) {
		// we need that for the current ResourceVersion
		g.Expect(fxt.Client.Get(ctx, client.ObjectKeyFromObject(initialNroOperObj), &nroOperObj)).To(Succeed())

		newNGs := append(nroOperObj.Spec.NodeGroups, testNG)
		nroOperObj.Spec.NodeGroups = newNGs
		nroOperObj.Spec.ExporterImage = serialconfig.GetRteCiImage()

		if nroOperObj.Spec.LogLevel == operatorv1.Trace {
			newLogLevel = operatorv1.Debug
		}
		nroOperObj.Spec.LogLevel = newLogLevel

		g.Expect(fxt.Client.Update(ctx, &nroOperObj)).To(Succeed())
	}).WithTimeout(5 * time.Minute).WithPolling(30 * time.Second).Should(Succeed())

	defer func() {
		By("CLEANUP: reverting the changes under the NUMAResourcesOperator object")
		var nroOperObj nropv1.NUMAResourcesOperator
		Eventually(func(g Gomega) {
			// we need that for the current ResourceVersion
			g.Expect(fxt.Client.Get(ctx, client.ObjectKeyFromObject(initialNroOperObj), &nroOperObj)).To(Succeed())

			nroOperObj.Spec = initialNroOperObj.Spec
			g.Expect(fxt.Client.Update(ctx, &nroOperObj)).To(Succeed())
		}).WithTimeout(10 * time.Minute).WithPolling(30 * time.Second).Should(Succeed())
	}() //end of defer

	By("verify new RTE daemonset is created and all the RTE daemonsets are updated")
	updatedNro := nropv1.NUMAResourcesOperator{}
	Eventually(func(g Gomega) {
		// use g.Expect throughout: a package-level Expect inside an
		// Eventually callback would abort the spec instead of retrying
		g.Expect(fxt.Client.Get(ctx, client.ObjectKeyFromObject(initialNroOperObj), &updatedNro)).To(Succeed())
		dss, err := objects.GetDaemonSetsOwnedBy(fxt.Client, updatedNro.ObjectMeta)
		g.Expect(err).ToNot(HaveOccurred())
		// assumption 1:1 mapping to testMCP
		g.Expect(dss).To(HaveLen(len(updatedNro.Spec.NodeGroups)), "daemonsets found owned by NRO object doesn't align with specified NodeGroups")

		for _, ds := range dss {
			By(fmt.Sprintf("check RTE daemonset %q", ds.Name))
			if ds.Name == objectnames.GetComponentName(updatedNro.Name, roleMCPTest) {
				By("check the correct match labels for the new RTE daemonset")
				g.Expect(ds.Spec.Template.Spec.NodeSelector).To(Equal(testMCP.Spec.NodeSelector.MatchLabels))
			}
			By("check the correct image")
			cnt := ds.Spec.Template.Spec.Containers[0]
			g.Expect(cnt.Image).To(Equal(serialconfig.GetRteCiImage()))

			By("checking the correct LogLevel")
			found, match := matchLogLevelToKlog(&cnt, newLogLevel)
			g.Expect(found).To(BeTrue(), "-v flag doesn't exist in container %q args under DaemonSet: %q", cnt.Name, ds.Name)
			g.Expect(match).To(BeTrue(), "LogLevel %s doesn't match the existing -v flag in container: %q managed by DaemonSet: %q", updatedNro.Spec.LogLevel, cnt.Name, ds.Name)
		}
	}).WithTimeout(10*time.Minute).WithPolling(30*time.Second).Should(Succeed(), "failed to update RTE daemonset node selector")
})

It("[test_id:54916][tier2][schedrst] should be able to modify the configurable values under the NUMAResourcesScheduler CR", Label("tier2", "schedrst"), Label("feature:schedrst"), func() {
initialNroSchedObj := &nropv1.NUMAResourcesScheduler{}
nroSchedKey := objects.NROSchedObjectKey()
Expand Down

0 comments on commit e51970d

Please sign in to comment.