From fc56d14924cb15a8ba7a048dcb7b54407dc9cf1e Mon Sep 17 00:00:00 2001 From: Carlos Eduardo Arango Gutierrez Date: Thu, 24 Apr 2025 14:12:40 +0200 Subject: [PATCH 1/6] [no-relnote] Update E2E test suite Signed-off-by: Carlos Eduardo Arango Gutierrez --- .github/workflows/e2e.yaml | 22 +- .gitignore | 3 +- testdata/job-1.yaml | 25 + tests/e2e/Makefile | 43 +- tests/e2e/common/gpu_job.go | 60 -- tests/e2e/common/kubernetes.go | 216 ----- tests/e2e/common/taints.go | 45 - tests/e2e/device-plugin_test.go | 261 +++--- tests/e2e/e2e_test.go | 868 +++++++++++++++++- tests/e2e/framework.go | 26 - tests/e2e/framework/framework.go | 268 ------ tests/e2e/framework/test_context.go | 57 -- tests/e2e/framework/util.go | 156 ---- tests/e2e/gomega.go | 191 ---- tests/e2e/gpu-feature-discovery_test.go | 238 +++-- tests/e2e/infra/aws.yaml | 17 +- tests/e2e/internal/kube.go | 288 ++++++ .../k8s.io/apimachinery/pkg/util/uuid/uuid.go | 27 - tests/vendor/modules.txt | 1 - 19 files changed, 1436 insertions(+), 1376 deletions(-) create mode 100644 testdata/job-1.yaml delete mode 100644 tests/e2e/common/gpu_job.go delete mode 100644 tests/e2e/common/kubernetes.go delete mode 100644 tests/e2e/common/taints.go delete mode 100644 tests/e2e/framework.go delete mode 100644 tests/e2e/framework/framework.go delete mode 100644 tests/e2e/framework/test_context.go delete mode 100644 tests/e2e/framework/util.go delete mode 100644 tests/e2e/gomega.go create mode 100644 tests/e2e/internal/kube.go delete mode 100644 tests/vendor/k8s.io/apimachinery/pkg/util/uuid/uuid.go diff --git a/.github/workflows/e2e.yaml b/.github/workflows/e2e.yaml index 0c6172318..64159ac70 100644 --- a/.github/workflows/e2e.yaml +++ b/.github/workflows/e2e.yaml @@ -58,11 +58,14 @@ jobs: - name: Run e2e tests env: KUBECONFIG: ${{ github.workspace }}/kubeconfig + HELM_CHART: ${{ github.workspace }}/deployments/helm/nvidia-device-plugin E2E_IMAGE_REPO: ghcr.io/nvidia/k8s-device-plugin E2E_IMAGE_TAG: ${{ inputs.version }} + E2E_IMAGE_PULL_POLICY: Always + NVIDIA_DRIVER_ENABLED: true LOG_ARTIFACTS: ${{ github.workspace }}/e2e_logs run: | - make test-e2e + make -f tests/e2e/Makefile test - name: Archive test logs if: ${{ failure() }} @@ -72,6 +75,13 @@ jobs: path: ./e2e_logs/ retention-days: 15 + - name: Archive Ginkgo logs + uses: actions/upload-artifact@v4 + with: + name: ginkgo-logs + path: ginkgo.json + retention-days: 15 + - name: Send Slack alert notification id: slack if: false @@ -80,8 +90,10 @@ jobs: SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }} SUMMARY_URL: https://github.com/${{github.repository}}/actions/runs/${{github.run_id}} with: - channel-id: ${{ secrets.SLACK_CHANNEL_ID }} - slack-message: | - :x: On repository ${{ github.repository }} the Workflow *${{ github.workflow }}* has failed. + method: chat.postMessage + token: ${{ secrets.SLACK_BOT_TOKEN }} + payload: | + channel: ${{ secrets.SLACK_CHANNEL_ID }} + text: ":x: On repository ${{ github.repository }} the Workflow *${{ github.workflow }}* has failed. 
- Details: ${{ env.SUMMARY_URL }} + Details: ${{ env.SUMMARY_URL }}" diff --git a/.gitignore b/.gitignore index f0ef8b39a..4c5a9150c 100644 --- a/.gitignore +++ b/.gitignore @@ -6,6 +6,7 @@ deployments/helm/gpu-feature-discovery cmd/gpu-feature-discovery/gfd-test-loop e2e_logs - +bin *.out *.log +ginkgo.json diff --git a/testdata/job-1.yaml b/testdata/job-1.yaml new file mode 100644 index 000000000..1ac7c853a --- /dev/null +++ b/testdata/job-1.yaml @@ -0,0 +1,25 @@ +apiVersion: batch/v1 +kind: Job +metadata: + name: j-e2e-1 + labels: + app.nvidia.com: k8s-device-plugin-test-app +spec: + template: + metadata: + name: gpu-pod + spec: + restartPolicy: Never + containers: + - name: cuda-container + image: nvcr.io/nvidia/k8s/cuda-sample:nbody-cuda11.7.1-ubuntu18.04 + args: + - "--benchmark" + - "--numbodies=10000" + resources: + limits: + nvidia.com/gpu: "1" + tolerations: + - key: "nvidia.com/gpu" + operator: "Exists" + effect: "NoSchedule" diff --git a/tests/e2e/Makefile b/tests/e2e/Makefile index 03a802a64..03c15c1fd 100644 --- a/tests/e2e/Makefile +++ b/tests/e2e/Makefile @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -20,7 +20,7 @@ GO_TEST_TIMEOUT ?= 30m include $(CURDIR)/versions.mk -DRIVER_ENABLED ?= true +NVIDIA_DRIVER_ENABLED ?= true E2E_IMAGE_REPO ?= $(REGISTRY)/$(DRIVER_NAME) E2E_IMAGE_TAG ?= $(VERSION) @@ -28,21 +28,24 @@ E2E_IMAGE_PULL_POLICY ?= IfNotPresent HELM_CHART ?= $(CURDIR)/deployments/helm/nvidia-device-plugin LOG_ARTIFACTS ?= $(CURDIR)/e2e_logs -.PHONY: test -test: - @if [ -z ${KUBECONFIG} ]; then \ - echo "[ERR] KUBECONFIG missing, must be defined"; \ - exit 1; \ - fi - cd $(CURDIR)/tests/e2e && $(GO_CMD) test -timeout $(GO_TEST_TIMEOUT) -v . -args \ - -kubeconfig=$(KUBECONFIG) \ - -driver-enabled=$(DRIVER_ENABLED) \ - -image.repo=$(E2E_IMAGE_REPO) \ - -image.tag=$(E2E_IMAGE_TAG) \ - -image.pull-policy=$(E2E_IMAGE_PULL_POLICY) \ - -log-artifacts=$(LOG_ARTIFACTS) \ - -helm-chart=$(HELM_CHART) \ - -helm-log-file=$(LOG_ARTIFACTS)/helm.log \ - -ginkgo.focus="\[nvidia\]" \ - -test.timeout=1h \ - -ginkgo.v +# Test configuration +GINKGO_COMMON_ARGS := -v --fail-on-pending --randomize-all --trace +GINKGO_REPORT_ARGS := --json-report=$(LOG_ARTIFACTS)/report.json --junit-report=$(LOG_ARTIFACTS)/junit.xml + +.PHONY: ginkgo test clean-artifacts + +ginkgo: + mkdir -p $(CURDIR)/bin + GOBIN=$(CURDIR)/bin go install github.com/onsi/ginkgo/v2/ginkgo@latest + +# Create artifacts directory +$(LOG_ARTIFACTS): + mkdir -p $(LOG_ARTIFACTS) + +# Clean artifacts +clean-artifacts: + rm -rf $(LOG_ARTIFACTS) + +# Run tests +test: ginkgo $(LOG_ARTIFACTS) + $(CURDIR)/bin/ginkgo $(GINKGO_COMMON_ARGS) $(GINKGO_REPORT_ARGS) $(GINKGO_ARGS) ./tests/e2e/... diff --git a/tests/e2e/common/gpu_job.go b/tests/e2e/common/gpu_job.go deleted file mode 100644 index fb512f36f..000000000 --- a/tests/e2e/common/gpu_job.go +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package common - -import ( - batchv1 "k8s.io/api/batch/v1" - v1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/api/resource" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" -) - -// Define the Job -var GPUJob = &batchv1.Job{ - ObjectMeta: metav1.ObjectMeta{ - Name: "gpu-job", - }, - Spec: batchv1.JobSpec{ - Template: v1.PodTemplateSpec{ - ObjectMeta: metav1.ObjectMeta{ - Name: "gpu-pod", - }, - Spec: v1.PodSpec{ - RestartPolicy: "Never", - Containers: []v1.Container{ - { - Name: "cuda-container", - Image: "nvcr.io/nvidia/k8s/cuda-sample:nbody-cuda11.7.1-ubuntu18.04", - Args: []string{"--benchmark", "--numbodies=10000"}, - Resources: v1.ResourceRequirements{ - Limits: v1.ResourceList{ - "nvidia.com/gpu": resource.MustParse("1"), - }, - }, - }, - }, - Tolerations: []v1.Toleration{ - { - Key: "nvidia.com/gpu", - Operator: "Exists", - Effect: "NoSchedule", - }, - }, - }, - }, - }, -} diff --git a/tests/e2e/common/kubernetes.go b/tests/e2e/common/kubernetes.go deleted file mode 100644 index 0c72d7751..000000000 --- a/tests/e2e/common/kubernetes.go +++ /dev/null @@ -1,216 +0,0 @@ -/* - * Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package common - -import ( - "context" - "fmt" - "strings" - "time" - - . "github.com/onsi/ginkgo/v2" - . 
"github.com/onsi/gomega" - - corev1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/api/errors" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - clientset "k8s.io/client-go/kubernetes" - nfdclient "sigs.k8s.io/node-feature-discovery/api/generated/clientset/versioned" - nfdv1alpha1 "sigs.k8s.io/node-feature-discovery/api/nfd/v1alpha1" -) - -// GetNonControlPlaneNodes gets the nodes that are not tainted for exclusive control-plane usage -func GetNonControlPlaneNodes(ctx context.Context, cli clientset.Interface) ([]corev1.Node, error) { - nodeList, err := cli.CoreV1().Nodes().List(ctx, metav1.ListOptions{}) - if err != nil { - return nil, err - } - if len(nodeList.Items) == 0 { - return nil, fmt.Errorf("no nodes found in the cluster") - } - - controlPlaneTaint := corev1.Taint{ - Effect: corev1.TaintEffectNoSchedule, - Key: "node-role.kubernetes.io/control-plane", - } - out := []corev1.Node{} - for _, node := range nodeList.Items { - if !TaintExists(node.Spec.Taints, &controlPlaneTaint) { - out = append(out, node) - } - } - - if len(out) == 0 { - return nil, fmt.Errorf("no non-control-plane nodes found in the cluster") - } - return out, nil -} - -func GetNode(nodes []corev1.Node, nodeName string) corev1.Node { - for _, node := range nodes { - if node.Name == nodeName { - return node - } - } - return corev1.Node{} -} - -// CleanupNode deletes all NFD/GFD related metadata from the Node object, i.e. -// labels and annotations -func CleanupNode(ctx context.Context, cs clientset.Interface) { - // Per-node cleanup function - cleanup := func(nodeName string) error { - node, err := cs.CoreV1().Nodes().Get(ctx, nodeName, metav1.GetOptions{}) - Expect(err).NotTo(HaveOccurred()) - - update := false - updateStatus := false - // Gather info about all NFD-managed node assets outside the default prefix - nfdLabels := map[string]struct{}{} - for _, name := range strings.Split(node.Annotations[nfdv1alpha1.FeatureLabelsAnnotation], ",") { - if strings.Contains(name, "/") { - nfdLabels[name] = struct{}{} - } - } - nfdERs := map[string]struct{}{} - for _, name := range strings.Split(node.Annotations[nfdv1alpha1.ExtendedResourceAnnotation], ",") { - if strings.Contains(name, "/") { - nfdERs[name] = struct{}{} - } - } - - // Remove labels - for key := range node.Labels { - _, ok := nfdLabels[key] - if ok || strings.HasPrefix(key, nfdv1alpha1.FeatureLabelNs) { - delete(node.Labels, key) - update = true - } - } - - // Remove annotations - for key := range node.Annotations { - if strings.HasPrefix(key, nfdv1alpha1.AnnotationNs) { - delete(node.Annotations, key) - update = true - } - } - - // Remove taints - for _, taint := range node.Spec.Taints { - taint := taint - if strings.HasPrefix(taint.Key, nfdv1alpha1.TaintNs) { - newTaints, removed := DeleteTaint(node.Spec.Taints, &taint) - if removed { - node.Spec.Taints = newTaints - update = true - } - } - } - - // Remove extended resources - for key := range node.Status.Capacity { - // We check for FeatureLabelNs as -resource-labels can create ERs there - _, ok := nfdERs[string(key)] - if ok || strings.HasPrefix(string(key), nfdv1alpha1.FeatureLabelNs) { - delete(node.Status.Capacity, key) - delete(node.Status.Allocatable, key) - updateStatus = true - } - } - - if updateStatus { - By("[Cleanup]\tDeleting NFD extended resources from node " + nodeName) - if _, err := cs.CoreV1().Nodes().UpdateStatus(ctx, node, metav1.UpdateOptions{}); err != nil { - return err - } - } - - if update { - By("[Cleanup]\tDeleting NFD labels, annotations and taints from node " + node.Name) - if 
_, err := cs.CoreV1().Nodes().Update(ctx, node, metav1.UpdateOptions{}); err != nil { - return err - } - } - - return nil - } - - // Cleanup all nodes - nodeList, err := cs.CoreV1().Nodes().List(ctx, metav1.ListOptions{}) - Expect(err).NotTo(HaveOccurred()) - - for _, n := range nodeList.Items { - var err error - for retry := 0; retry < 5; retry++ { - if err = cleanup(n.Name); err == nil { - break - } - time.Sleep(100 * time.Millisecond) - } - Expect(err).NotTo(HaveOccurred()) - } -} - -func CleanupNFDObjects(ctx context.Context, cli *nfdclient.Clientset, namespace string) { - cleanupNodeFeatureRules(ctx, cli) - cleanupNodeFeatures(ctx, cli, namespace) -} - -// cleanupNodeFeatures deletes all NodeFeature objects in the given namespace -func cleanupNodeFeatures(ctx context.Context, cli *nfdclient.Clientset, namespace string) { - nfs, err := cli.NfdV1alpha1().NodeFeatures(namespace).List(ctx, metav1.ListOptions{}) - if errors.IsNotFound(err) { - // Omitted error, nothing to do. - return - } - Expect(err).NotTo(HaveOccurred()) - - if len(nfs.Items) != 0 { - By("[Cleanup]\tDeleting NodeFeature objects from namespace " + namespace) - for _, nf := range nfs.Items { - err = cli.NfdV1alpha1().NodeFeatures(namespace).Delete(ctx, nf.Name, metav1.DeleteOptions{}) - if errors.IsNotFound(err) { - // Omitted error - continue - } - Expect(err).NotTo(HaveOccurred()) - } - } -} - -// cleanupNodeFeatureRules deletes all NodeFeatureRule objects -func cleanupNodeFeatureRules(ctx context.Context, cli *nfdclient.Clientset) { - nfrs, err := cli.NfdV1alpha1().NodeFeatureRules().List(ctx, metav1.ListOptions{}) - if errors.IsNotFound(err) { - // Omitted error, nothing to do. - return - } - Expect(err).NotTo(HaveOccurred()) - - if len(nfrs.Items) != 0 { - By("[Cleanup]\tDeleting NodeFeatureRule objects from the cluster") - for _, nfr := range nfrs.Items { - err = cli.NfdV1alpha1().NodeFeatureRules().Delete(ctx, nfr.Name, metav1.DeleteOptions{}) - if errors.IsNotFound(err) { - // Omitted error - continue - } - Expect(err).NotTo(HaveOccurred()) - } - } -} diff --git a/tests/e2e/common/taints.go b/tests/e2e/common/taints.go deleted file mode 100644 index 217814eda..000000000 --- a/tests/e2e/common/taints.go +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package common - -import ( - corev1 "k8s.io/api/core/v1" -) - -// TaintExists checks if the given taint exists in list of taints. Returns true if exists false otherwise. -func TaintExists(taints []corev1.Taint, taintToFind *corev1.Taint) bool { - for _, taint := range taints { - if taint.MatchTaint(taintToFind) { - return true - } - } - return false -} - -// DeleteTaint removes all the taints that have the same key and effect to given taintToDelete. 
-func DeleteTaint(taints []corev1.Taint, taintToDelete *corev1.Taint) ([]corev1.Taint, bool) { - newTaints := []corev1.Taint{} - deleted := false - for i := range taints { - if taintToDelete.MatchTaint(&taints[i]) { - deleted = true - continue - } - newTaints = append(newTaints, taints[i]) - } - return newTaints, deleted -} diff --git a/tests/e2e/device-plugin_test.go b/tests/e2e/device-plugin_test.go index d171bfb00..1d8540333 100644 --- a/tests/e2e/device-plugin_test.go +++ b/tests/e2e/device-plugin_test.go @@ -1,5 +1,6 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,7 +18,6 @@ package e2e import ( - "context" "fmt" "strings" "time" @@ -27,14 +27,10 @@ import ( helm "github.com/mittwald/go-helm-client" helmValues "github.com/mittwald/go-helm-client/values" - apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" - extclient "k8s.io/apiextensions-apiserver/pkg/client/clientset/clientset" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/util/rand" - "github.com/NVIDIA/k8s-device-plugin/tests/e2e/common" "github.com/NVIDIA/k8s-device-plugin/tests/e2e/common/diagnostics" - "github.com/NVIDIA/k8s-device-plugin/tests/e2e/framework" + "github.com/NVIDIA/k8s-device-plugin/tests/e2e/internal" ) const ( @@ -42,153 +38,140 @@ const ( ) // Actual test suite -var _ = NVDescribe("GPU Device Plugin", func() { - f := framework.NewFramework("k8s-device-plugin") - - Context("When deploying k8s-device-plugin", Ordered, func() { - // helm-chart is required - if *HelmChart == "" { - Fail("No helm-chart for k8s-device-plugin specified") +var _ = Describe("GPU Device Plugin", Ordered, Label("gpu", "e2e", "device-plugin"), func() { + // Init global suite vars vars + var ( + helmReleaseName string + chartSpec helm.ChartSpec + + collectLogsFrom []string + diagnosticsCollector *diagnostics.Diagnostic + ) + + collectLogsFrom = []string{ + "pods", + "nodes", + "namespaces", + "deployments", + "daemonsets", + "jobs", + } + if CollectLogsFrom != "" && CollectLogsFrom != "default" { + collectLogsFrom = strings.Split(CollectLogsFrom, ",") + } + + values := helmValues.Options{ + Values: []string{ + fmt.Sprintf("image.repository=%s", ImageRepo), + fmt.Sprintf("image.tag=%s", ImageTag), + fmt.Sprintf("image.pullPolicy=%s", ImagePullPolicy), + "devicePlugin.enabled=true", + // We need to make affinity null, if not deploying NFD/GFD + // test will fail if not run on a GPU node + "affinity=", + }, + } + + BeforeAll(func(ctx SpecContext) { + // Create clients for apiextensions and our CRD api + helmReleaseName = "nvdp-e2e-test-" + randomSuffix() + + chartSpec = helm.ChartSpec{ + ReleaseName: helmReleaseName, + ChartName: HelmChart, + Namespace: testNamespace.Name, + Wait: true, + Timeout: 1 * time.Minute, + ValuesOptions: values, + CleanupOnFail: true, } - // Init global suite vars vars - var ( - crds []*apiextensionsv1.CustomResourceDefinition - extClient *extclient.Clientset - - helmReleaseName string - chartSpec helm.ChartSpec - - collectLogsFrom []string - diagnosticsCollector *diagnostics.Diagnostic - ) - - defaultCollectorObjects := []string{ - "pods", - "nodes", - "namespaces", - "deployments", - "daemonsets", - "jobs", - } + By("Installing k8s-device-plugin Helm chart") + _, 
err := helmClient.InstallChart(ctx, &chartSpec, nil) + Expect(err).NotTo(HaveOccurred()) - values := helmValues.Options{ - Values: []string{ - fmt.Sprintf("image.repository=%s", *ImageRepo), - fmt.Sprintf("image.tag=%s", *ImageTag), - fmt.Sprintf("image.pullPolicy=%s", *ImagePullPolicy), - "devicePlugin.enabled=true", - // We need to make affinity is none if not deploying NFD/GFD - // test will fail if not run on a GPU node - "affinity=", - }, - } + // Wait for all DaemonSets to be ready + // Note: DaemonSet names are dynamically generated with the Helm release prefix, + // so we wait for all DaemonSets in the namespace rather than specific names + By("Waiting for all DaemonSets to be ready") + err = internal.WaitForDaemonSetsReady(ctx, clientSet, testNamespace.Name, "app.kubernetes.io/name=nvidia-device-plugin") + Expect(err).NotTo(HaveOccurred()) + }) - // check Collector objects - collectLogsFrom = defaultCollectorObjects - if *CollectLogsFrom != "" && *CollectLogsFrom != "default" { - collectLogsFrom = strings.Split(*CollectLogsFrom, ",") + AfterAll(func(ctx SpecContext) { + By("Uninstalling k8s-device-plugin Helm chart") + err := helmClient.UninstallReleaseByName(helmReleaseName) + if err != nil { + GinkgoWriter.Printf("Failed to uninstall helm release %s: %v\n", helmReleaseName, err) } + }) - BeforeAll(func(ctx context.Context) { - // Create clients for apiextensions and our CRD api - extClient = extclient.NewForConfigOrDie(f.ClientConfig()) - helmReleaseName = "nvdp-e2e-test" + rand.String(5) - - chartSpec = helm.ChartSpec{ - ReleaseName: helmReleaseName, - ChartName: *HelmChart, - Namespace: f.Namespace.Name, - Wait: true, - Timeout: 1 * time.Minute, - ValuesOptions: values, - CleanupOnFail: true, - } - - By("Installing k8s-device-plugin Helm chart") - _, err := f.HelmClient.InstallChart(ctx, &chartSpec, nil) + AfterEach(func(ctx SpecContext) { + // Run diagnostic collector if test failed + if CurrentSpecReport().Failed() { + var err error + diagnosticsCollector, err = diagnostics.New( + diagnostics.WithNamespace(testNamespace.Name), + diagnostics.WithArtifactDir(LogArtifactDir), + diagnostics.WithKubernetesClient(clientSet), + diagnostics.WithObjects(collectLogsFrom...), + ) Expect(err).NotTo(HaveOccurred()) - }) - JustBeforeEach(func(ctx context.Context) { - }) + err = diagnosticsCollector.Collect(ctx) + Expect(err).NotTo(HaveOccurred()) + } + }) - AfterEach(func(ctx context.Context) { - // Run diagnostic collector if test failed - if CurrentSpecReport().Failed() { - var err error - diagnosticsCollector, err = diagnostics.New( - diagnostics.WithNamespace(f.Namespace.Name), - diagnostics.WithArtifactDir(*LogArtifactDir), - diagnostics.WithKubernetesClient(f.ClientSet), - diagnostics.WithObjects(collectLogsFrom...), - ) - Expect(err).NotTo(HaveOccurred()) - - err = diagnosticsCollector.Collect(ctx) - Expect(err).NotTo(HaveOccurred()) - } - }) + When("When deploying k8s-device-plugin", Ordered, Label("serial"), func() { + It("it should create nvidia.com/gpu resource", Label("gpu-resource"), func(ctx SpecContext) { + nodeList, err := clientSet.CoreV1().Nodes().List(ctx, metav1.ListOptions{}) + Expect(err).NotTo(HaveOccurred()) + Expect(len(nodeList.Items)).ToNot(BeZero()) - AfterAll(func(ctx context.Context) { - // Delete Helm release - err := f.HelmClient.UninstallReleaseByName(helmReleaseName) + // We pick one node + nodes, err := getNonControlPlaneNodes(ctx, clientSet) Expect(err).NotTo(HaveOccurred()) - for _, crd := range crds { - err := 
extClient.ApiextensionsV1().CustomResourceDefinitions().Delete(ctx, crd.Name, metav1.DeleteOptions{}) - Expect(err).NotTo(HaveOccurred()) - } + targetNodeName := nodes[0].Name + Expect(targetNodeName).ToNot(BeEmpty(), "No suitable worker node found") - // TODO: Add a check for a zero node capacity. + By("Checking the node capacity") + capacityChecker := map[string]k8sLabels{ + targetNodeName: { + "nvidia.com/gpu": "^[1-9]$", + }} + eventuallyNonControlPlaneNodes(ctx, clientSet).Should(MatchCapacity(capacityChecker, nodes), "Node capacity does not match") }) - - Context("and NV Driver is installed", func() { - It("it should create nvidia.com/gpu resource", func(ctx context.Context) { - nodeList, err := f.ClientSet.CoreV1().Nodes().List(ctx, metav1.ListOptions{}) - Expect(err).NotTo(HaveOccurred()) - Expect(len(nodeList.Items)).ToNot(BeZero()) - - // We pick one node - nodes, err := common.GetNonControlPlaneNodes(ctx, f.ClientSet) - Expect(err).NotTo(HaveOccurred()) - - targetNodeName := nodes[0].Name - Expect(targetNodeName).ToNot(BeEmpty(), "No suitable worker node found") - - By("Checking the node capacity") - capacityChecker := map[string]k8sLabels{ - targetNodeName: { - "nvidia.com/gpu": "^[1-9]$", - }} - eventuallyNonControlPlaneNodes(ctx, f.ClientSet).Should(MatchCapacity(capacityChecker, nodes), "Node capacity does not match") - - // TODO: As a workaround to installing and reinstalling client causing - // the required resources to not be available, we merge the two tests. - // }) - // It("it should run GPU jobs", func(ctx context.Context) { - // By("Creating a GPU job") - job := common.GPUJob.DeepCopy() - job.Namespace = f.Namespace.Name - - _, err = f.ClientSet.BatchV1().Jobs(f.Namespace.Name).Create(ctx, job, metav1.CreateOptions{}) - Expect(err).NotTo(HaveOccurred()) - - By("Waiting for job to complete") - Eventually(func() error { - job, err := f.ClientSet.BatchV1().Jobs(f.Namespace.Name).Get(ctx, job.Name, metav1.GetOptions{}) - if err != nil { - return err - } - if job.Status.Succeeded != 1 { - return fmt.Errorf("job %s/%s failed", job.Namespace, job.Name) - } - if job.Status.Succeeded == 1 { - return nil - } - return fmt.Errorf("job %s/%s not completed yet", job.Namespace, job.Name) - }, devicePluginEventuallyTimeout, 5*time.Second).Should(BeNil()) + It("it should run GPU jobs", Label("gpu-job"), func(ctx SpecContext) { + By("Creating a GPU job") + jobNames, err := CreateOrUpdateJobsFromFile(ctx, clientSet, "job-1.yaml", testNamespace.Name) + Expect(err).NotTo(HaveOccurred()) + Expect(jobNames).NotTo(BeEmpty()) + + // Defer cleanup for the job + DeferCleanup(func(ctx SpecContext) { + By("Deleting the GPU job") + err := clientSet.BatchV1().Jobs(testNamespace.Name).Delete(ctx, jobNames[0], metav1.DeleteOptions{}) + if err != nil { + GinkgoWriter.Printf("Failed to delete job %s: %v\n", jobNames[0], err) + } }) + + By("Waiting for job to complete") + Eventually(func(g Gomega) error { + job, err := clientSet.BatchV1().Jobs(testNamespace.Name).Get(ctx, jobNames[0], metav1.GetOptions{}) + if err != nil { + return err + } + if job.Status.Failed > 0 { + return fmt.Errorf("job %s/%s has failed pods: %d", job.Namespace, job.Name, job.Status.Failed) + } + if job.Status.Succeeded != 1 { + return fmt.Errorf("job %s/%s not completed yet: %d succeeded", job.Namespace, job.Name, job.Status.Succeeded) + } + return nil + }).WithContext(ctx).WithPolling(5 * time.Second).WithTimeout(devicePluginEventuallyTimeout).Should(Succeed()) }) }) }) diff --git a/tests/e2e/e2e_test.go b/tests/e2e/e2e_test.go 
index fb0020f5d..54b192cf0 100644 --- a/tests/e2e/e2e_test.go +++ b/tests/e2e/e2e_test.go @@ -1,5 +1,6 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,46 +18,859 @@ package e2e import ( - "flag" + "bytes" + "context" + "errors" + "fmt" + "io" "log" "os" + "path/filepath" + "regexp" + "runtime" + "strconv" + "strings" "testing" + "time" - "github.com/onsi/ginkgo/v2" - "github.com/onsi/gomega" + helm "github.com/mittwald/go-helm-client" + nfdclient "sigs.k8s.io/node-feature-discovery/api/generated/clientset/versioned" + nfdv1alpha1 "sigs.k8s.io/node-feature-discovery/api/nfd/v1alpha1" - "github.com/NVIDIA/k8s-device-plugin/tests/e2e/framework" + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + gomegatypes "github.com/onsi/gomega/types" + + batchv1 "k8s.io/api/batch/v1" + corev1 "k8s.io/api/core/v1" + extclient "k8s.io/apiextensions-apiserver/pkg/client/clientset/clientset" + k8serrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + apiruntime "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/util/rand" + "k8s.io/apimachinery/pkg/util/wait" + "k8s.io/apimachinery/pkg/util/yaml" + clientset "k8s.io/client-go/kubernetes" + k8sscheme "k8s.io/client-go/kubernetes/scheme" + "k8s.io/client-go/rest" + "k8s.io/client-go/tools/clientcmd" +) + +const ( + // DefaultNamespaceDeletionTimeout is timeout duration for waiting for a namespace deletion. + DefaultNamespaceDeletionTimeout = 10 * time.Minute + + // PollInterval is how often to Poll pods, nodes and claims. + PollInterval = 2 * time.Second ) var ( - NVIDIA_DRIVER_ENABLED = flag.Bool("driver-enabled", false, "NVIDIA driver is installed on test infra") - HelmChart = flag.String("helm-chart", "", "Helm chart to use") - LogArtifactDir = flag.String("log-artifacts", "", "Directory to store logs") - ImageRepo = flag.String("image.repo", "", "Image repository to fetch image from") - ImageTag = flag.String("image.tag", "", "Image tag to use") - ImagePullPolicy = flag.String("image.pull-policy", "IfNotPresent", "Image pull policy") - CollectLogsFrom = flag.String("collect-logs-from", "default", "Comma separated list of objects to collect logs from after test") + Kubeconfig string + Timeout time.Duration + HelmChart string + LogArtifactDir string + ImageRepo string + ImageTag string + ImagePullPolicy string + CollectLogsFrom string + cwd string + NVIDIA_DRIVER_ENABLED bool + + // k8s clients + clientConfig *rest.Config + clientSet clientset.Interface + extClient *extclient.Clientset + nfdClient *nfdclient.Clientset + + testNamespace *corev1.Namespace // Every test has at least one namespace unless creation is skipped + + // Helm + helmClient helm.Client + helmLogFile *os.File + helmArtifactDir string + helmLogger *log.Logger + helmReleaseName string + + ctx context.Context + packagePath string ) -func TestMain(m *testing.M) { - // Register test flags, then parse flags. 
- framework.RegisterClusterFlags(flag.CommandLine) - flag.Parse() +func TestMain(t *testing.T) { + suiteName := "E2E K8s Device Plugin" + + RegisterFailHandler(Fail) + + // get the package path + _, thisFile, _, _ := runtime.Caller(0) + packagePath = filepath.Dir(thisFile) + + ctx = context.Background() + getTestEnv() + + // Log random seed for reproducibility + GinkgoWriter.Printf("Random seed: %d\n", GinkgoRandomSeed()) + + RunSpecs(t, + suiteName, + Label("e2e"), + ) +} + +// BeforeSuite runs before the test suite +var _ = BeforeSuite(func(ctx SpecContext) { + var err error + + cwd, err = os.Getwd() + Expect(err).NotTo(HaveOccurred()) + + // Get k8s clients + getK8sClients() + + // Create clients for apiextensions and our CRD api + extClient = extclient.NewForConfigOrDie(clientConfig) + + // Create a namespace for the test + testNamespace, err = CreateTestingNS("k8s-device-plugin-e2e-test", clientSet, nil) + Expect(err).NotTo(HaveOccurred()) + + // Get Helm client + helmReleaseName = "k8s-device-plugin-e2e-test" + rand.String(5) + getHelmClient() +}) + +var _ = AfterSuite(func(ctx SpecContext) { + By("Cleaning up namespace resources") + cleanupNamespaceResources(testNamespace.Name) + + By("Deleting the test namespace") + deleteTestNamespace() +}) + +// Add ReportAfterSuite for logging test summary and random seed +var _ = ReportAfterSuite("", func(report Report) { + // Log test summary + failedCount := 0 + for _, specReport := range report.SpecReports { + if specReport.Failed() { + failedCount++ + } + } + + GinkgoWriter.Printf("\nTest Summary:\n") + GinkgoWriter.Printf(" Total Specs: %d\n", len(report.SpecReports)) + GinkgoWriter.Printf(" Random Seed: %d\n", report.SuiteConfig.RandomSeed) + GinkgoWriter.Printf(" Failed: %d\n", failedCount) + GinkgoWriter.Printf(" Duration: %.2fs\n", report.RunTime.Seconds()) +}) + +// getK8sClients creates the k8s clients +func getK8sClients() { + var err error + + // get config from kubeconfig + c, err := clientcmd.LoadFromFile(Kubeconfig) + Expect(err).NotTo(HaveOccurred()) + + // get client config + clientConfig, err = clientcmd.NewDefaultClientConfig(*c, &clientcmd.ConfigOverrides{}).ClientConfig() + Expect(err).NotTo(HaveOccurred()) + + clientSet, err = clientset.NewForConfig(clientConfig) + Expect(err).NotTo(HaveOccurred()) + + // Create clients for apiextensions and our CRD api + nfdClient = nfdclient.NewForConfigOrDie(clientConfig) +} + +// getHelmClient creates a new Helm client +func getHelmClient() { + var err error + + // Set Helm log file + helmArtifactDir = filepath.Join(LogArtifactDir, "helm") + + // Create a Helm client + err = os.MkdirAll(helmArtifactDir, 0755) + Expect(err).NotTo(HaveOccurred()) + + helmLogFile, err = os.OpenFile(filepath.Join(LogArtifactDir, "helm_logs"), os.O_RDWR|os.O_CREATE|os.O_APPEND, 0666) + Expect(err).NotTo(HaveOccurred()) + + helmLogger = log.New(helmLogFile, fmt.Sprintf("%s\t", testNamespace.Name), log.Ldate|log.Ltime) + + helmRestConf := &helm.RestConfClientOptions{ + Options: &helm.Options{ + Namespace: testNamespace.Name, + RepositoryCache: "/tmp/.helmcache", + RepositoryConfig: "/tmp/.helmrepo", + Debug: true, + DebugLog: helmLogger.Printf, + }, + RestConfig: clientConfig, + } + + helmClient, err = helm.NewClientFromRestConf(helmRestConf) + Expect(err).NotTo(HaveOccurred()) +} + +// getTestEnv gets the test environment variables +func getTestEnv() { + defer GinkgoRecover() + var err error + + Kubeconfig = getRequiredEnvvar[string]("KUBECONFIG") + + Timeout = 
time.Duration(getEnvVarOrDefault("E2E_TIMEOUT_SECONDS", 1800)) * time.Second + + HelmChart = getRequiredEnvvar[string]("HELM_CHART") + + LogArtifactDir = getEnvVarOrDefault("LOG_ARTIFACTS_DIR", "e2e_logs") + + ImageRepo = getRequiredEnvvar[string]("E2E_IMAGE_REPO") + + ImageTag = getRequiredEnvvar[string]("E2E_IMAGE_TAG") + + ImagePullPolicy = getRequiredEnvvar[string]("E2E_IMAGE_PULL_POLICY") + + CollectLogsFrom = getEnvVarOrDefault("COLLECT_LOGS_FROM", "") + + NVIDIA_DRIVER_ENABLED = getEnvVarOrDefault("NVIDIA_DRIVER_ENABLED", false) + + // Get current working directory + cwd, err = os.Getwd() + Expect(err).NotTo(HaveOccurred()) +} + +// CreateTestingNS should be used by every test, note that we append a common prefix to the provided test name. +// Please see NewFramework instead of using this directly. +func CreateTestingNS(baseName string, c clientset.Interface, labels map[string]string) (*corev1.Namespace, error) { + uid := rand.String(5) + if labels == nil { + labels = map[string]string{} + } + labels["e2e-run"] = uid + + // We don't use ObjectMeta.GenerateName feature, as in case of API call + // failure we don't know whether the namespace was created and what is its + // name. + name := fmt.Sprintf("%v-%v", baseName, uid) + + namespaceObj := &corev1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: "", + Labels: labels, + }, + Status: corev1.NamespaceStatus{}, + } + // Be robust about making the namespace creation call. + var got *corev1.Namespace + if err := wait.PollUntilContextTimeout(ctx, PollInterval, 30*time.Second, true, func(ctx context.Context) (bool, error) { + var err error + got, err = c.CoreV1().Namespaces().Create(ctx, namespaceObj, metav1.CreateOptions{}) + if err != nil { + if k8serrors.IsAlreadyExists(err) { + // regenerate on conflict + namespaceObj.Name = fmt.Sprintf("%v-%v", baseName, uid) + } + return false, nil + } + return true, nil + }); err != nil { + return nil, err + } + + return got, nil +} + +type k8sLabels map[string]string + +// eventuallyNonControlPlaneNodes is a helper for asserting node properties +// +//nolint:unused +func eventuallyNonControlPlaneNodes(ctx context.Context, cli clientset.Interface) AsyncAssertion { + return Eventually(func(g Gomega) ([]corev1.Node, error) { + return getNonControlPlaneNodes(ctx, cli) + }).WithPolling(1 * time.Second).WithTimeout(1 * time.Minute).WithContext(ctx) +} + +// MatchLabels returns a specialized Gomega matcher for checking if a list of +// nodes are labeled as expected. +func MatchLabels(expectedNew map[string]k8sLabels, oldNodes []corev1.Node) gomegatypes.GomegaMatcher { + return &nodeListPropertyRegexpMatcher[k8sLabels]{ + propertyName: "labels", + expected: expectedNew, + oldNodes: oldNodes, + } +} + +// MatchCapacity returns a specialized Gomega matcher for checking if a list of +// nodes are configured as expected. +func MatchCapacity(expectedNew map[string]k8sLabels, oldNodes []corev1.Node) gomegatypes.GomegaMatcher { + return &nodeListPropertyRegexpMatcher[k8sLabels]{ + propertyName: "capacity", + expected: expectedNew, + oldNodes: oldNodes, + } +} + +// nodeListPropertyRegexpMatcher is a generic Gomega matcher for asserting one property a group of nodes. +type nodeListPropertyRegexpMatcher[T any] struct { + expected map[string]k8sLabels + oldNodes []corev1.Node + + propertyName string + node *corev1.Node //nolint:unused + missing []string //nolint:unused + invalidValue []string //nolint:unused +} + +// Match method of the GomegaMatcher interface. 
+func (m *nodeListPropertyRegexpMatcher[T]) Match(actual interface{}) (bool, error) { + nodes, ok := actual.([]corev1.Node) + if !ok { + return false, fmt.Errorf("expected []corev1.Node, got: %T", actual) + } + + switch m.propertyName { + case "labels": + return m.matchLabels(nodes), nil + case "capacity": + return m.matchCapacity(nodes), nil + default: + return true, nil + } + +} + +func (m *nodeListPropertyRegexpMatcher[T]) matchLabels(nodes []corev1.Node) bool { + targetNode := corev1.Node{} + for _, node := range nodes { + _, ok := m.expected[node.Name] + if !ok { + continue + } + targetNode = node + break + } + + m.node = &targetNode + + for labelKey, labelValue := range m.expected[targetNode.Name] { + // missing key + if _, ok := targetNode.Labels[labelKey]; !ok { + m.missing = append(m.missing, labelKey) + continue + } + // invalid value + regexMatcher := regexp.MustCompile(labelValue) + if !regexMatcher.MatchString(targetNode.Labels[labelKey]) { + m.invalidValue = append(m.invalidValue, fmt.Sprintf("%s: %s", labelKey, targetNode.Labels[labelKey])) + return false + } + } + + return true +} + +func (m *nodeListPropertyRegexpMatcher[T]) matchCapacity(nodes []corev1.Node) bool { + targetNode := corev1.Node{} + for _, node := range nodes { + _, ok := m.expected[node.Name] + if !ok { + continue + } + targetNode = node + break + } + + m.node = &targetNode + + for labelKey, labelValue := range m.expected[targetNode.Name] { + // missing key + rn := corev1.ResourceName(labelKey) + if _, ok := targetNode.Status.Capacity[rn]; !ok { + m.missing = append(m.missing, labelKey) + continue + } + // invalid value + capacity := targetNode.Status.Capacity[rn] + regexMatcher := regexp.MustCompile(labelValue) + if !regexMatcher.MatchString(capacity.String()) { + m.invalidValue = append(m.invalidValue, fmt.Sprintf("%s: %s", labelKey, capacity.String())) + return false + } + } + + return true +} + +// FailureMessage method of the GomegaMatcher interface. +func (m *nodeListPropertyRegexpMatcher[T]) FailureMessage(actual interface{}) string { + return m.message() +} + +// NegatedFailureMessage method of the GomegaMatcher interface. +func (m *nodeListPropertyRegexpMatcher[T]) NegatedFailureMessage(actual interface{}) string { + return fmt.Sprintf("Node %q matched unexpectedly", m.node.Name) +} + +// TODO remove nolint when golangci-lint is able to cope with generics +// +//nolint:unused +func (m *nodeListPropertyRegexpMatcher[T]) message() string { + msg := fmt.Sprintf("Node %q %s did not match:", m.node.Name, m.propertyName) + if len(m.missing) > 0 { + msg += fmt.Sprintf("\n missing:\n %s", strings.Join(m.missing, "\n ")) + } + if len(m.invalidValue) > 0 { + msg += fmt.Sprintf("\n invalid value:\n %s", strings.Join(m.invalidValue, "\n ")) + } + return msg +} + +// jobIsCompleted checks if a job is completed +// +//nolint:unused +func jobIsCompleted(ctx context.Context, cli clientset.Interface, namespace, podName string) bool { + pod, err := cli.CoreV1().Pods(namespace).Get(ctx, podName, metav1.GetOptions{}) + if err != nil { + return false + } + // Check if the pod's phase is Succeeded. 
+ if pod.Status.Phase == "Succeeded" { + return true + } + return false +} + +// getNonControlPlaneNodes gets the nodes that are not tainted for exclusive control-plane usage +// +//nolint:unused +func getNonControlPlaneNodes(ctx context.Context, cli clientset.Interface) ([]corev1.Node, error) { + nodeList, err := cli.CoreV1().Nodes().List(ctx, metav1.ListOptions{}) + if err != nil { + return nil, err + } + if len(nodeList.Items) == 0 { + return nil, fmt.Errorf("no nodes found in the cluster") + } + + controlPlaneTaint := corev1.Taint{ + Effect: corev1.TaintEffectNoSchedule, + Key: "node-role.kubernetes.io/control-plane", + } + out := []corev1.Node{} + for _, node := range nodeList.Items { + if !taintExists(node.Spec.Taints, &controlPlaneTaint) { + out = append(out, node) + } + } + + if len(out) == 0 { + return nil, fmt.Errorf("no non-control-plane nodes found in the cluster") + } + return out, nil +} + +// taintExists checks if the given taint exists in list of taints. Returns true if exists false otherwise. +// +//nolint:unused +func taintExists(taints []corev1.Taint, taintToFind *corev1.Taint) bool { + for _, taint := range taints { + if taint.MatchTaint(taintToFind) { + return true + } + } + return false +} + +// getNode returns the node object from the list of nodes +// +//nolint:unused +func getNode(nodes []corev1.Node, nodeName string) corev1.Node { + for _, node := range nodes { + if node.Name == nodeName { + return node + } + } + return corev1.Node{} +} + +// CreateOrUpdateJobsFromFile creates or updates jobs from a file +func CreateOrUpdateJobsFromFile(ctx context.Context, cli clientset.Interface, filename, namespace string) ([]string, error) { + jobs, err := newJobFromfile(filepath.Join(packagePath, "..", "..", "testdata", filename)) + if err != nil { + return nil, fmt.Errorf("failed to create Job from file: %w", err) + } + + names := make([]string, len(jobs)) + for i, job := range jobs { + job.Namespace = namespace + + names[i] = job.Name + + // create or update the job + _, err = cli.BatchV1().Jobs(namespace).Get(ctx, job.Name, metav1.GetOptions{}) + if !k8serrors.IsNotFound(err) { + // update the job + _, err = cli.BatchV1().Jobs(namespace).Update(ctx, job, metav1.UpdateOptions{}) + if err != nil { + return nil, fmt.Errorf("failed to update job: %w", err) + } + continue + } + // create the job + _, err = cli.BatchV1().Jobs(namespace).Create(ctx, job, metav1.CreateOptions{}) + if err != nil { + return nil, fmt.Errorf("failed to create job: %w", err) + } + + } + + return names, nil +} + +func newJobFromfile(path string) ([]*batchv1.Job, error) { + objs, err := apiObjsFromFile(path, k8sscheme.Codecs.UniversalDeserializer()) + if err != nil { + return nil, err + } + + jobs := make([]*batchv1.Job, len(objs)) + + for i, obj := range objs { + var ok bool + jobs[i], ok = obj.(*batchv1.Job) + if !ok { + return nil, fmt.Errorf("unexpected type %t when reading %q", obj, path) + } + } + + return jobs, nil +} +func apiObjsFromFile(path string, decoder apiruntime.Decoder) ([]apiruntime.Object, error) { + data, err := os.ReadFile(path) + if err != nil { + return nil, err + } + + // Use Kubernetes' YAML decoder that properly handles multiple documents + // separated by "---", similar to how kubectl processes multi-document YAML files + yamlDecoder := yaml.NewYAMLOrJSONDecoder(bytes.NewReader(data), 4096) + objs := []apiruntime.Object{} + + for { + // Decode into raw extension first + raw := apiruntime.RawExtension{} + if err := yamlDecoder.Decode(&raw); err != nil { + if err == io.EOF { + 
break + } + return nil, err + } + + // Skip empty documents + raw.Raw = bytes.TrimSpace(raw.Raw) + if len(raw.Raw) == 0 { + continue + } + + // Now decode the actual object using the provided decoder + obj, _, err := decoder.Decode(raw.Raw, nil, nil) + if err != nil { + return nil, err + } + objs = append(objs, obj) + } + + return objs, nil +} + +// cleanupNamespaceResources removes all resources in the specified namespace. +func cleanupNamespaceResources(namespace string) { + err := cleanupTestPods(namespace) + Expect(err).NotTo(HaveOccurred()) + + err = cleanupHelmDeployments(namespace) + Expect(err).NotTo(HaveOccurred()) + + cleanupNode(clientSet) + cleanupNFDObjects(nfdClient, testNamespace.Name) + cleanupCRDs() +} + +// waitForDeletion polls the provided checkFunc until a NotFound error is returned, +// confirming that the resource is deleted. +func waitForDeletion(resourceName string, checkFunc func() error) error { + EventuallyWithOffset(1, func(g Gomega) error { + err := checkFunc() + if err != nil && k8serrors.IsNotFound(err) { + return nil + } + if err != nil { + return err + } + return fmt.Errorf("%s still exists", resourceName) + }).WithPolling(5 * time.Second).WithTimeout(2 * time.Minute).WithContext(ctx).Should(Succeed()) + return nil +} - // check if flags are set and if not cancel the test run - if *ImageRepo == "" || *ImageTag == "" || *HelmChart == "" { - log.Fatal("Required flags not set. Please set -image.repo, -image.tag and -helm-chart") +// cleanupTestPods deletes all test Pods in the namespace that have the label "app.nvidia.com=k8s-dra-driver-gpu-test-app". +func cleanupTestPods(namespace string) error { + labelSelector := "app.nvidia.com=k8s-device-plugin-test-app" + podList, err := clientSet.CoreV1().Pods(namespace).List(ctx, metav1.ListOptions{ + LabelSelector: labelSelector, + }) + if err != nil { + return err } - os.Exit(m.Run()) + zero := int64(0) + deleteOptions := metav1.DeleteOptions{GracePeriodSeconds: &zero} + for _, pod := range podList.Items { + if err = clientSet.CoreV1().Pods(namespace).Delete(ctx, pod.Name, deleteOptions); err != nil { + return err + } + if err = waitForDeletion(pod.Name, func() error { + _, err := clientSet.CoreV1().Pods(namespace).Get(ctx, pod.Name, metav1.GetOptions{}) + return err + }); err != nil { + return err + } + } + return nil } -func TestE2E(t *testing.T) { - gomega.RegisterFailHandler(ginkgo.Fail) - // Run tests through the Ginkgo runner with output to console + JUnit for Jenkins - suiteConfig, reporterConfig := ginkgo.GinkgoConfiguration() - // Randomize specs as well as suites - suiteConfig.RandomizeAllSpecs = true +// cleanupHelmDeployments uninstalls all deployed Helm releases in the specified namespace. +func cleanupHelmDeployments(namespace string) error { + releases, err := helmClient.ListDeployedReleases() + if err != nil { + return fmt.Errorf("failed to list deployed releases: %w", err) + } + + for _, release := range releases { + // Check if the release is deployed in the target namespace. + // Depending on your helmClient configuration the release might carry the namespace information. + if release.Namespace == namespace { + if err := helmClient.UninstallReleaseByName(release.Name); err != nil { + return fmt.Errorf("failed to uninstall release %q: %w", release.Name, err) + } + } + } + return nil +} + +// deleteTestNamespace deletes the test namespace and waits for its deletion. 
+func deleteTestNamespace() { + defer func() { + err := clientSet.CoreV1().Namespaces().Delete(ctx, testNamespace.Name, metav1.DeleteOptions{}) + if err != nil && !k8serrors.IsNotFound(err) { + Expect(err).NotTo(HaveOccurred()) + } + err = waitForDeletion(testNamespace.Name, func() error { + _, err := clientSet.CoreV1().Namespaces().Get(ctx, testNamespace.Name, metav1.GetOptions{}) + return err + }) + Expect(err).NotTo(HaveOccurred()) + }() +} + +// cleanupCRDs deletes specific CRDs used during testing. +func cleanupCRDs() { + crds := []string{ + "nodefeatures.nfd.k8s-sigs.io", + "nodefeaturegroups.nfd.k8s-sigs.io", + "nodefeaturerules.nfd.k8s-sigs.io", + } + + for _, crd := range crds { + err := extClient.ApiextensionsV1().CustomResourceDefinitions().Delete(ctx, crd, metav1.DeleteOptions{}) + Expect(err).NotTo(HaveOccurred()) + + _ = waitForDeletion(crd, func() error { + _, err := extClient.ApiextensionsV1().CustomResourceDefinitions().Get(ctx, crd, metav1.GetOptions{}) + return err + }) + } +} + +// cleanupNode deletes all NFD/GFD related metadata from the Node object, i.e. +// labels and annotations +func cleanupNode(cs clientset.Interface) { + // Per-node cleanup function + cleanup := func(nodeName string) error { + node, err := cs.CoreV1().Nodes().Get(ctx, nodeName, metav1.GetOptions{}) + Expect(err).NotTo(HaveOccurred()) + + update := false + updateStatus := false + // Gather info about all NFD-managed node assets outside the default prefix + nfdLabels := map[string]struct{}{} + for _, name := range strings.Split(node.Annotations[nfdv1alpha1.FeatureLabelsAnnotation], ",") { + if strings.Contains(name, "/") { + nfdLabels[name] = struct{}{} + } + } + nfdERs := map[string]struct{}{} + for _, name := range strings.Split(node.Annotations[nfdv1alpha1.ExtendedResourceAnnotation], ",") { + if strings.Contains(name, "/") { + nfdERs[name] = struct{}{} + } + } + + // Remove labels + for key := range node.Labels { + _, ok := nfdLabels[key] + if ok || strings.HasPrefix(key, nfdv1alpha1.FeatureLabelNs) { + delete(node.Labels, key) + update = true + } + } + + // Remove annotations + for key := range node.Annotations { + if strings.HasPrefix(key, nfdv1alpha1.AnnotationNs) { + delete(node.Annotations, key) + update = true + } + } + + // Remove nvidia.com/ labels + for key := range node.Labels { + if strings.HasPrefix(key, "nvidia.com/") { + delete(node.Labels, key) + update = true + } + } + + // Remove extended resources + for key := range node.Status.Capacity { + // We check for FeatureLabelNs as -resource-labels can create ERs there + _, ok := nfdERs[string(key)] + if ok || strings.HasPrefix(string(key), nfdv1alpha1.FeatureLabelNs) { + delete(node.Status.Capacity, key) + delete(node.Status.Allocatable, key) + updateStatus = true + } + } + + if updateStatus { + By("[Cleanup]\tDeleting NFD extended resources from node " + nodeName) + if _, err := cs.CoreV1().Nodes().UpdateStatus(ctx, node, metav1.UpdateOptions{}); err != nil { + return err + } + } + + if update { + By("[Cleanup]\tDeleting NFD labels, annotations and taints from node " + node.Name) + if _, err := cs.CoreV1().Nodes().Update(ctx, node, metav1.UpdateOptions{}); err != nil { + return err + } + } + + return nil + } - ginkgo.RunSpecs(t, "nvidia k8s-device-plugin e2e suite", suiteConfig, reporterConfig) + // Cleanup all nodes + nodeList, err := cs.CoreV1().Nodes().List(ctx, metav1.ListOptions{}) + Expect(err).NotTo(HaveOccurred()) + + for _, n := range nodeList.Items { + nodeName := n.Name + Eventually(func(g Gomega) error { + return 
cleanup(nodeName) + }).WithPolling(100 * time.Millisecond).WithTimeout(500 * time.Millisecond).Should(Succeed()) + } +} + +func cleanupNFDObjects(cli *nfdclient.Clientset, namespace string) { + cleanupNodeFeatureRules(cli) + cleanupNodeFeatures(cli, namespace) +} + +// cleanupNodeFeatures deletes all NodeFeature objects in the given namespace +func cleanupNodeFeatures(cli *nfdclient.Clientset, namespace string) { + nfs, err := cli.NfdV1alpha1().NodeFeatures(namespace).List(ctx, metav1.ListOptions{}) + if k8serrors.IsNotFound(err) { + // Omitted error, nothing to do. + return + } + Expect(err).NotTo(HaveOccurred()) + + if len(nfs.Items) != 0 { + By("[Cleanup]\tDeleting NodeFeature objects from namespace " + namespace) + for _, nf := range nfs.Items { + err = cli.NfdV1alpha1().NodeFeatures(namespace).Delete(ctx, nf.Name, metav1.DeleteOptions{}) + if k8serrors.IsNotFound(err) { + // Omitted error + continue + } + Expect(err).NotTo(HaveOccurred()) + } + } +} + +// cleanupNodeFeatureRules deletes all NodeFeatureRule objects +func cleanupNodeFeatureRules(cli *nfdclient.Clientset) { + nfrs, err := cli.NfdV1alpha1().NodeFeatureRules().List(ctx, metav1.ListOptions{}) + if k8serrors.IsNotFound(err) { + // Omitted error, nothing to do. + return + } + Expect(err).NotTo(HaveOccurred()) + + if len(nfrs.Items) != 0 { + By("[Cleanup]\tDeleting NodeFeatureRule objects from the cluster") + for _, nfr := range nfrs.Items { + err = cli.NfdV1alpha1().NodeFeatureRules().Delete(ctx, nfr.Name, metav1.DeleteOptions{}) + if k8serrors.IsNotFound(err) { + // Omitted error + continue + } + Expect(err).NotTo(HaveOccurred()) + } + } +} + +// getRequiredEnvvar returns the specified envvar if set or raises an error. +func getRequiredEnvvar[T any](key string) T { + v, err := getEnvVarAs[T](key) + Expect(err).To(BeNil(), "required environement variable not set", key) + return v +} + +func getEnvVarAs[T any](key string) (T, error) { + var zero T + value := os.Getenv(key) + if value == "" { + return zero, errors.New("env var not set") + } + + switch any(zero).(type) { + case bool: + v, err := strconv.ParseBool(value) + if err != nil { + return zero, err + } + return any(v).(T), nil + case int: + v, err := strconv.Atoi(value) + if err != nil { + return zero, err + } + return any(v).(T), nil + case string: + return any(value).(T), nil + default: + return zero, errors.New("unsupported type") + } +} + +func getEnvVarOrDefault[T any](key string, defaultValue T) T { + val, err := getEnvVarAs[T](key) + if err != nil { + return defaultValue + } + return val } diff --git a/tests/e2e/framework.go b/tests/e2e/framework.go deleted file mode 100644 index 7a870548f..000000000 --- a/tests/e2e/framework.go +++ /dev/null @@ -1,26 +0,0 @@ -/* - * Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package e2e - -import ( - "github.com/onsi/ginkgo/v2" -) - -// NVDescribe annotates the test with the NVIDIA label. 
-func NVDescribe(text string, body func()) bool { - return ginkgo.Describe("[nvidia] "+text, body) -} diff --git a/tests/e2e/framework/framework.go b/tests/e2e/framework/framework.go deleted file mode 100644 index c371a1384..000000000 --- a/tests/e2e/framework/framework.go +++ /dev/null @@ -1,268 +0,0 @@ -/* - * Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -// Package framework contains provider-independent helper code for -// building and running E2E tests with Ginkgo. The actual Ginkgo test -// suites gets assembled by combining this framework, the optional -// provider support code and specific tests via a separate .go file -// like Kubernetes' test/e2e.go. -package framework - -import ( - "context" - "errors" - "fmt" - "log" - "math/rand" - "os" - "path/filepath" - "time" - - helm "github.com/mittwald/go-helm-client" - "github.com/onsi/ginkgo/v2" - "github.com/onsi/gomega" - corev1 "k8s.io/api/core/v1" - apierrors "k8s.io/apimachinery/pkg/api/errors" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/runtime" - "k8s.io/apimachinery/pkg/runtime/schema" - clientset "k8s.io/client-go/kubernetes" - "k8s.io/client-go/rest" -) - -const ( - // DefaultNamespaceDeletionTimeout is timeout duration for waiting for a namespace deletion. - DefaultNamespaceDeletionTimeout = 5 * time.Minute -) - -// Options is a struct for managing test framework options. -type Options struct { - ClientQPS float32 - ClientBurst int - GroupVersion *schema.GroupVersion -} - -// Framework supports common operations used by e2e tests; it will keep a client & a namespace for you. -// Eventual goal is to merge this with integration test framework. -// -// You can configure the pod security level for your test by setting the `NamespacePodSecurityLevel` -// which will set all three of pod security admission enforce, warn and audit labels on the namespace. -// The default pod security profile is "restricted". -// Each of the labels can be overridden by using more specific NamespacePodSecurity* attributes of this -// struct. -type Framework struct { - BaseName string - - // Set together with creating the ClientSet and the namespace. - // Guaranteed to be unique in the cluster even when running the same - // test multiple times in parallel. - UniqueName string - - clientConfig *rest.Config - ClientSet clientset.Interface - - // Helm - HelmClient helm.Client - HelmLogFile *os.File - HelmLogger *log.Logger - - // configuration for framework's client - Options Options - - SkipNamespaceCreation bool // Whether to skip creating a namespace - Namespace *corev1.Namespace // Every test has at least one namespace unless creation is skipped - NamespaceDeletionTimeout time.Duration - - namespacesToDelete []*corev1.Namespace // Some tests have more than one. -} - -// NewFramework creates a test framework. 
-func NewFramework(baseName string) *Framework { - f := &Framework{ - BaseName: baseName, - } - - // The order is important here: if the extension calls ginkgo.BeforeEach - // itself, then it can be sure that f.BeforeEach already ran when its - // own callback gets invoked. - ginkgo.BeforeEach(f.BeforeEach) - - return f -} - -// ClientConfig an externally accessible method for reading the kube client config. -func (f *Framework) ClientConfig() *rest.Config { - ret := rest.CopyConfig(f.clientConfig) - // json is the least common denominator - ret.ContentType = runtime.ContentTypeJSON - ret.AcceptContentTypes = runtime.ContentTypeJSON - return ret -} - -// BeforeEach gets a client and makes a namespace. -func (f *Framework) BeforeEach(ctx context.Context) { - // DeferCleanup, in contrast to AfterEach, triggers execution in - // first-in-last-out order. This ensures that the framework instance - // remains valid as long as possible. - // - // In addition, AfterEach will not be called if a test never gets here. - ginkgo.DeferCleanup(f.AfterEach) - - ginkgo.By("Creating a kubernetes client") - config, err := LoadConfig() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - - config.QPS = f.Options.ClientQPS - config.Burst = f.Options.ClientBurst - if f.Options.GroupVersion != nil { - config.GroupVersion = f.Options.GroupVersion - } - f.clientConfig = rest.CopyConfig(config) - f.ClientSet, err = clientset.NewForConfig(config) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - - if !f.SkipNamespaceCreation { - ginkgo.By(fmt.Sprintf("Building a namespace with basename %s", f.BaseName)) - namespace, err := f.CreateNamespace(ctx, f.BaseName, map[string]string{ - "e2e-framework": f.BaseName, - }) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - - f.Namespace = namespace - - f.UniqueName = f.Namespace.GetName() - } else { - // not guaranteed to be unique, but very likely - f.UniqueName = fmt.Sprintf("%s-%08x", f.BaseName, rand.Int31()) - } - - // Create a Helm client - ginkgo.By("Creating a Helm client") - - err = os.MkdirAll(filepath.Dir(TestContext.HelmLogFile), 0755) - gomega.Expect(err).To(gomega.BeNil()) - - f.HelmLogFile, err = os.OpenFile(TestContext.HelmLogFile, os.O_RDWR|os.O_CREATE|os.O_APPEND, 0666) - gomega.Expect(err).To(gomega.BeNil()) - - f.HelmLogger = log.New(f.HelmLogFile, fmt.Sprintf("%s\t", f.UniqueName), log.Ldate|log.Ltime) - helmRestConf := &helm.RestConfClientOptions{ - Options: &helm.Options{ - Namespace: f.Namespace.Name, - RepositoryCache: "/tmp/.helmcache", - RepositoryConfig: "/tmp/.helmrepo", - Debug: true, - DebugLog: f.HelmLogger.Printf, - }, - RestConfig: config, - } - - f.HelmClient, err = helm.NewClientFromRestConf(helmRestConf) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) -} - -// AfterEach deletes the namespace, after reading its events. -func (f *Framework) AfterEach(ctx context.Context) { - // This should not happen. Given ClientSet is a public field a test must have updated it! - // Error out early before any API calls during cleanup. - gomega.Expect(f.ClientSet).NotTo(gomega.BeNil()) - - // DeleteNamespace at the very end in defer, to avoid any - // expectation failures preventing deleting the namespace. - defer func() { - var nsDeletionErrors error - // Whether to delete namespace is determined by 3 factors: delete-namespace flag, delete-namespace-on-failure flag and the test result - // if delete-namespace set to false, namespace will always be preserved. 
- // if delete-namespace is true and delete-namespace-on-failure is false, namespace will be preserved if test failed. - if TestContext.DeleteNamespace && (TestContext.DeleteNamespaceOnFailure || !ginkgo.CurrentSpecReport().Failed()) { - for _, ns := range f.namespacesToDelete { - ginkgo.By(fmt.Sprintf("[Cleanup]\tDeleting testing namespace %q.", ns.Name)) - if err := f.ClientSet.CoreV1().Namespaces().Delete(ctx, ns.Name, metav1.DeleteOptions{}); err != nil { - if !apierrors.IsNotFound(err) { - nsDeletionErrors = errors.Join(nsDeletionErrors, fmt.Errorf("error deleting %v: %w", ns.Name, err)) - } - } - // remove the namespace from the list of namespaces to delete - // so that it is not deleted again in the defer block - f.namespacesToDelete = f.namespacesToDelete[1:] - } - } - - // Unsetting this is relevant for a following test that uses - // the same instance because it might not reach f.BeforeEach - // when some other BeforeEach skips the test first. - f.Namespace = nil - f.clientConfig = nil - f.ClientSet = nil - - // if we had errors deleting, report them now. - gomega.Expect(nsDeletionErrors).NotTo(gomega.HaveOccurred()) - }() - - // Close helm log file - err := f.HelmLogFile.Close() - gomega.Expect(err).To(gomega.BeNil()) -} - -// CreateNamespace creates a namespace for e2e testing. -func (f *Framework) CreateNamespace(ctx context.Context, baseName string, labels map[string]string) (*corev1.Namespace, error) { - createTestingNS := TestContext.CreateTestingNS - if createTestingNS == nil { - createTestingNS = CreateTestingNS - } - - if labels == nil { - labels = make(map[string]string) - } else { - labelsCopy := make(map[string]string) - for k, v := range labels { - labelsCopy[k] = v - } - labels = labelsCopy - } - - ns, err := createTestingNS(ctx, baseName, f.ClientSet, labels) - - // check ns instead of err to see if it's nil as we may - // fail to create serviceAccount in it. - f.AddNamespacesToDelete(ns) - - return ns, err -} - -// DeleteNamespace can be used to delete a namespace -func (f *Framework) DeleteNamespace(ctx context.Context, name string) { - defer func() { - err := f.ClientSet.CoreV1().Namespaces().Delete(ctx, name, metav1.DeleteOptions{}) - if err != nil && !apierrors.IsNotFound(err) { - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - } - err = WaitForNamespacesDeleted(ctx, f.ClientSet, []string{name}, DefaultNamespaceDeletionTimeout) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - }() -} - -// AddNamespacesToDelete adds one or more namespaces to be deleted when the test -// completes. -func (f *Framework) AddNamespacesToDelete(namespaces ...*corev1.Namespace) { - for _, ns := range namespaces { - if ns == nil { - continue - } - f.namespacesToDelete = append(f.namespacesToDelete, ns) - - } -} diff --git a/tests/e2e/framework/test_context.go b/tests/e2e/framework/test_context.go deleted file mode 100644 index 739b9aa6e..000000000 --- a/tests/e2e/framework/test_context.go +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package framework - -import ( - "context" - "flag" - "os" - - corev1 "k8s.io/api/core/v1" - clientset "k8s.io/client-go/kubernetes" - "k8s.io/client-go/tools/clientcmd" -) - -// CreateTestingNSFn is a func that is responsible for creating namespace used for executing e2e tests. -type CreateTestingNSFn func(ctx context.Context, baseName string, c clientset.Interface, labels map[string]string) (*corev1.Namespace, error) - -// TestContextType contains test settings and global state -type TestContextType struct { - KubeConfig string - KubeContext string - DeleteNamespace bool - DeleteNamespaceOnFailure bool - - HelmLogFile string - - // CreateTestingNS is responsible for creating namespace used for executing e2e tests. - // It accepts namespace base name, which will be prepended with e2e prefix, kube client - // and labels to be applied to a namespace. - CreateTestingNS CreateTestingNSFn -} - -// TestContext should be used by all tests to access common context data. -var TestContext = TestContextType{} - -// RegisterClusterFlags registers flags specific to the cluster e2e test suite. -func RegisterClusterFlags(flags *flag.FlagSet) { - flags.BoolVar(&TestContext.DeleteNamespace, "delete-namespace", true, "If true tests will delete namespace after completion. It is only designed to make debugging easier, DO NOT turn it off by default.") - flags.BoolVar(&TestContext.DeleteNamespaceOnFailure, "delete-namespace-on-failure", true, "If true, framework will delete test namespace on failure. Used only during test debugging.") - flags.StringVar(&TestContext.KubeConfig, clientcmd.RecommendedConfigPathFlag, os.Getenv(clientcmd.RecommendedConfigPathEnvVar), "Path to kubeconfig containing embedded authinfo.") - flags.StringVar(&TestContext.KubeContext, clientcmd.FlagContext, "", "kubeconfig context to use/override. If unset, will use value from 'current-context'") - flags.StringVar(&TestContext.HelmLogFile, "helm-log-file", "e2e-helm", "Path to the file where helm logs will be written.") -} diff --git a/tests/e2e/framework/util.go b/tests/e2e/framework/util.go deleted file mode 100644 index 78e06fd69..000000000 --- a/tests/e2e/framework/util.go +++ /dev/null @@ -1,156 +0,0 @@ -/* - * Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package framework - -import ( - "context" - "fmt" - "math/rand" - "strconv" - "strings" - "time" - - "github.com/onsi/ginkgo/v2" - corev1 "k8s.io/api/core/v1" - apierrors "k8s.io/apimachinery/pkg/api/errors" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/util/uuid" - "k8s.io/apimachinery/pkg/util/wait" - clientset "k8s.io/client-go/kubernetes" - restclient "k8s.io/client-go/rest" - "k8s.io/client-go/tools/clientcmd" - clientcmdapi "k8s.io/client-go/tools/clientcmd/api" -) - -const ( - // PollInterval is how often to Poll pods, nodes and claims. 
- PollInterval = 2 * time.Second -) - -// RunID is a unique identifier of the e2e run. -// Beware that this ID is not the same for all tests in the e2e run, because each Ginkgo node creates it separately. -var RunID = uuid.NewUUID() - -// RandomSuffix provides a random sequence to append to pods,services,rcs. -func RandomSuffix() string { - return strconv.Itoa(rand.Intn(10000)) -} - -// LoadConfig returns a config for a rest client with the UserAgent set to include the current test name. -func LoadConfig() (config *restclient.Config, err error) { - defer func() { - if err == nil && config != nil { - testDesc := ginkgo.CurrentSpecReport() - if len(testDesc.ContainerHierarchyTexts) > 0 { - testName := strings.Join(testDesc.ContainerHierarchyTexts, " ") - if len(testDesc.LeafNodeText) > 0 { - testName = testName + " " + testDesc.LeafNodeText - } - config.UserAgent = fmt.Sprintf("%s -- %s", restclient.DefaultKubernetesUserAgent(), testName) - } - } - }() - - c, err := restclientConfig(TestContext.KubeContext) - if err != nil { - if TestContext.KubeConfig == "" { - return restclient.InClusterConfig() - } - return nil, err - } - - return clientcmd.NewDefaultClientConfig(*c, &clientcmd.ConfigOverrides{}).ClientConfig() -} - -// restclientConfig returns a config holds the information needed to build connection to kubernetes clusters. -func restclientConfig(kubeContext string) (*clientcmdapi.Config, error) { - if TestContext.KubeConfig == "" { - return nil, fmt.Errorf("KubeConfig must be specified to load client config") - } - c, err := clientcmd.LoadFromFile(TestContext.KubeConfig) - if err != nil { - return nil, fmt.Errorf("error loading KubeConfig: %v", err.Error()) - } - if kubeContext != "" { - c.CurrentContext = kubeContext - } - return c, nil -} - -// CreateTestingNS should be used by every test, note that we append a common prefix to the provided test name. -// Please see NewFramework instead of using this directly. -func CreateTestingNS(ctx context.Context, baseName string, c clientset.Interface, labels map[string]string) (*corev1.Namespace, error) { - if labels == nil { - labels = map[string]string{} - } - labels["e2e-run"] = string(RunID) - - // We don't use ObjectMeta.GenerateName feature, as in case of API call - // failure we don't know whether the namespace was created and what is its - // name. - name := fmt.Sprintf("%v-%v", baseName, RandomSuffix()) - - namespaceObj := &corev1.Namespace{ - ObjectMeta: metav1.ObjectMeta{ - Name: name, - Namespace: "", - Labels: labels, - }, - Status: corev1.NamespaceStatus{}, - } - // Be robust about making the namespace creation call. - var got *corev1.Namespace - if err := wait.PollUntilContextTimeout(ctx, PollInterval, 30*time.Second, true, func(ctx context.Context) (bool, error) { - var err error - got, err = c.CoreV1().Namespaces().Create(ctx, namespaceObj, metav1.CreateOptions{}) - if err != nil { - if apierrors.IsAlreadyExists(err) { - // regenerate on conflict - namespaceObj.Name = fmt.Sprintf("%v-%v", baseName, RandomSuffix()) - } - return false, nil - } - return true, nil - }); err != nil { - return nil, err - } - - return got, nil -} - -// WaitForNamespacesDeleted waits for the namespaces to be deleted. 
-func WaitForNamespacesDeleted(ctx context.Context, c clientset.Interface, namespaces []string, timeout time.Duration) error { - ginkgo.By(fmt.Sprintf("Waiting for namespaces %+v to vanish", namespaces)) - nsMap := map[string]bool{} - for _, ns := range namespaces { - nsMap[ns] = true - } - // Now POLL until all namespaces have been eradicated. - return wait.PollUntilContextTimeout(ctx, 2*time.Second, timeout, true, - func(ctx context.Context) (bool, error) { - nsList, err := c.CoreV1().Namespaces().List(ctx, metav1.ListOptions{}) - if err != nil { - return false, err - } - for _, item := range nsList.Items { - if _, ok := nsMap[item.Name]; ok { - return false, nil - } - } - return true, nil - }) -} diff --git a/tests/e2e/gomega.go b/tests/e2e/gomega.go deleted file mode 100644 index 1fa4364e3..000000000 --- a/tests/e2e/gomega.go +++ /dev/null @@ -1,191 +0,0 @@ -/* - * Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package e2e - -import ( - "context" - "fmt" - "regexp" - "strings" - "time" - - . "github.com/onsi/gomega" - gomegatypes "github.com/onsi/gomega/types" - - corev1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - clientset "k8s.io/client-go/kubernetes" - - "github.com/NVIDIA/k8s-device-plugin/tests/e2e/common" -) - -type k8sLabels map[string]string - -// eventuallyNonControlPlaneNodes is a helper for asserting node properties -func eventuallyNonControlPlaneNodes(ctx context.Context, cli clientset.Interface) AsyncAssertion { - return Eventually(func(g Gomega, ctx context.Context) ([]corev1.Node, error) { - return common.GetNonControlPlaneNodes(ctx, cli) - }).WithPolling(1 * time.Second).WithTimeout(10 * time.Second).WithContext(ctx) -} - -// MatchLabels returns a specialized Gomega matcher for checking if a list of -// nodes are labeled as expected. -func MatchLabels(expectedNew map[string]k8sLabels, oldNodes []corev1.Node) gomegatypes.GomegaMatcher { - return &nodeListPropertyRegexpMatcher[k8sLabels]{ - propertyName: "labels", - expected: expectedNew, - oldNodes: oldNodes, - } -} - -// MatchCapacity returns a specialized Gomega matcher for checking if a list of -// nodes are configured as expected. -func MatchCapacity(expectedNew map[string]k8sLabels, oldNodes []corev1.Node) gomegatypes.GomegaMatcher { - return &nodeListPropertyRegexpMatcher[k8sLabels]{ - propertyName: "capacity", - expected: expectedNew, - oldNodes: oldNodes, - } -} - -// nodeListPropertyRegexpMatcher is a generic Gomega matcher for asserting one property a group of nodes. -type nodeListPropertyRegexpMatcher[T any] struct { - expected map[string]k8sLabels - oldNodes []corev1.Node - - propertyName string - node *corev1.Node //nolint:unused - missing []string //nolint:unused - invalidValue []string //nolint:unused -} - -// Match method of the GomegaMatcher interface. 
-func (m *nodeListPropertyRegexpMatcher[T]) Match(actual interface{}) (bool, error) { - nodes, ok := actual.([]corev1.Node) - if !ok { - return false, fmt.Errorf("expected []corev1.Node, got: %T", actual) - } - - switch m.propertyName { - case "labels": - return m.matchLabels(nodes), nil - case "capacity": - return m.matchCapacity(nodes), nil - default: - return true, nil - } - -} - -func (m *nodeListPropertyRegexpMatcher[T]) matchLabels(nodes []corev1.Node) bool { - targetNode := corev1.Node{} - for _, node := range nodes { - _, ok := m.expected[node.Name] - if !ok { - continue - } - targetNode = node - break - } - - m.node = &targetNode - - for labelKey, labelValue := range m.expected[targetNode.Name] { - // missing key - if _, ok := targetNode.Labels[labelKey]; !ok { - m.missing = append(m.missing, labelKey) - continue - } - // invalid value - regexMatcher := regexp.MustCompile(labelValue) - if !regexMatcher.MatchString(targetNode.Labels[labelKey]) { - m.invalidValue = append(m.invalidValue, fmt.Sprintf("%s: %s", labelKey, targetNode.Labels[labelKey])) - return false - } - } - - return true -} - -func (m *nodeListPropertyRegexpMatcher[T]) matchCapacity(nodes []corev1.Node) bool { - targetNode := corev1.Node{} - for _, node := range nodes { - _, ok := m.expected[node.Name] - if !ok { - continue - } - targetNode = node - break - } - - m.node = &targetNode - - for labelKey, labelValue := range m.expected[targetNode.Name] { - // missing key - rn := corev1.ResourceName(labelKey) - if _, ok := targetNode.Status.Capacity[rn]; !ok { - m.missing = append(m.missing, labelKey) - continue - } - // invalid value - capacity := targetNode.Status.Capacity[rn] - regexMatcher := regexp.MustCompile(labelValue) - if !regexMatcher.MatchString(capacity.String()) { - m.invalidValue = append(m.invalidValue, fmt.Sprintf("%s: %s", labelKey, capacity.String())) - return false - } - } - - return true -} - -// FailureMessage method of the GomegaMatcher interface. -func (m *nodeListPropertyRegexpMatcher[T]) FailureMessage(actual interface{}) string { - return m.message() -} - -// NegatedFailureMessage method of the GomegaMatcher interface. -func (m *nodeListPropertyRegexpMatcher[T]) NegatedFailureMessage(actual interface{}) string { - return fmt.Sprintf("Node %q matched unexpectedly", m.node.Name) -} - -// TODO remove nolint when golangci-lint is able to cope with generics -// -//nolint:unused -func (m *nodeListPropertyRegexpMatcher[T]) message() string { - msg := fmt.Sprintf("Node %q %s did not match:", m.node.Name, m.propertyName) - if len(m.missing) > 0 { - msg += fmt.Sprintf("\n missing:\n %s", strings.Join(m.missing, "\n ")) - } - if len(m.invalidValue) > 0 { - msg += fmt.Sprintf("\n invalid value:\n %s", strings.Join(m.invalidValue, "\n ")) - } - return msg -} - -// JobIsCompleted checks if a job is completed -func JobIsCompleted(ctx context.Context, cli clientset.Interface, namespace, podName string) bool { - pod, err := cli.CoreV1().Pods(namespace).Get(ctx, podName, metav1.GetOptions{}) - if err != nil { - return false - } - // Check if the pod's phase is Succeeded. - if pod.Status.Phase == "Succeeded" { - return true - } - return false -} diff --git a/tests/e2e/gpu-feature-discovery_test.go b/tests/e2e/gpu-feature-discovery_test.go index 5bc8aecfd..f66a935a5 100644 --- a/tests/e2e/gpu-feature-discovery_test.go +++ b/tests/e2e/gpu-feature-discovery_test.go @@ -1,5 +1,6 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 
+ * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -27,21 +28,15 @@ import ( helm "github.com/mittwald/go-helm-client" helmValues "github.com/mittwald/go-helm-client/values" - apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" - extclient "k8s.io/apiextensions-apiserver/pkg/client/clientset/clientset" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/rand" - nfdclient "sigs.k8s.io/node-feature-discovery/api/generated/clientset/versioned" - "github.com/NVIDIA/k8s-device-plugin/tests/e2e/common" "github.com/NVIDIA/k8s-device-plugin/tests/e2e/common/diagnostics" - "github.com/NVIDIA/k8s-device-plugin/tests/e2e/framework" + "github.com/NVIDIA/k8s-device-plugin/tests/e2e/internal" ) // Actual test suite -var _ = NVDescribe("GPU Feature Discovery", func() { - f := framework.NewFramework("gpu-feature-discovery") - +var _ = Describe("GPU Feature Discovery", Ordered, Label("gfd", "gpu", "e2e"), func() { expectedLabelPatterns := k8sLabels{ "nvidia.com/gfd.timestamp": "[0-9]{10}", "nvidia.com/cuda.driver.major": "[0-9]+", @@ -71,113 +66,108 @@ var _ = NVDescribe("GPU Feature Discovery", func() { "nodeFeature", } - Context("When deploying GFD", Ordered, func() { - // helm-chart is required - if *HelmChart == "" { - Fail("No helm-chart for GPU-Feature-Discovery specified") - } + // Init global suite vars + var ( + helmReleaseName string + chartSpec helm.ChartSpec + + collectLogsFrom []string + diagnosticsCollector diagnostics.Collector + ) + + values := helmValues.Options{ + Values: []string{ + fmt.Sprintf("image.repository=%s", ImageRepo), + fmt.Sprintf("image.tag=%s", ImageTag), + fmt.Sprintf("image.pullPolicy=%s", ImagePullPolicy), + "gfd.enabled=true", + "devicePlugin.enabled=false", + }, + } - // Init global suite vars vars - var ( - crds []*apiextensionsv1.CustomResourceDefinition - extClient *extclient.Clientset - nfdClient *nfdclient.Clientset - - chartSpec helm.ChartSpec - helmReleaseName string - - collectLogsFrom []string - diagnosticsCollector diagnostics.Collector - ) - - values := helmValues.Options{ - Values: []string{ - fmt.Sprintf("image.repository=%s", *ImageRepo), - fmt.Sprintf("image.tag=%s", *ImageTag), - fmt.Sprintf("image.pullPolicy=%s", *ImagePullPolicy), - "gfd.enabled=true", - "devicePlugin.enabled=false", - }, - } + // checkNodeFeatureObject is a helper function to check if NodeFeature object was created + checkNodeFeatureObject := func(ctx context.Context, name string) bool { + gfdNodeFeature := fmt.Sprintf("nvidia-features-for-%s", name) + _, err := nfdClient.NfdV1alpha1().NodeFeatures(testNamespace.Name).Get(ctx, gfdNodeFeature, metav1.GetOptions{}) + return err == nil + } - // checkNodeFeatureObject is a helper function to check if NodeFeature object was created - checkNodeFeatureObject := func(ctx context.Context, name string) bool { - gfdNodeFeature := fmt.Sprintf("nvidia-features-for-%s", name) - _, err := nfdClient.NfdV1alpha1().NodeFeatures(f.Namespace.Name).Get(ctx, gfdNodeFeature, metav1.GetOptions{}) - return err == nil - } + // check Collector objects + collectLogsFrom = defaultCollectorObjects + if CollectLogsFrom != "" && CollectLogsFrom != "default" { + collectLogsFrom = strings.Split(CollectLogsFrom, ",") + } - // check Collector objects - collectLogsFrom = defaultCollectorObjects - if 
*CollectLogsFrom != "" && *CollectLogsFrom != "default" { - collectLogsFrom = strings.Split(*CollectLogsFrom, ",") + BeforeAll(func(ctx SpecContext) { + helmReleaseName = "gfd-e2e-test" + rand.String(5) + + // reset Helm Client + chartSpec = helm.ChartSpec{ + ReleaseName: helmReleaseName, + ChartName: HelmChart, + Namespace: testNamespace.Name, + Wait: true, + Timeout: 1 * time.Minute, + ValuesOptions: values, + CleanupOnFail: true, } - BeforeAll(func(ctx context.Context) { - // Create clients for apiextensions and our CRD api - extClient = extclient.NewForConfigOrDie(f.ClientConfig()) - nfdClient = nfdclient.NewForConfigOrDie(f.ClientConfig()) - helmReleaseName = "gfd-e2e-test" + rand.String(5) - }) + By("Installing GFD Helm chart") + _, err := helmClient.InstallChart(ctx, &chartSpec, nil) + Expect(err).NotTo(HaveOccurred()) - JustBeforeEach(func(ctx context.Context) { - // reset Helm Client - chartSpec = helm.ChartSpec{ - ReleaseName: helmReleaseName, - ChartName: *HelmChart, - Namespace: f.Namespace.Name, - Wait: true, - Timeout: 1 * time.Minute, - ValuesOptions: values, - CleanupOnFail: true, - } - - By("Installing GFD Helm chart") - _, err := f.HelmClient.InstallChart(ctx, &chartSpec, nil) - Expect(err).NotTo(HaveOccurred()) - }) + // Wait for all DaemonSets to be ready + // Note: DaemonSet names are dynamically generated with the Helm release prefix, + // so we wait for all DaemonSets in the namespace rather than specific names + By("Waiting for all DaemonSets to be ready") + err = internal.WaitForDaemonSetsReady(ctx, clientSet, testNamespace.Name, "app.kubernetes.io/name=nvidia-device-plugin") + Expect(err).NotTo(HaveOccurred()) + }) - // Cleanup before next test run - AfterEach(func(ctx context.Context) { - // Run diagnostic collector if test failed - if CurrentSpecReport().Failed() { - var err error - diagnosticsCollector, err = diagnostics.New( - diagnostics.WithNamespace(f.Namespace.Name), - diagnostics.WithArtifactDir(*LogArtifactDir), - diagnostics.WithKubernetesClient(f.ClientSet), - diagnostics.WithNFDClient(nfdClient), - diagnostics.WithObjects(collectLogsFrom...), - ) - Expect(err).NotTo(HaveOccurred()) + AfterAll(func(ctx SpecContext) { + By("Uninstalling GFD Helm chart") + err := helmClient.UninstallReleaseByName(helmReleaseName) + if err != nil { + GinkgoWriter.Printf("Failed to uninstall helm release %s: %v\n", helmReleaseName, err) + } + }) - err = diagnosticsCollector.Collect(ctx) - Expect(err).NotTo(HaveOccurred()) - } - // Delete Helm release - err := f.HelmClient.UninstallReleaseByName(helmReleaseName) + // Cleanup before next test run + AfterEach(func(ctx SpecContext) { + // Run diagnostic collector if test failed + if CurrentSpecReport().Failed() { + var err error + diagnosticsCollector, err = diagnostics.New( + diagnostics.WithNamespace(testNamespace.Name), + diagnostics.WithArtifactDir(LogArtifactDir), + diagnostics.WithKubernetesClient(clientSet), + diagnostics.WithNFDClient(nfdClient), + diagnostics.WithObjects(collectLogsFrom...), + ) Expect(err).NotTo(HaveOccurred()) - // Cleanup environment - By("[Cleanup]\tCleaning up environment") - common.CleanupNode(ctx, f.ClientSet) - common.CleanupNFDObjects(ctx, nfdClient, f.Namespace.Name) - }) - AfterAll(func(ctx context.Context) { - for _, crd := range crds { - err := extClient.ApiextensionsV1().CustomResourceDefinitions().Delete(ctx, crd.Name, metav1.DeleteOptions{}) - Expect(err).NotTo(HaveOccurred()) - } - }) + err = diagnosticsCollector.Collect(ctx) + Expect(err).NotTo(HaveOccurred()) + } + }) - Context("and 
NV Driver is not installed", func() { - It("it should create nvidia.com timestamp label", func(ctx context.Context) { - nodeList, err := f.ClientSet.CoreV1().Nodes().List(ctx, metav1.ListOptions{}) + When("When deploying GFD", Ordered, Label("serial"), func() { + Context("NV Driver is not installed", func() { + BeforeEach(func() { + // Skip this context when driver is enabled since "NV Driver is installed" + // context provides more comprehensive testing + if NVIDIA_DRIVER_ENABLED { + Skip("Skipping driver-not-installed tests when NVIDIA_DRIVER_ENABLED is true") + } + }) + + It("it should create nvidia.com timestamp label", Label("timestamp"), func(ctx SpecContext) { + nodeList, err := clientSet.CoreV1().Nodes().List(ctx, metav1.ListOptions{}) Expect(err).NotTo(HaveOccurred()) Expect(len(nodeList.Items)).ToNot(BeZero()) // We pick one node targeted for our NodeFeature objects - nodes, err := common.GetNonControlPlaneNodes(ctx, f.ClientSet) + nodes, err := getNonControlPlaneNodes(ctx, clientSet) Expect(err).NotTo(HaveOccurred()) targetNodeName := nodes[0].Name @@ -188,52 +178,51 @@ var _ = NVDescribe("GPU Feature Discovery", func() { targetNodeName: { "nvidia.com/gfd.timestamp": "[0-9]{10}", }} - eventuallyNonControlPlaneNodes(ctx, f.ClientSet).Should(MatchLabels(labelChecker, nodes)) + eventuallyNonControlPlaneNodes(ctx, clientSet).Should(MatchLabels(labelChecker, nodes)) }) Context("and the NodeFeature API is enabled", func() { - It("gfd should create node feature object", func(ctx context.Context) { + It("gfd should create node feature object", Label("nodefeature"), func(ctx SpecContext) { By("Updating GFD Helm chart values") newValues := values newValues.Values = append(newValues.Values, "nfd.enableNodeFeatureApi=true") chartSpec.ValuesOptions = newValues chartSpec.Replace = true - _, err := f.HelmClient.UpgradeChart(ctx, &chartSpec, nil) + _, err := helmClient.UpgradeChart(ctx, &chartSpec, nil) Expect(err).NotTo(HaveOccurred()) By("Checking if NodeFeature CR object is created") - nodes, err := common.GetNonControlPlaneNodes(ctx, f.ClientSet) + nodes, err := getNonControlPlaneNodes(ctx, clientSet) Expect(err).NotTo(HaveOccurred()) targetNodeName := nodes[0].Name Expect(targetNodeName).ToNot(BeEmpty()) - Eventually(func() bool { + Eventually(func(g Gomega) bool { return checkNodeFeatureObject(ctx, targetNodeName) - }, 2*time.Minute, 5*time.Second).Should(BeTrue()) + }).WithContext(ctx).WithPolling(5 * time.Second).WithTimeout(2 * time.Minute).Should(BeTrue()) By("Checking that node labels are created from NodeFeature object") labelChecker := map[string]k8sLabels{ targetNodeName: { "nvidia.com/gfd.timestamp": "[0-9]{10}", }} - eventuallyNonControlPlaneNodes(ctx, f.ClientSet).Should(MatchLabels(labelChecker, nodes)) + eventuallyNonControlPlaneNodes(ctx, clientSet).Should(MatchLabels(labelChecker, nodes)) }) }) }) - Context("and NV Driver is installed", func() { - BeforeEach(func(ctx context.Context) { - // Skip test if NVIDIA_DRIVER_ENABLED is not set - if !*NVIDIA_DRIVER_ENABLED { + When("NV Driver is installed", func() { + It("it should create nvidia.com labels", Label("driver", "labels"), func(ctx SpecContext) { + if !NVIDIA_DRIVER_ENABLED { Skip("NVIDIA_DRIVER_ENABLED is not set") } - }) - It("it should create nvidia.com labels", func(ctx context.Context) { - nodeList, err := f.ClientSet.CoreV1().Nodes().List(ctx, metav1.ListOptions{}) + + By("Checking the node labels") + nodeList, err := clientSet.CoreV1().Nodes().List(ctx, metav1.ListOptions{}) Expect(err).NotTo(HaveOccurred()) 
Expect(len(nodeList.Items)).ToNot(BeZero()) // We pick one node targeted for our NodeFeature objects - nodes, err := common.GetNonControlPlaneNodes(ctx, f.ClientSet) + nodes, err := getNonControlPlaneNodes(ctx, clientSet) Expect(err).NotTo(HaveOccurred()) targetNodeName := nodes[0].Name @@ -242,32 +231,35 @@ var _ = NVDescribe("GPU Feature Discovery", func() { By("Checking the node labels") labelChecker := map[string]k8sLabels{ targetNodeName: expectedLabelPatterns} - eventuallyNonControlPlaneNodes(ctx, f.ClientSet).Should(MatchLabels(labelChecker, nodes)) + eventuallyNonControlPlaneNodes(ctx, clientSet).Should(MatchLabels(labelChecker, nodes)) }) Context("and the NodeFeature API is enabled", func() { - It("gfd should create node feature object", func(ctx context.Context) { + It("gfd should create node feature object", Label("driver", "nodefeature"), func(ctx SpecContext) { + if !NVIDIA_DRIVER_ENABLED { + Skip("NVIDIA_DRIVER_ENABLED is not set") + } By("Updating GFD Helm chart values") newValues := values newValues.Values = append(newValues.Values, "nfd.enableNodeFeatureApi=true") chartSpec.ValuesOptions = newValues chartSpec.Replace = true - _, err := f.HelmClient.UpgradeChart(ctx, &chartSpec, nil) + _, err := helmClient.UpgradeChart(ctx, &chartSpec, nil) Expect(err).NotTo(HaveOccurred()) By("Checking if NodeFeature CR object is created") - nodes, err := common.GetNonControlPlaneNodes(ctx, f.ClientSet) + nodes, err := getNonControlPlaneNodes(ctx, clientSet) Expect(err).NotTo(HaveOccurred()) targetNodeName := nodes[0].Name Expect(targetNodeName).ToNot(BeEmpty()) - Eventually(func() bool { + Eventually(func(g Gomega) bool { return checkNodeFeatureObject(ctx, targetNodeName) - }, 2*time.Minute, 5*time.Second).Should(BeTrue()) + }).WithContext(ctx).WithPolling(5 * time.Second).WithTimeout(2 * time.Minute).Should(BeTrue()) By("Checking that node labels are created from NodeFeature CR object") checkForLabels := map[string]k8sLabels{ targetNodeName: expectedLabelPatterns} - eventuallyNonControlPlaneNodes(ctx, f.ClientSet).Should(MatchLabels(checkForLabels, nodes)) + eventuallyNonControlPlaneNodes(ctx, clientSet).Should(MatchLabels(checkForLabels, nodes)) }) }) }) diff --git a/tests/e2e/infra/aws.yaml b/tests/e2e/infra/aws.yaml index 251d9ba2a..6de73f56e 100644 --- a/tests/e2e/infra/aws.yaml +++ b/tests/e2e/infra/aws.yaml @@ -11,27 +11,16 @@ spec: instance: type: g4dn.xlarge region: us-west-1 - ingressIpRanges: - - 18.190.12.32/32 - - 3.143.46.93/32 - - 52.15.119.136/32 - - 35.155.108.162/32 - - 35.162.190.51/32 - - 54.201.61.24/32 - - 52.24.205.48/32 - - 44.235.4.62/32 - - 44.230.241.223/32 image: architecture: amd64 - imageId: ami-0ce2cb35386fc22e9 containerRuntime: install: true name: containerd - nvidiaContainerToolkit: - install: true nvidiaDriver: install: true + nvidiaContainerToolkit: + install: true + enableCDI: true kubernetes: install: true installer: kubeadm - version: v1.28.5 diff --git a/tests/e2e/internal/kube.go b/tests/e2e/internal/kube.go new file mode 100644 index 000000000..41031f135 --- /dev/null +++ b/tests/e2e/internal/kube.go @@ -0,0 +1,288 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package internal + +import ( + "context" + "fmt" + "time" + + . "github.com/onsi/gomega" + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/labels" + "k8s.io/client-go/kubernetes" +) + +const ( + // DefaultPollInterval for Eventually checks + DefaultPollInterval = 2 * time.Second + // DefaultTimeout for Eventually checks + DefaultTimeout = 5 * time.Minute +) + +// WaitForDaemonSetRollout waits for a DaemonSet to complete its rollout +func WaitForDaemonSetRollout(ctx context.Context, client kubernetes.Interface, namespace, name string) error { + EventuallyWithOffset(1, func(g Gomega) error { + ds, err := client.AppsV1().DaemonSets(namespace).Get(ctx, name, metav1.GetOptions{}) + if err != nil { + return err + } + + // Check if rollout is complete + if ds.Status.DesiredNumberScheduled == 0 { + return fmt.Errorf("daemonset %s/%s has 0 desired pods", namespace, name) + } + + if ds.Status.NumberReady != ds.Status.DesiredNumberScheduled { + return fmt.Errorf("daemonset %s/%s rollout incomplete: %d/%d pods ready", + namespace, name, ds.Status.NumberReady, ds.Status.DesiredNumberScheduled) + } + + if ds.Status.UpdatedNumberScheduled != ds.Status.DesiredNumberScheduled { + return fmt.Errorf("daemonset %s/%s update incomplete: %d/%d pods updated", + namespace, name, ds.Status.UpdatedNumberScheduled, ds.Status.DesiredNumberScheduled) + } + + // Check generation to ensure we're looking at the latest spec + if ds.Generation != ds.Status.ObservedGeneration { + return fmt.Errorf("daemonset %s/%s generation mismatch: %d != %d", + namespace, name, ds.Generation, ds.Status.ObservedGeneration) + } + + return nil + }).WithContext(ctx).WithPolling(DefaultPollInterval).WithTimeout(DefaultTimeout).Should(Succeed()) + return nil +} + +// WaitForAllDaemonSetsReady waits for all DaemonSets in a namespace to be ready +func WaitForAllDaemonSetsReady(ctx context.Context, client kubernetes.Interface, namespace string) error { + return WaitForDaemonSetsReady(ctx, client, namespace, "") +} + +// WaitForDaemonSetsReady waits for DaemonSets in a namespace to be ready, optionally filtered by label selector +func WaitForDaemonSetsReady(ctx context.Context, client kubernetes.Interface, namespace, labelSelector string) error { + EventuallyWithOffset(1, func(g Gomega) error { + dsList, err := client.AppsV1().DaemonSets(namespace).List(ctx, metav1.ListOptions{ + LabelSelector: labelSelector, + }) + if err != nil { + return err + } + + if len(dsList.Items) == 0 { + return fmt.Errorf("no daemonsets found in namespace %s with selector '%s'", namespace, labelSelector) + } + + for _, ds := range dsList.Items { + // Skip if no pods are desired + if ds.Status.DesiredNumberScheduled == 0 { + continue + } + + if ds.Status.NumberReady != ds.Status.DesiredNumberScheduled { + return fmt.Errorf("daemonset %s/%s rollout incomplete: %d/%d pods ready", + namespace, ds.Name, ds.Status.NumberReady, ds.Status.DesiredNumberScheduled) + } + + if ds.Status.UpdatedNumberScheduled != ds.Status.DesiredNumberScheduled { + return 
fmt.Errorf("daemonset %s/%s update incomplete: %d/%d pods updated", + namespace, ds.Name, ds.Status.UpdatedNumberScheduled, ds.Status.DesiredNumberScheduled) + } + + // Check generation to ensure we're looking at the latest spec + if ds.Generation != ds.Status.ObservedGeneration { + return fmt.Errorf("daemonset %s/%s generation mismatch: %d != %d", + namespace, ds.Name, ds.Generation, ds.Status.ObservedGeneration) + } + } + + return nil + }).WithContext(ctx).WithPolling(DefaultPollInterval).WithTimeout(DefaultTimeout).Should(Succeed()) + return nil +} + +// WaitForDaemonSetPodsReady waits for all pods of a DaemonSet to be ready +func WaitForDaemonSetPodsReady(ctx context.Context, client kubernetes.Interface, namespace, name string) error { + EventuallyWithOffset(1, func(g Gomega) error { + ds, err := client.AppsV1().DaemonSets(namespace).Get(ctx, name, metav1.GetOptions{}) + if err != nil { + return err + } + + selector, err := metav1.LabelSelectorAsSelector(ds.Spec.Selector) + if err != nil { + return fmt.Errorf("invalid selector: %v", err) + } + + pods, err := client.CoreV1().Pods(namespace).List(ctx, metav1.ListOptions{ + LabelSelector: selector.String(), + }) + if err != nil { + return err + } + + if len(pods.Items) == 0 { + return fmt.Errorf("no pods found for daemonset %s/%s", namespace, name) + } + + for _, pod := range pods.Items { + if !isPodReady(&pod) { + return fmt.Errorf("pod %s/%s is not ready", pod.Namespace, pod.Name) + } + } + + return nil + }).WithContext(ctx).WithPolling(DefaultPollInterval).WithTimeout(DefaultTimeout).Should(Succeed()) + return nil +} + +// WaitForNodeLabels waits for specific labels to appear on nodes +func WaitForNodeLabels(ctx context.Context, client kubernetes.Interface, labelSelector string, expectedLabels map[string]string) error { + EventuallyWithOffset(1, func(g Gomega) error { + nodes, err := client.CoreV1().Nodes().List(ctx, metav1.ListOptions{ + LabelSelector: labelSelector, + }) + if err != nil { + return err + } + + if len(nodes.Items) == 0 { + return fmt.Errorf("no nodes found with selector: %s", labelSelector) + } + + // Check each node has the expected labels + for _, node := range nodes.Items { + for key, expectedValue := range expectedLabels { + actualValue, exists := node.Labels[key] + if !exists { + return fmt.Errorf("node %s missing label: %s", node.Name, key) + } + if expectedValue != "" && actualValue != expectedValue { + return fmt.Errorf("node %s label %s=%s, expected %s", + node.Name, key, actualValue, expectedValue) + } + } + } + + return nil + }).WithContext(ctx).WithPolling(DefaultPollInterval).WithTimeout(DefaultTimeout).Should(Succeed()) + return nil +} + +// WaitForGFDLabels waits for GPU Feature Discovery labels on nodes +func WaitForGFDLabels(ctx context.Context, client kubernetes.Interface, nodeName string) error { + gfdLabels := []string{ + "nvidia.com/gfd.timestamp", + "nvidia.com/cuda.driver.major", + "nvidia.com/cuda.driver.minor", + "nvidia.com/gpu.family", + "nvidia.com/gpu.machine", + "nvidia.com/gpu.memory", + "nvidia.com/gpu.product", + } + + EventuallyWithOffset(1, func(g Gomega) error { + node, err := client.CoreV1().Nodes().Get(ctx, nodeName, metav1.GetOptions{}) + if err != nil { + return err + } + + for _, label := range gfdLabels { + if _, exists := node.Labels[label]; !exists { + return fmt.Errorf("node %s missing GFD label: %s", nodeName, label) + } + } + + return nil + }).WithContext(ctx).WithPolling(DefaultPollInterval).WithTimeout(DefaultTimeout).Should(Succeed()) + return nil +} + +// 
WaitForPodsRunning waits for pods matching a selector to be running +func WaitForPodsRunning(ctx context.Context, client kubernetes.Interface, namespace string, selector labels.Selector) error { + EventuallyWithOffset(1, func(g Gomega) error { + pods, err := client.CoreV1().Pods(namespace).List(ctx, metav1.ListOptions{ + LabelSelector: selector.String(), + }) + if err != nil { + return err + } + + if len(pods.Items) == 0 { + return fmt.Errorf("no pods found matching selector: %s", selector.String()) + } + + for _, pod := range pods.Items { + if pod.Status.Phase != corev1.PodRunning { + return fmt.Errorf("pod %s/%s is %s, not Running", pod.Namespace, pod.Name, pod.Status.Phase) + } + } + + return nil + }).WithContext(ctx).WithPolling(DefaultPollInterval).WithTimeout(DefaultTimeout).Should(Succeed()) + return nil +} + +// WaitForDeploymentRollout waits for a deployment to complete its rollout +func WaitForDeploymentRollout(ctx context.Context, client kubernetes.Interface, namespace, name string) error { + EventuallyWithOffset(1, func(g Gomega) error { + deployment, err := client.AppsV1().Deployments(namespace).Get(ctx, name, metav1.GetOptions{}) + if err != nil { + return err + } + + // Check if the deployment is complete + for _, condition := range deployment.Status.Conditions { + if condition.Type == appsv1.DeploymentProgressing { + if condition.Status != corev1.ConditionTrue { + return fmt.Errorf("deployment %s/%s is not progressing: %s", namespace, name, condition.Message) + } + } + if condition.Type == appsv1.DeploymentAvailable { + if condition.Status != corev1.ConditionTrue { + return fmt.Errorf("deployment %s/%s is not available: %s", namespace, name, condition.Message) + } + } + } + + if deployment.Status.UpdatedReplicas != *deployment.Spec.Replicas { + return fmt.Errorf("deployment %s/%s update incomplete: %d/%d replicas updated", + namespace, name, deployment.Status.UpdatedReplicas, *deployment.Spec.Replicas) + } + + if deployment.Status.ReadyReplicas != *deployment.Spec.Replicas { + return fmt.Errorf("deployment %s/%s not ready: %d/%d replicas ready", + namespace, name, deployment.Status.ReadyReplicas, *deployment.Spec.Replicas) + } + + return nil + }).WithContext(ctx).WithPolling(DefaultPollInterval).WithTimeout(DefaultTimeout).Should(Succeed()) + return nil +} + +// isPodReady checks if a pod is ready +func isPodReady(pod *corev1.Pod) bool { + for _, condition := range pod.Status.Conditions { + if condition.Type == corev1.PodReady { + return condition.Status == corev1.ConditionTrue + } + } + return false +} diff --git a/tests/vendor/k8s.io/apimachinery/pkg/util/uuid/uuid.go b/tests/vendor/k8s.io/apimachinery/pkg/util/uuid/uuid.go deleted file mode 100644 index 1fa351aab..000000000 --- a/tests/vendor/k8s.io/apimachinery/pkg/util/uuid/uuid.go +++ /dev/null @@ -1,27 +0,0 @@ -/* -Copyright 2014 The Kubernetes Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-*/
-
-package uuid
-
-import (
-	"github.com/google/uuid"
-
-	"k8s.io/apimachinery/pkg/types"
-)
-
-func NewUUID() types.UID {
-	return types.UID(uuid.New().String())
-}
diff --git a/tests/vendor/modules.txt b/tests/vendor/modules.txt
index 4a7a8d3b5..5f36422dd 100644
--- a/tests/vendor/modules.txt
+++ b/tests/vendor/modules.txt
@@ -840,7 +840,6 @@ k8s.io/apimachinery/pkg/util/remotecommand
 k8s.io/apimachinery/pkg/util/runtime
 k8s.io/apimachinery/pkg/util/sets
 k8s.io/apimachinery/pkg/util/strategicpatch
-k8s.io/apimachinery/pkg/util/uuid
 k8s.io/apimachinery/pkg/util/validation
 k8s.io/apimachinery/pkg/util/validation/field
 k8s.io/apimachinery/pkg/util/version

From 67e0b073072965dccf7aae656a923990c2b345f4 Mon Sep 17 00:00:00 2001
From: Evan Lezar
Date: Mon, 15 Sep 2025 16:54:00 +0200
Subject: [PATCH 2/6] [no-relnote] Minor cleanups

Signed-off-by: Evan Lezar
---
 tests/e2e/device-plugin_test.go         |  10 +-
 tests/e2e/e2e_test.go                   |   8 +-
 tests/e2e/gpu-feature-discovery_test.go | 206 +++++++++--------------
 3 files changed, 87 insertions(+), 137 deletions(-)

diff --git a/tests/e2e/device-plugin_test.go b/tests/e2e/device-plugin_test.go
index 1d8540333..1e1fc7688 100644
--- a/tests/e2e/device-plugin_test.go
+++ b/tests/e2e/device-plugin_test.go
@@ -19,6 +19,7 @@ package e2e
 
 import (
 	"fmt"
+	"path/filepath"
 	"strings"
 	"time"
 
@@ -28,6 +29,7 @@ import (
 	helm "github.com/mittwald/go-helm-client"
 	helmValues "github.com/mittwald/go-helm-client/values"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/util/rand"
 
 	"github.com/NVIDIA/k8s-device-plugin/tests/e2e/common/diagnostics"
 	"github.com/NVIDIA/k8s-device-plugin/tests/e2e/internal"
@@ -39,7 +41,7 @@ const (
 
 // Actual test suite
 var _ = Describe("GPU Device Plugin", Ordered, Label("gpu", "e2e", "device-plugin"), func() {
-	// Init global suite vars vars
+	// Init global suite vars
 	var (
 		helmReleaseName string
 		chartSpec       helm.ChartSpec
@@ -74,7 +76,7 @@ var _ = Describe("GPU Device Plugin", Ordered, Label("gpu", "e2e", "device-plugi
 
 	BeforeAll(func(ctx SpecContext) {
 		// Create clients for apiextensions and our CRD api
-		helmReleaseName = "nvdp-e2e-test-" + randomSuffix()
+		helmReleaseName = "nvdp-e2e-test-" + rand.String(5)
 
 		chartSpec = helm.ChartSpec{
 			ReleaseName:     helmReleaseName,
@@ -145,9 +147,9 @@ var _ = Describe("GPU Device Plugin", Ordered, Label("gpu", "e2e", "device-plugi
 		})
 		It("it should run GPU jobs", Label("gpu-job"), func(ctx SpecContext) {
 			By("Creating a GPU job")
-			jobNames, err := CreateOrUpdateJobsFromFile(ctx, clientSet, "job-1.yaml", testNamespace.Name)
+			jobNames, err := CreateOrUpdateJobsFromFile(ctx, clientSet, testNamespace.Name, filepath.Join(projectRoot, "testdata", "job-1.yaml"))
 			Expect(err).NotTo(HaveOccurred())
-			Expect(jobNames).NotTo(BeEmpty())
+			Expect(jobNames).To(HaveLen(1))
 
 			// Defer cleanup for the job
 			DeferCleanup(func(ctx SpecContext) {
diff --git a/tests/e2e/e2e_test.go b/tests/e2e/e2e_test.go
index 54b192cf0..97b0c685e 100644
--- a/tests/e2e/e2e_test.go
+++ b/tests/e2e/e2e_test.go
@@ -93,6 +93,7 @@ var (
 	ctx         context.Context
 
 	packagePath string
+	projectRoot string
 )
 
 func TestMain(t *testing.T) {
@@ -103,6 +104,7 @@ func TestMain(t *testing.T) {
 	// get the package path
 	_, thisFile, _, _ := runtime.Caller(0)
 	packagePath = filepath.Dir(thisFile)
+	projectRoot = filepath.Join(packagePath, "..", "..")
 
 	ctx = context.Background()
 	getTestEnv()
@@ -286,8 +288,6 @@ func CreateTestingNS(baseName string, c clientset.Interface, labels map[string]s
 type k8sLabels map[string]string
 
 //
eventuallyNonControlPlaneNodes is a helper for asserting node properties -// -//nolint:unused func eventuallyNonControlPlaneNodes(ctx context.Context, cli clientset.Interface) AsyncAssertion { return Eventually(func(g Gomega) ([]corev1.Node, error) { return getNonControlPlaneNodes(ctx, cli) @@ -498,8 +498,8 @@ func getNode(nodes []corev1.Node, nodeName string) corev1.Node { } // CreateOrUpdateJobsFromFile creates or updates jobs from a file -func CreateOrUpdateJobsFromFile(ctx context.Context, cli clientset.Interface, filename, namespace string) ([]string, error) { - jobs, err := newJobFromfile(filepath.Join(packagePath, "..", "..", "testdata", filename)) +func CreateOrUpdateJobsFromFile(ctx context.Context, cli clientset.Interface, namespace string, filename string) ([]string, error) { + jobs, err := newJobFromfile(filename) if err != nil { return nil, fmt.Errorf("failed to create Job from file: %w", err) } diff --git a/tests/e2e/gpu-feature-discovery_test.go b/tests/e2e/gpu-feature-discovery_test.go index f66a935a5..9eb1399c5 100644 --- a/tests/e2e/gpu-feature-discovery_test.go +++ b/tests/e2e/gpu-feature-discovery_test.go @@ -35,37 +35,28 @@ import ( "github.com/NVIDIA/k8s-device-plugin/tests/e2e/internal" ) +var expectedLabelPatterns = k8sLabels{ + "nvidia.com/gfd.timestamp": "[0-9]{10}", + "nvidia.com/cuda.driver.major": "[0-9]+", + "nvidia.com/cuda.driver.minor": "[0-9]+", + "nvidia.com/cuda.driver.rev": "[0-9]*", + "nvidia.com/cuda.runtime.major": "[0-9]+", + "nvidia.com/cuda.runtime.minor": "[0-9]+", + "nvidia.com/gpu.machine": ".*", + "nvidia.com/gpu.count": "[0-9]+", + "nvidia.com/gpu.replicas": "[0-9]+", + "nvidia.com/gpu.sharing-strategy": "[none|mps|time-slicing]", + "nvidia.com/gpu.product": "[A-Za-z_-]+", + "nvidia.com/gpu.memory": "[0-9]+", + "nvidia.com/gpu.family": "[a-z]+", + "nvidia.com/mig.capable": "[true|false]", + "nvidia.com/gpu.compute.major": "[0-9]+", + "nvidia.com/gpu.compute.minor": "[0-9]+", + "nvidia.com/mps.capable": "[true|false]", +} + // Actual test suite var _ = Describe("GPU Feature Discovery", Ordered, Label("gfd", "gpu", "e2e"), func() { - expectedLabelPatterns := k8sLabels{ - "nvidia.com/gfd.timestamp": "[0-9]{10}", - "nvidia.com/cuda.driver.major": "[0-9]+", - "nvidia.com/cuda.driver.minor": "[0-9]+", - "nvidia.com/cuda.driver.rev": "[0-9]*", - "nvidia.com/cuda.runtime.major": "[0-9]+", - "nvidia.com/cuda.runtime.minor": "[0-9]+", - "nvidia.com/gpu.machine": ".*", - "nvidia.com/gpu.count": "[0-9]+", - "nvidia.com/gpu.replicas": "[0-9]+", - "nvidia.com/gpu.sharing-strategy": "[none|mps|time-slicing]", - "nvidia.com/gpu.product": "[A-Za-z_-]+", - "nvidia.com/gpu.memory": "[0-9]+", - "nvidia.com/gpu.family": "[a-z]+", - "nvidia.com/mig.capable": "[true|false]", - "nvidia.com/gpu.compute.major": "[0-9]+", - "nvidia.com/gpu.compute.minor": "[0-9]+", - "nvidia.com/mps.capable": "[true|false]", - } - - defaultCollectorObjects := []string{ - "pods", - "nodes", - "namespaces", - "deployments", - "daemonsets", - "nodeFeature", - } - // Init global suite vars var ( helmReleaseName string @@ -75,6 +66,18 @@ var _ = Describe("GPU Feature Discovery", Ordered, Label("gfd", "gpu", "e2e"), f diagnosticsCollector diagnostics.Collector ) + collectLogsFrom = []string{ + "pods", + "nodes", + "namespaces", + "deployments", + "daemonsets", + "nodeFeature", + } + if CollectLogsFrom != "" && CollectLogsFrom != "default" { + collectLogsFrom = strings.Split(CollectLogsFrom, ",") + } + values := helmValues.Options{ Values: []string{ fmt.Sprintf("image.repository=%s", 
ImageRepo), @@ -92,12 +95,6 @@ var _ = Describe("GPU Feature Discovery", Ordered, Label("gfd", "gpu", "e2e"), f return err == nil } - // check Collector objects - collectLogsFrom = defaultCollectorObjects - if CollectLogsFrom != "" && CollectLogsFrom != "default" { - collectLogsFrom = strings.Split(CollectLogsFrom, ",") - } - BeforeAll(func(ctx SpecContext) { helmReleaseName = "gfd-e2e-test" + rand.String(5) @@ -152,115 +149,66 @@ var _ = Describe("GPU Feature Discovery", Ordered, Label("gfd", "gpu", "e2e"), f }) When("When deploying GFD", Ordered, Label("serial"), func() { - Context("NV Driver is not installed", func() { - BeforeEach(func() { - // Skip this context when driver is enabled since "NV Driver is installed" - // context provides more comprehensive testing - if NVIDIA_DRIVER_ENABLED { - Skip("Skipping driver-not-installed tests when NVIDIA_DRIVER_ENABLED is true") - } - }) - - It("it should create nvidia.com timestamp label", Label("timestamp"), func(ctx SpecContext) { - nodeList, err := clientSet.CoreV1().Nodes().List(ctx, metav1.ListOptions{}) - Expect(err).NotTo(HaveOccurred()) - Expect(len(nodeList.Items)).ToNot(BeZero()) - - // We pick one node targeted for our NodeFeature objects - nodes, err := getNonControlPlaneNodes(ctx, clientSet) - Expect(err).NotTo(HaveOccurred()) - - targetNodeName := nodes[0].Name - Expect(targetNodeName).ToNot(BeEmpty()) + It("it should create nvidia.com labels", func(ctx SpecContext) { + nodeList, err := clientSet.CoreV1().Nodes().List(ctx, metav1.ListOptions{}) + Expect(err).NotTo(HaveOccurred()) + Expect(len(nodeList.Items)).ToNot(BeZero()) - By("Checking the node labels") - labelChecker := map[string]k8sLabels{ - targetNodeName: { - "nvidia.com/gfd.timestamp": "[0-9]{10}", - }} - eventuallyNonControlPlaneNodes(ctx, clientSet).Should(MatchLabels(labelChecker, nodes)) - }) - Context("and the NodeFeature API is enabled", func() { - It("gfd should create node feature object", Label("nodefeature"), func(ctx SpecContext) { - By("Updating GFD Helm chart values") - newValues := values - newValues.Values = append(newValues.Values, "nfd.enableNodeFeatureApi=true") - chartSpec.ValuesOptions = newValues - chartSpec.Replace = true - _, err := helmClient.UpgradeChart(ctx, &chartSpec, nil) - Expect(err).NotTo(HaveOccurred()) + // We pick one node targeted for our NodeFeature objects + nodes, err := getNonControlPlaneNodes(ctx, clientSet) + Expect(err).NotTo(HaveOccurred()) - By("Checking if NodeFeature CR object is created") - nodes, err := getNonControlPlaneNodes(ctx, clientSet) - Expect(err).NotTo(HaveOccurred()) + targetNodeName := nodes[0].Name + Expect(targetNodeName).ToNot(BeEmpty()) - targetNodeName := nodes[0].Name - Expect(targetNodeName).ToNot(BeEmpty()) - Eventually(func(g Gomega) bool { - return checkNodeFeatureObject(ctx, targetNodeName) - }).WithContext(ctx).WithPolling(5 * time.Second).WithTimeout(2 * time.Minute).Should(BeTrue()) + By("Checking the node labels") - By("Checking that node labels are created from NodeFeature object") - labelChecker := map[string]k8sLabels{ - targetNodeName: { - "nvidia.com/gfd.timestamp": "[0-9]{10}", - }} - eventuallyNonControlPlaneNodes(ctx, clientSet).Should(MatchLabels(labelChecker, nodes)) - }) - }) - }) - - When("NV Driver is installed", func() { - It("it should create nvidia.com labels", Label("driver", "labels"), func(ctx SpecContext) { - if !NVIDIA_DRIVER_ENABLED { - Skip("NVIDIA_DRIVER_ENABLED is not set") + labelChecker := map[string]k8sLabels{ + targetNodeName: expectedLabelPatterns, + } + if 
!NVIDIA_DRIVER_ENABLED { + // If the NVIDIA driver is not installed, we only check the + // timestamp label to allow for local testing on non-GPU + // systems. + labelChecker[targetNodeName] = k8sLabels{ + "nvidia.com/gfd.timestamp": "[0-9]{10}", } - - By("Checking the node labels") - nodeList, err := clientSet.CoreV1().Nodes().List(ctx, metav1.ListOptions{}) + } + eventuallyNonControlPlaneNodes(ctx, clientSet).Should(MatchLabels(labelChecker, nodes)) + }) + Context("and the NodeFeature API is enabled", func() { + It("gfd should create node feature object", Label("nodefeature"), func(ctx SpecContext) { + By("Updating GFD Helm chart values") + newValues := values + newValues.Values = append(newValues.Values, "nfd.enableNodeFeatureApi=true") + chartSpec.ValuesOptions = newValues + chartSpec.Replace = true + _, err := helmClient.UpgradeChart(ctx, &chartSpec, nil) Expect(err).NotTo(HaveOccurred()) - Expect(len(nodeList.Items)).ToNot(BeZero()) - // We pick one node targeted for our NodeFeature objects + By("Checking if NodeFeature CR object is created") nodes, err := getNonControlPlaneNodes(ctx, clientSet) Expect(err).NotTo(HaveOccurred()) targetNodeName := nodes[0].Name Expect(targetNodeName).ToNot(BeEmpty()) + Eventually(func(g Gomega) bool { + return checkNodeFeatureObject(ctx, targetNodeName) + }).WithContext(ctx).WithPolling(5 * time.Second).WithTimeout(2 * time.Minute).Should(BeTrue()) - By("Checking the node labels") + By("Checking that node labels are created from NodeFeature object") labelChecker := map[string]k8sLabels{ - targetNodeName: expectedLabelPatterns} - eventuallyNonControlPlaneNodes(ctx, clientSet).Should(MatchLabels(labelChecker, nodes)) - }) - Context("and the NodeFeature API is enabled", func() { - It("gfd should create node feature object", Label("driver", "nodefeature"), func(ctx SpecContext) { - if !NVIDIA_DRIVER_ENABLED { - Skip("NVIDIA_DRIVER_ENABLED is not set") + targetNodeName: expectedLabelPatterns, + } + if !NVIDIA_DRIVER_ENABLED { + // If the NVIDIA driver is not installed, we only check the + // timestamp label to allow for local testing on non-GPU + // systems. 
+ labelChecker[targetNodeName] = k8sLabels{ + "nvidia.com/gfd.timestamp": "[0-9]{10}", } - By("Updating GFD Helm chart values") - newValues := values - newValues.Values = append(newValues.Values, "nfd.enableNodeFeatureApi=true") - chartSpec.ValuesOptions = newValues - chartSpec.Replace = true - _, err := helmClient.UpgradeChart(ctx, &chartSpec, nil) - Expect(err).NotTo(HaveOccurred()) - - By("Checking if NodeFeature CR object is created") - nodes, err := getNonControlPlaneNodes(ctx, clientSet) - Expect(err).NotTo(HaveOccurred()) - - targetNodeName := nodes[0].Name - Expect(targetNodeName).ToNot(BeEmpty()) - Eventually(func(g Gomega) bool { - return checkNodeFeatureObject(ctx, targetNodeName) - }).WithContext(ctx).WithPolling(5 * time.Second).WithTimeout(2 * time.Minute).Should(BeTrue()) - - By("Checking that node labels are created from NodeFeature CR object") - checkForLabels := map[string]k8sLabels{ - targetNodeName: expectedLabelPatterns} - eventuallyNonControlPlaneNodes(ctx, clientSet).Should(MatchLabels(checkForLabels, nodes)) - }) + } + eventuallyNonControlPlaneNodes(ctx, clientSet).Should(MatchLabels(labelChecker, nodes)) }) }) }) From df67aa8b191390dc8065ca76c3ac8e56c0cf5dcb Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Wed, 17 Sep 2025 14:40:11 +0200 Subject: [PATCH 3/6] TOFIX: reduce diff Signed-off-by: Evan Lezar --- tests/e2e/device-plugin_test.go | 3 +- tests/e2e/e2e_test.go | 42 ++++ tests/e2e/gpu-feature-discovery_test.go | 3 +- tests/e2e/internal/kube.go | 288 ------------------------ 4 files changed, 44 insertions(+), 292 deletions(-) delete mode 100644 tests/e2e/internal/kube.go diff --git a/tests/e2e/device-plugin_test.go b/tests/e2e/device-plugin_test.go index 1e1fc7688..917d68ca4 100644 --- a/tests/e2e/device-plugin_test.go +++ b/tests/e2e/device-plugin_test.go @@ -32,7 +32,6 @@ import ( "k8s.io/apimachinery/pkg/util/rand" "github.com/NVIDIA/k8s-device-plugin/tests/e2e/common/diagnostics" - "github.com/NVIDIA/k8s-device-plugin/tests/e2e/internal" ) const ( @@ -96,7 +95,7 @@ var _ = Describe("GPU Device Plugin", Ordered, Label("gpu", "e2e", "device-plugi // Note: DaemonSet names are dynamically generated with the Helm release prefix, // so we wait for all DaemonSets in the namespace rather than specific names By("Waiting for all DaemonSets to be ready") - err = internal.WaitForDaemonSetsReady(ctx, clientSet, testNamespace.Name, "app.kubernetes.io/name=nvidia-device-plugin") + err = waitForDaemonSetsReady(ctx, clientSet, testNamespace.Name, "app.kubernetes.io/name=nvidia-device-plugin") Expect(err).NotTo(HaveOccurred()) }) diff --git a/tests/e2e/e2e_test.go b/tests/e2e/e2e_test.go index 97b0c685e..2c6c1661c 100644 --- a/tests/e2e/e2e_test.go +++ b/tests/e2e/e2e_test.go @@ -874,3 +874,45 @@ func getEnvVarOrDefault[T any](key string, defaultValue T) T { } return val } + +// waitForDaemonSetsReady waits for DaemonSets in a namespace to be ready, optionally filtered by label selector +func waitForDaemonSetsReady(ctx context.Context, client kubernetes.Interface, namespace, labelSelector string) error { + EventuallyWithOffset(1, func(g Gomega) error { + dsList, err := client.AppsV1().DaemonSets(namespace).List(ctx, metav1.ListOptions{ + LabelSelector: labelSelector, + }) + if err != nil { + return err + } + + if len(dsList.Items) == 0 { + return fmt.Errorf("no daemonsets found in namespace %s with selector '%s'", namespace, labelSelector) + } + + for _, ds := range dsList.Items { + // Skip if no pods are desired + if ds.Status.DesiredNumberScheduled == 0 { + continue + } + + 
if ds.Status.NumberReady != ds.Status.DesiredNumberScheduled { + return fmt.Errorf("daemonset %s/%s rollout incomplete: %d/%d pods ready", + namespace, ds.Name, ds.Status.NumberReady, ds.Status.DesiredNumberScheduled) + } + + if ds.Status.UpdatedNumberScheduled != ds.Status.DesiredNumberScheduled { + return fmt.Errorf("daemonset %s/%s update incomplete: %d/%d pods updated", + namespace, ds.Name, ds.Status.UpdatedNumberScheduled, ds.Status.DesiredNumberScheduled) + } + + // Check generation to ensure we're looking at the latest spec + if ds.Generation != ds.Status.ObservedGeneration { + return fmt.Errorf("daemonset %s/%s generation mismatch: %d != %d", + namespace, ds.Name, ds.Generation, ds.Status.ObservedGeneration) + } + } + + return nil + }).WithContext(ctx).WithPolling(2 * time.Second).WithTimeout(5 * time.Minute).Should(Succeed()) + return nil +} diff --git a/tests/e2e/gpu-feature-discovery_test.go b/tests/e2e/gpu-feature-discovery_test.go index 9eb1399c5..8b2170107 100644 --- a/tests/e2e/gpu-feature-discovery_test.go +++ b/tests/e2e/gpu-feature-discovery_test.go @@ -32,7 +32,6 @@ import ( "k8s.io/apimachinery/pkg/util/rand" "github.com/NVIDIA/k8s-device-plugin/tests/e2e/common/diagnostics" - "github.com/NVIDIA/k8s-device-plugin/tests/e2e/internal" ) var expectedLabelPatterns = k8sLabels{ @@ -117,7 +116,7 @@ var _ = Describe("GPU Feature Discovery", Ordered, Label("gfd", "gpu", "e2e"), f // Note: DaemonSet names are dynamically generated with the Helm release prefix, // so we wait for all DaemonSets in the namespace rather than specific names By("Waiting for all DaemonSets to be ready") - err = internal.WaitForDaemonSetsReady(ctx, clientSet, testNamespace.Name, "app.kubernetes.io/name=nvidia-device-plugin") + err = waitForDaemonSetsReady(ctx, clientSet, testNamespace.Name, "app.kubernetes.io/name=nvidia-device-plugin") Expect(err).NotTo(HaveOccurred()) }) diff --git a/tests/e2e/internal/kube.go b/tests/e2e/internal/kube.go deleted file mode 100644 index 41031f135..000000000 --- a/tests/e2e/internal/kube.go +++ /dev/null @@ -1,288 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package internal - -import ( - "context" - "fmt" - "time" - - . 
"github.com/onsi/gomega" - appsv1 "k8s.io/api/apps/v1" - corev1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/labels" - "k8s.io/client-go/kubernetes" -) - -const ( - // DefaultPollInterval for Eventually checks - DefaultPollInterval = 2 * time.Second - // DefaultTimeout for Eventually checks - DefaultTimeout = 5 * time.Minute -) - -// WaitForDaemonSetRollout waits for a DaemonSet to complete its rollout -func WaitForDaemonSetRollout(ctx context.Context, client kubernetes.Interface, namespace, name string) error { - EventuallyWithOffset(1, func(g Gomega) error { - ds, err := client.AppsV1().DaemonSets(namespace).Get(ctx, name, metav1.GetOptions{}) - if err != nil { - return err - } - - // Check if rollout is complete - if ds.Status.DesiredNumberScheduled == 0 { - return fmt.Errorf("daemonset %s/%s has 0 desired pods", namespace, name) - } - - if ds.Status.NumberReady != ds.Status.DesiredNumberScheduled { - return fmt.Errorf("daemonset %s/%s rollout incomplete: %d/%d pods ready", - namespace, name, ds.Status.NumberReady, ds.Status.DesiredNumberScheduled) - } - - if ds.Status.UpdatedNumberScheduled != ds.Status.DesiredNumberScheduled { - return fmt.Errorf("daemonset %s/%s update incomplete: %d/%d pods updated", - namespace, name, ds.Status.UpdatedNumberScheduled, ds.Status.DesiredNumberScheduled) - } - - // Check generation to ensure we're looking at the latest spec - if ds.Generation != ds.Status.ObservedGeneration { - return fmt.Errorf("daemonset %s/%s generation mismatch: %d != %d", - namespace, name, ds.Generation, ds.Status.ObservedGeneration) - } - - return nil - }).WithContext(ctx).WithPolling(DefaultPollInterval).WithTimeout(DefaultTimeout).Should(Succeed()) - return nil -} - -// WaitForAllDaemonSetsReady waits for all DaemonSets in a namespace to be ready -func WaitForAllDaemonSetsReady(ctx context.Context, client kubernetes.Interface, namespace string) error { - return WaitForDaemonSetsReady(ctx, client, namespace, "") -} - -// WaitForDaemonSetsReady waits for DaemonSets in a namespace to be ready, optionally filtered by label selector -func WaitForDaemonSetsReady(ctx context.Context, client kubernetes.Interface, namespace, labelSelector string) error { - EventuallyWithOffset(1, func(g Gomega) error { - dsList, err := client.AppsV1().DaemonSets(namespace).List(ctx, metav1.ListOptions{ - LabelSelector: labelSelector, - }) - if err != nil { - return err - } - - if len(dsList.Items) == 0 { - return fmt.Errorf("no daemonsets found in namespace %s with selector '%s'", namespace, labelSelector) - } - - for _, ds := range dsList.Items { - // Skip if no pods are desired - if ds.Status.DesiredNumberScheduled == 0 { - continue - } - - if ds.Status.NumberReady != ds.Status.DesiredNumberScheduled { - return fmt.Errorf("daemonset %s/%s rollout incomplete: %d/%d pods ready", - namespace, ds.Name, ds.Status.NumberReady, ds.Status.DesiredNumberScheduled) - } - - if ds.Status.UpdatedNumberScheduled != ds.Status.DesiredNumberScheduled { - return fmt.Errorf("daemonset %s/%s update incomplete: %d/%d pods updated", - namespace, ds.Name, ds.Status.UpdatedNumberScheduled, ds.Status.DesiredNumberScheduled) - } - - // Check generation to ensure we're looking at the latest spec - if ds.Generation != ds.Status.ObservedGeneration { - return fmt.Errorf("daemonset %s/%s generation mismatch: %d != %d", - namespace, ds.Name, ds.Generation, ds.Status.ObservedGeneration) - } - } - - return nil - 
}).WithContext(ctx).WithPolling(DefaultPollInterval).WithTimeout(DefaultTimeout).Should(Succeed()) - return nil -} - -// WaitForDaemonSetPodsReady waits for all pods of a DaemonSet to be ready -func WaitForDaemonSetPodsReady(ctx context.Context, client kubernetes.Interface, namespace, name string) error { - EventuallyWithOffset(1, func(g Gomega) error { - ds, err := client.AppsV1().DaemonSets(namespace).Get(ctx, name, metav1.GetOptions{}) - if err != nil { - return err - } - - selector, err := metav1.LabelSelectorAsSelector(ds.Spec.Selector) - if err != nil { - return fmt.Errorf("invalid selector: %v", err) - } - - pods, err := client.CoreV1().Pods(namespace).List(ctx, metav1.ListOptions{ - LabelSelector: selector.String(), - }) - if err != nil { - return err - } - - if len(pods.Items) == 0 { - return fmt.Errorf("no pods found for daemonset %s/%s", namespace, name) - } - - for _, pod := range pods.Items { - if !isPodReady(&pod) { - return fmt.Errorf("pod %s/%s is not ready", pod.Namespace, pod.Name) - } - } - - return nil - }).WithContext(ctx).WithPolling(DefaultPollInterval).WithTimeout(DefaultTimeout).Should(Succeed()) - return nil -} - -// WaitForNodeLabels waits for specific labels to appear on nodes -func WaitForNodeLabels(ctx context.Context, client kubernetes.Interface, labelSelector string, expectedLabels map[string]string) error { - EventuallyWithOffset(1, func(g Gomega) error { - nodes, err := client.CoreV1().Nodes().List(ctx, metav1.ListOptions{ - LabelSelector: labelSelector, - }) - if err != nil { - return err - } - - if len(nodes.Items) == 0 { - return fmt.Errorf("no nodes found with selector: %s", labelSelector) - } - - // Check each node has the expected labels - for _, node := range nodes.Items { - for key, expectedValue := range expectedLabels { - actualValue, exists := node.Labels[key] - if !exists { - return fmt.Errorf("node %s missing label: %s", node.Name, key) - } - if expectedValue != "" && actualValue != expectedValue { - return fmt.Errorf("node %s label %s=%s, expected %s", - node.Name, key, actualValue, expectedValue) - } - } - } - - return nil - }).WithContext(ctx).WithPolling(DefaultPollInterval).WithTimeout(DefaultTimeout).Should(Succeed()) - return nil -} - -// WaitForGFDLabels waits for GPU Feature Discovery labels on nodes -func WaitForGFDLabels(ctx context.Context, client kubernetes.Interface, nodeName string) error { - gfdLabels := []string{ - "nvidia.com/gfd.timestamp", - "nvidia.com/cuda.driver.major", - "nvidia.com/cuda.driver.minor", - "nvidia.com/gpu.family", - "nvidia.com/gpu.machine", - "nvidia.com/gpu.memory", - "nvidia.com/gpu.product", - } - - EventuallyWithOffset(1, func(g Gomega) error { - node, err := client.CoreV1().Nodes().Get(ctx, nodeName, metav1.GetOptions{}) - if err != nil { - return err - } - - for _, label := range gfdLabels { - if _, exists := node.Labels[label]; !exists { - return fmt.Errorf("node %s missing GFD label: %s", nodeName, label) - } - } - - return nil - }).WithContext(ctx).WithPolling(DefaultPollInterval).WithTimeout(DefaultTimeout).Should(Succeed()) - return nil -} - -// WaitForPodsRunning waits for pods matching a selector to be running -func WaitForPodsRunning(ctx context.Context, client kubernetes.Interface, namespace string, selector labels.Selector) error { - EventuallyWithOffset(1, func(g Gomega) error { - pods, err := client.CoreV1().Pods(namespace).List(ctx, metav1.ListOptions{ - LabelSelector: selector.String(), - }) - if err != nil { - return err - } - - if len(pods.Items) == 0 { - return fmt.Errorf("no pods 
found matching selector: %s", selector.String()) - } - - for _, pod := range pods.Items { - if pod.Status.Phase != corev1.PodRunning { - return fmt.Errorf("pod %s/%s is %s, not Running", pod.Namespace, pod.Name, pod.Status.Phase) - } - } - - return nil - }).WithContext(ctx).WithPolling(DefaultPollInterval).WithTimeout(DefaultTimeout).Should(Succeed()) - return nil -} - -// WaitForDeploymentRollout waits for a deployment to complete its rollout -func WaitForDeploymentRollout(ctx context.Context, client kubernetes.Interface, namespace, name string) error { - EventuallyWithOffset(1, func(g Gomega) error { - deployment, err := client.AppsV1().Deployments(namespace).Get(ctx, name, metav1.GetOptions{}) - if err != nil { - return err - } - - // Check if the deployment is complete - for _, condition := range deployment.Status.Conditions { - if condition.Type == appsv1.DeploymentProgressing { - if condition.Status != corev1.ConditionTrue { - return fmt.Errorf("deployment %s/%s is not progressing: %s", namespace, name, condition.Message) - } - } - if condition.Type == appsv1.DeploymentAvailable { - if condition.Status != corev1.ConditionTrue { - return fmt.Errorf("deployment %s/%s is not available: %s", namespace, name, condition.Message) - } - } - } - - if deployment.Status.UpdatedReplicas != *deployment.Spec.Replicas { - return fmt.Errorf("deployment %s/%s update incomplete: %d/%d replicas updated", - namespace, name, deployment.Status.UpdatedReplicas, *deployment.Spec.Replicas) - } - - if deployment.Status.ReadyReplicas != *deployment.Spec.Replicas { - return fmt.Errorf("deployment %s/%s not ready: %d/%d replicas ready", - namespace, name, deployment.Status.ReadyReplicas, *deployment.Spec.Replicas) - } - - return nil - }).WithContext(ctx).WithPolling(DefaultPollInterval).WithTimeout(DefaultTimeout).Should(Succeed()) - return nil -} - -// isPodReady checks if a pod is ready -func isPodReady(pod *corev1.Pod) bool { - for _, condition := range pod.Status.Conditions { - if condition.Type == corev1.PodReady { - return condition.Status == corev1.ConditionTrue - } - } - return false -} From 189747b3d5d37e3046fcd4fc870eadc0d0f6ee56 Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Wed, 17 Sep 2025 15:34:12 +0200 Subject: [PATCH 4/6] [no-relnote] Set ginkgo version on install Signed-off-by: Evan Lezar --- tests/e2e/Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/e2e/Makefile b/tests/e2e/Makefile index 03c15c1fd..711654215 100644 --- a/tests/e2e/Makefile +++ b/tests/e2e/Makefile @@ -34,9 +34,10 @@ GINKGO_REPORT_ARGS := --json-report=$(LOG_ARTIFACTS)/report.json --junit-report= .PHONY: ginkgo test clean-artifacts +GINKGO_VERSION = $(shell grep -Eo "github.com/onsi/ginkgo/v2.*$$" ./tests/go.mod | sed -e 's&github.com/onsi/ginkgo/v2[[:space:]]&&g') ginkgo: mkdir -p $(CURDIR)/bin - GOBIN=$(CURDIR)/bin go install github.com/onsi/ginkgo/v2/ginkgo@latest + GOBIN=$(CURDIR)/bin go install github.com/onsi/ginkgo/v2/ginkgo@$(GINKGO_VERSION) # Create artifacts directory $(LOG_ARTIFACTS): From d2dde98928d33d7a087266a6360f7ca92b59da48 Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Wed, 17 Sep 2025 16:34:43 +0200 Subject: [PATCH 5/6] fixup! 
TOFIX: reduce diff Signed-off-by: Evan Lezar --- tests/e2e/e2e_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/e2e/e2e_test.go b/tests/e2e/e2e_test.go index 2c6c1661c..ecf363a59 100644 --- a/tests/e2e/e2e_test.go +++ b/tests/e2e/e2e_test.go @@ -876,7 +876,7 @@ func getEnvVarOrDefault[T any](key string, defaultValue T) T { } // waitForDaemonSetsReady waits for DaemonSets in a namespace to be ready, optionally filtered by label selector -func waitForDaemonSetsReady(ctx context.Context, client kubernetes.Interface, namespace, labelSelector string) error { +func waitForDaemonSetsReady(ctx context.Context, client clientset.Interface, namespace, labelSelector string) error { EventuallyWithOffset(1, func(g Gomega) error { dsList, err := client.AppsV1().DaemonSets(namespace).List(ctx, metav1.ListOptions{ LabelSelector: labelSelector, From 49680b4b8ec4fa53a99e453c884162b0fd3553ba Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Wed, 17 Sep 2025 17:01:22 +0200 Subject: [PATCH 6/6] fixup! [no-relnote] Minor cleanups Signed-off-by: Evan Lezar --- tests/e2e/device-plugin_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/e2e/device-plugin_test.go b/tests/e2e/device-plugin_test.go index 917d68ca4..d6f1ea298 100644 --- a/tests/e2e/device-plugin_test.go +++ b/tests/e2e/device-plugin_test.go @@ -148,7 +148,7 @@ var _ = Describe("GPU Device Plugin", Ordered, Label("gpu", "e2e", "device-plugi By("Creating a GPU job") jobNames, err := CreateOrUpdateJobsFromFile(ctx, clientSet, testNamespace.Name, filepath.Join(projectRoot, "testdata", "job-1.yaml")) Expect(err).NotTo(HaveOccurred()) - Expect(jobNames).NotTo(HaveLen(1)) + Expect(jobNames).To(HaveLen(1)) // Defer cleanup for the job DeferCleanup(func(ctx SpecContext) {