Skip to content

Commit 08ac428

Browse files
ArangoGutierrez authored and elezar committed
[no-relnote] Add E2E tests for systemd unit
Signed-off-by: Carlos Eduardo Arango Gutierrez <[email protected]>
1 parent 408dc28 commit 08ac428

File tree

8 files changed

+231
-13
lines changed

8 files changed

+231
-13
lines changed

tests/e2e/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ GINKGO_BIN := $(CURDIR)/bin/ginkgo
2424
# current available tests:
2525
# - nvidia-container-cli
2626
# - docker
27+
# - nvidia-cdi-refresh
2728
GINKGO_FOCUS ?=
2829

2930
test: $(GINKGO_BIN)

tests/e2e/e2e_test.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,8 @@ var (
4242
sshUser string
4343
sshHost string
4444
sshPort string
45+
46+
testContainerName = "ctk-e2e-test-container"
4547
)
4648

4749
func TestMain(t *testing.T) {
Lines changed: 191 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,191 @@
1+
/*
2+
* Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package e2e
18+
19+
import (
20+
"context"
21+
"fmt"
22+
23+
. "github.com/onsi/ginkgo/v2"
24+
. "github.com/onsi/gomega"
25+
)
26+
27+
// Shell script templates executed inside the nested container by the
// nvidia-cdi-refresh specs. The script text is kept at column 0 inside the raw
// strings so that the heredoc terminator (EOF) is recognised by the shell.
const (
	// nvidiaCdiRefreshDegradedSystemdTemplate uninstalls the toolkit, removes
	// any existing CDI spec, stops the nvidia-cdi-refresh units, starts a unit
	// that is guaranteed to fail (intended to put systemd into a degraded
	// state) and then reinstalls the toolkit packages from ${TMPDIR}.
	nvidiaCdiRefreshDegradedSystemdTemplate = `
# Read the TMPDIR
TMPDIR=$(cat /tmp/ctk_e2e_temp_dir.txt)
export TMPDIR

# uninstall the nvidia-container-toolkit
apt-get remove -y nvidia-container-toolkit nvidia-container-toolkit-base libnvidia-container-tools libnvidia-container1
apt-get autoremove -y

# Remove the cdi file if it exists
if [ -f /var/run/cdi/nvidia.yaml ]; then
rm -f /var/run/cdi/nvidia.yaml
fi

# Stop the nvidia-cdi-refresh.path and nvidia-cdi-refresh.service units
systemctl stop nvidia-cdi-refresh.path
systemctl stop nvidia-cdi-refresh.service

# Reload the systemd daemon
systemctl daemon-reload

# Create a dummy service that always fails, to force systemd to enter a degraded state
cat <<EOF > /etc/systemd/system/dummy.service
[Unit]
Description=Dummy systemd service

[Service]
Type=oneshot
ExecStart=/usr/bin/sh -c "exit 1"
EOF

# We know the dummy service will fail, so we can ignore the error
systemctl start dummy.service 2>/dev/null || true

# Install the nvidia-container-toolkit
dpkg -i ${TMPDIR}/packages/ubuntu18.04/amd64/libnvidia-container1_*_amd64.deb ${TMPDIR}/packages/ubuntu18.04/amd64/nvidia-container-toolkit-base_*_amd64.deb ${TMPDIR}/packages/ubuntu18.04/amd64/libnvidia-container-tools_*_amd64.deb
`

	// nvidiaCdiRefreshPathActiveTemplate exits non-zero unless
	// nvidia-cdi-refresh.path is reported as active by systemctl.
	nvidiaCdiRefreshPathActiveTemplate = `
if ! systemctl status nvidia-cdi-refresh.path | grep "Active: active"; then
echo "nvidia-cdi-refresh.path is not Active"
exit 1
fi
`

	// nvidiaCdiRefreshServiceLoadedTemplate exits non-zero unless
	// nvidia-cdi-refresh.service is reported as loaded by systemctl.
	nvidiaCdiRefreshServiceLoadedTemplate = `
if ! systemctl status nvidia-cdi-refresh.service | grep "Loaded: loaded"; then
echo "nvidia-cdi-refresh.service is not loaded"
exit 1
fi
`

	// nvidiaCdiRefreshFileExistsTemplate checks that /var/run/cdi/nvidia.yaml
	// exists and matches a freshly generated CDI spec.
	nvidiaCdiRefreshFileExistsTemplate = `
# Check that /var/run/cdi/nvidia.yaml exists; exit with 1 if it does not
if [ ! -f /var/run/cdi/nvidia.yaml ]; then
echo "nvidia.yaml file does not exist"
exit 1
fi

# generate the nvidia.yaml file
nvidia-ctk cdi generate --output=/tmp/nvidia.yaml

# diff the generated file with the one in /var/run/cdi/nvidia.yaml and exit with 1 if they differ
if ! diff /var/run/cdi/nvidia.yaml /tmp/nvidia.yaml; then
echo "nvidia.yaml file is different"
exit 1
fi
`

	// nvidiaCdiRefreshUpgradeTemplate removes the CDI spec, touches the
	// nvidia-ctk binary to simulate a package change, and then checks that the
	// spec is regenerated and matches a freshly generated one.
	nvidiaCdiRefreshUpgradeTemplate = `
# remove the generated files (-f: do not fail if one of them is already absent)
rm -f /var/run/cdi/nvidia.yaml /tmp/nvidia.yaml

# Touch the nvidia-ctk binary to change the mtime
# This will trigger the nvidia-cdi-refresh.path unit to call the
# nvidia-cdi-refresh.service unit, simulating a change (update/downgrade) in the nvidia-ctk binary.
touch $(which nvidia-ctk)

# wait for 3 seconds
sleep 3

# Check if the file /var/run/cdi/nvidia.yaml is created
if [ ! -f /var/run/cdi/nvidia.yaml ]; then
echo "nvidia.yaml file is not created after touching the nvidia-ctk binary"
exit 1
fi

# generate the nvidia.yaml file
nvidia-ctk cdi generate --output=/tmp/nvidia.yaml

# diff the generated file with the one in /var/run/cdi/nvidia.yaml and exit with 1 if they differ
if ! diff /var/run/cdi/nvidia.yaml /tmp/nvidia.yaml; then
echo "nvidia.yaml file is different"
exit 1
fi
`
)
123+
124+
var _ = Describe("nvidia-cdi-refresh", Ordered, ContinueOnFailure, Label("systemd-unit"), func() {
125+
var (
126+
nestedContainerRunner Runner
127+
// TODO(@ArangoGutierrez): https://github.com/NVIDIA/nvidia-container-toolkit/pull/1235/files#r2302013660
128+
outerContainerImage = "docker.io/kindest/base:v20250521-31a79fd4"
129+
)
130+
131+
BeforeAll(func(ctx context.Context) {
132+
var err error
133+
nestedContainerRunner, err = NewNestedContainerRunner(runner, outerContainerImage, installCTK, testContainerName)
134+
Expect(err).ToNot(HaveOccurred())
135+
})
136+
137+
AfterAll(func(ctx context.Context) {
138+
// Cleanup: remove the container and the temporary script on the host.
139+
// Use || true to ensure cleanup doesn't fail the test
140+
runner.Run(fmt.Sprintf("docker rm -f %s 2>/dev/null || true", testContainerName)) //nolint:errcheck
141+
})
142+
143+
When("installing nvidia-container-toolkit", Ordered, func() {
144+
It("should load the nvidia-cdi-refresh.path unit", func(ctx context.Context) {
145+
_, _, err := nestedContainerRunner.Run(nvidiaCdiRefreshPathActiveTemplate)
146+
Expect(err).ToNot(HaveOccurred())
147+
})
148+
149+
It("should load the nvidia-cdi-refresh.service unit", func(ctx context.Context) {
150+
_, _, err := nestedContainerRunner.Run(nvidiaCdiRefreshServiceLoadedTemplate)
151+
Expect(err).ToNot(HaveOccurred())
152+
})
153+
154+
It("should generate the nvidia.yaml file", func(ctx context.Context) {
155+
_, _, err := nestedContainerRunner.Run(nvidiaCdiRefreshFileExistsTemplate)
156+
Expect(err).ToNot(HaveOccurred())
157+
})
158+
159+
It("should refresh the nvidia.yaml file after upgrading the nvidia-container-toolkit", func(ctx context.Context) {
160+
_, _, err := nestedContainerRunner.Run(nvidiaCdiRefreshUpgradeTemplate)
161+
Expect(err).ToNot(HaveOccurred())
162+
})
163+
})
164+
165+
When("installing nvidia-container-toolkit on a system with a degraded systemd", Ordered, func() {
166+
BeforeAll(func(ctx context.Context) {
167+
_, _, err := nestedContainerRunner.Run(nvidiaCdiRefreshDegradedSystemdTemplate)
168+
Expect(err).ToNot(HaveOccurred())
169+
})
170+
171+
It("should load the nvidia-cdi-refresh.path unit", func(ctx context.Context) {
172+
_, _, err := nestedContainerRunner.Run(nvidiaCdiRefreshPathActiveTemplate)
173+
Expect(err).ToNot(HaveOccurred())
174+
})
175+
176+
It("should load the nvidia-cdi-refresh.service unit", func(ctx context.Context) {
177+
_, _, err := nestedContainerRunner.Run(nvidiaCdiRefreshServiceLoadedTemplate)
178+
Expect(err).ToNot(HaveOccurred())
179+
})
180+
181+
It("should generate the nvidia.yaml file", func(ctx context.Context) {
182+
_, _, err := nestedContainerRunner.Run(nvidiaCdiRefreshFileExistsTemplate)
183+
Expect(err).ToNot(HaveOccurred())
184+
})
185+
186+
It("should generate the nvidia.yaml file", func(ctx context.Context) {
187+
_, _, err := nestedContainerRunner.Run(nvidiaCdiRefreshFileExistsTemplate)
188+
Expect(err).ToNot(HaveOccurred())
189+
})
190+
})
191+
})

tests/e2e/nvidia-container-cli_test.go

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -72,13 +72,12 @@ IN_NS
7272
var _ = Describe("nvidia-container-cli", Ordered, ContinueOnFailure, Label("libnvidia-container"), func() {
7373
var (
7474
nestedContainerRunner Runner
75-
containerName = "node-container-e2e"
7675
hostOutput string
7776
)
7877

7978
BeforeAll(func(ctx context.Context) {
8079
var err error
81-
nestedContainerRunner, err = NewNestedContainerRunner(runner, installCTK, containerName)
80+
nestedContainerRunner, err = NewNestedContainerRunner(runner, "ubuntu", installCTK, testContainerName)
8281
Expect(err).ToNot(HaveOccurred())
8382

8483
// We also need to install the toolkit in the nested runner.
@@ -103,7 +102,7 @@ var _ = Describe("nvidia-container-cli", Ordered, ContinueOnFailure, Label("libn
103102
AfterAll(func(ctx context.Context) {
104103
// Cleanup: remove the container and the temporary script on the host.
105104
// Use || true to ensure cleanup doesn't fail the test
106-
runner.Run(fmt.Sprintf("docker rm -f %s 2>/dev/null || true", containerName)) //nolint:errcheck
105+
runner.Run(fmt.Sprintf("docker rm -f %s 2>/dev/null || true", testContainerName)) //nolint:errcheck
107106
})
108107

109108
It("should report the same GPUs inside the container as on the host", func(ctx context.Context) {

tests/e2e/runner.go

Lines changed: 23 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -35,23 +35,35 @@ const (
3535
{{ range $i, $a := .AdditionalArguments -}}
3636
{{ $a }} \
3737
{{ end -}}
38-
ubuntu sleep infinity`
38+
{{.OuterContainerImage}} sleep infinity`
3939

4040
installDockerTemplate = `
4141
export DEBIAN_FRONTEND=noninteractive
4242
4343
# Add Docker official GPG key:
4444
apt-get update
45-
apt-get install -y ca-certificates curl apt-utils gnupg2
45+
apt-get install -y apt-utils ca-certificates curl gnupg2
4646
install -m 0755 -d /etc/apt/keyrings
47-
curl -fsSL https://download.docker.com/linux/ubuntu/gpg -o /etc/apt/keyrings/docker.asc
47+
48+
# Read OS information from /etc/os-release
49+
. /etc/os-release
50+
51+
if [ "${ID}" = "debian" ]; then
52+
curl -fsSL https://download.docker.com/linux/debian/gpg -o /etc/apt/keyrings/docker.asc
53+
else
54+
curl -fsSL https://download.docker.com/linux/ubuntu/gpg -o /etc/apt/keyrings/docker.asc
55+
fi
4856
chmod a+r /etc/apt/keyrings/docker.asc
4957
5058
# Add the repository to Apt sources:
51-
echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/ubuntu $(. /etc/os-release && echo \"${UBUNTU_CODENAME:-$VERSION_CODENAME}\") stable" | tee /etc/apt/sources.list.d/docker.list > /dev/null
59+
if [ "${ID}" = "debian" ]; then
60+
echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/debian ${VERSION_CODENAME} stable" | tee /etc/apt/sources.list.d/docker.list > /dev/null
61+
else
62+
echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/ubuntu ${UBUNTU_CODENAME:-$VERSION_CODENAME} stable" | tee /etc/apt/sources.list.d/docker.list > /dev/null
63+
fi
5264
apt-get update
5365
54-
apt-get install -y docker-ce docker-ce-cli containerd.io
66+
apt-get install -y docker-ce docker-ce-cli
5567
5668
# start dockerd in the background
5769
dockerd &
@@ -132,7 +144,7 @@ func NewRunner(opts ...runnerOption) Runner {
132144
// NewNestedContainerRunner creates a new nested container runner.
133145
// A nested container runs a container inside another container based on a
134146
// given runner (remote or local).
135-
func NewNestedContainerRunner(runner Runner, installCTK bool, containerName string) (Runner, error) {
147+
func NewNestedContainerRunner(runner Runner, baseImage string, installCTK bool, containerName string) (Runner, error) {
136148
additionalContainerArguments := []string{}
137149

138150
// If a container with the same name exists from a previous test run, remove it first.
@@ -195,6 +207,9 @@ func NewNestedContainerRunner(runner Runner, installCTK bool, containerName stri
195207
}
196208
}
197209

210+
// Mount the /lib/modules directory as a volume to enable the nvidia-cdi-refresh service
211+
additionalContainerArguments = append(additionalContainerArguments, "-v /lib/modules:/lib/modules")
212+
198213
// Launch the container in detached mode.
199214
var outerContainerScriptBuilder strings.Builder
200215
outerContainerTemplate, err := template.New("outerContainer").Parse(outerContainerTemplate)
@@ -204,9 +219,11 @@ func NewNestedContainerRunner(runner Runner, installCTK bool, containerName stri
204219
err = outerContainerTemplate.Execute(&outerContainerScriptBuilder, struct {
205220
ContainerName string
206221
AdditionalArguments []string
222+
OuterContainerImage string
207223
}{
208224
ContainerName: containerName,
209225
AdditionalArguments: additionalContainerArguments,
226+
OuterContainerImage: baseImage,
210227
})
211228
if err != nil {
212229
return nil, fmt.Errorf("failed to execute start container template: %w", err)

tests/go.mod

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,12 @@ require (
1616
github.com/go-task/slim-sprig/v3 v3.0.0 // indirect
1717
github.com/google/go-cmp v0.7.0 // indirect
1818
github.com/google/pprof v0.0.0-20250403155104-27863c87afa6 // indirect
19+
github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e // indirect
1920
go.uber.org/automaxprocs v1.6.0 // indirect
2021
go.yaml.in/yaml/v3 v3.0.4 // indirect
2122
golang.org/x/net v0.43.0 // indirect
2223
golang.org/x/sys v0.36.0 // indirect
2324
golang.org/x/text v0.29.0 // indirect
2425
golang.org/x/tools v0.36.0 // indirect
26+
gopkg.in/check.v1 v1.0.0-20200902074654-038fdea0a05b // indirect
2527
)

tests/go.sum

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,12 @@ github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
1010
github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
1111
github.com/google/pprof v0.0.0-20250403155104-27863c87afa6 h1:BHT72Gu3keYf3ZEu2J0b1vyeLSOYI8bm5wbJM/8yDe8=
1212
github.com/google/pprof v0.0.0-20250403155104-27863c87afa6/go.mod h1:boTsfXsheKC2y+lKOCMpSfarhxDeIzfZG1jqGcPl3cA=
13-
github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI=
14-
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
13+
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
14+
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
1515
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
1616
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
17+
github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e h1:fD57ERR4JtEqsWbfPhv4DMiApHyliiK5xCTNVSPiaAs=
18+
github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno=
1719
github.com/onsi/ginkgo/v2 v2.25.3 h1:Ty8+Yi/ayDAGtk4XxmmfUy4GabvM+MegeB4cDLRi6nw=
1820
github.com/onsi/ginkgo/v2 v2.25.3/go.mod h1:43uiyQC4Ed2tkOzLsEYm7hnrb7UJTWHYNsuy3bG/snE=
1921
github.com/onsi/gomega v1.38.2 h1:eZCjf2xjZAqe+LeWvKb5weQ+NcPwX84kqJ0cZNxok2A=
@@ -43,7 +45,7 @@ golang.org/x/tools v0.36.0/go.mod h1:WBDiHKJK8YgLHlcQPYQzNCkUxUypCaa5ZegCVutKm+s
4345
google.golang.org/protobuf v1.36.7 h1:IgrO7UwFQGJdRNXH/sQux4R1Dj1WAKcLElzeeRaXV2A=
4446
google.golang.org/protobuf v1.36.7/go.mod h1:jduwjTPXsFjZGTmRluh+L6NjiWu7pchiJ2/5YcXBHnY=
4547
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
46-
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY=
47-
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
48+
gopkg.in/check.v1 v1.0.0-20200902074654-038fdea0a05b h1:QRR6H1YWRnHb4Y/HeNFCTJLFVxaq6wH4YuVdsUOr75U=
49+
gopkg.in/check.v1 v1.0.0-20200902074654-038fdea0a05b/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
4850
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
4951
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=

tests/vendor/modules.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@ github.com/google/go-cmp/cmp/internal/value
1818
# github.com/google/pprof v0.0.0-20250403155104-27863c87afa6
1919
## explicit; go 1.23
2020
github.com/google/pprof/profile
21+
# github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e
22+
## explicit; go 1.12
2123
# github.com/onsi/ginkgo/v2 v2.25.3
2224
## explicit; go 1.23.0
2325
github.com/onsi/ginkgo/v2
@@ -104,3 +106,5 @@ golang.org/x/text/transform
104106
golang.org/x/tools/cover
105107
golang.org/x/tools/go/ast/edge
106108
golang.org/x/tools/go/ast/inspector
109+
# gopkg.in/check.v1 v1.0.0-20200902074654-038fdea0a05b
110+
## explicit

0 commit comments

Comments
 (0)