Skip to content

Commit 16dbcbb

Browse files
committed
feat: restructure into manager with config
I reorganized the code so there is a Manager for devices, and the manager owns a configuration for naming and paths that is exposed to the outside world. We can further optimize but this is a good start. Signed-off-by: vsoch <[email protected]>
1 parent 81d5506 commit 16dbcbb

File tree

9 files changed

+485
-48
lines changed

9 files changed

+485
-48
lines changed

Makefile

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
SOURCES := $(wildcard *.go cmd/*/*.go pkg/*/*.go)
2-
32
VERSION=$(shell git describe --tags --dirty 2>/dev/null)
3+
DOCKER_IMAGE ?= "ghcr.io/converged-computing/cxi-k8s-device-plugin"
4+
DOCKER_TAG ?= "latest"
45

56
ifeq ($(VERSION),)
67
VERSION := "0.0.1-beta"
@@ -16,6 +17,14 @@ build: $(SOURCES)
1617
tidy:
1718
go mod tidy
1819

20+
.PHONY: docker-build
21+
docker-build:
22+
docker build -t $(DOCKER_IMAGE):$(DOCKER_TAG) .
23+
24+
.PHONY: docker-push
25+
docker-push:
26+
docker push $(DOCKER_IMAGE):$(DOCKER_TAG)
27+
1928
.PHONY: clean
2029
clean:
2130
rm -rf bin/

README.md

Lines changed: 287 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,12 +60,298 @@ kubectl apply \
6060
-f ./deploy/hpecxi-device-plugin-ds.yaml
6161
```
6262

63-
## Pod Resources
63+
## Development
64+
65+
You can build the docker container:
66+
67+
```bash
68+
DOCKER_TAG=04 make docker-build
69+
DOCKER_TAG=04 make docker-push
70+
```
71+
72+
## Example
73+
74+
### Pod Resources
75+
76+
Here is an example of how to request a device from a PodSpec:
6477

6578
```yaml
6679
resources:
6780
requests:
6881
beta.hpe.com/cxi: 1
6982
```
7083
84+
### Running
85+
86+
You can run an example with a Flux MiniCluster.
87+
88+
```bash
89+
# Look at this file first and ensure you have updated the entrypoint for your setup
90+
kubectl apply -f ./deploy/hpecxi-device-plugin-ds.yaml
91+
```
92+
93+
Here are the arguments you can set:
94+
95+
```console
96+
/go/bin/cxi-k8s-device-plugin --help
97+
HPE Slingshot device plugin for Kubernetes
98+
/go/bin/cxi-k8s-device-plugin version 0.0.1-beta
99+
Usage:
100+
-alsologtostderr
101+
log to standard error as well as files
102+
-cxi-driver-root string
103+
/sys/modules/<x>/devices root (default "/sys/module/cxi_ss1/drivers")
104+
-libcxi string
105+
Directory path to lib64 with libfabric (default "/usr/lib64")
106+
-libfabric string
107+
Directory path to lib64 with libfabric (default "/opt/cray/libfabric/2.1/lib64")
108+
-log_backtrace_at value
109+
when logging hits line file:N, emit a stack trace
110+
-log_dir string
111+
If non-empty, write log files in this directory
112+
-log_link string
113+
If non-empty, add symbolic links in this directory to the log files
114+
-logbuflevel int
115+
Buffer log messages logged at this level or lower (-1 means don't buffer; 0 means buffer INFO only; ...). Has limited applicability on non-prod platforms.
116+
-logtostderr
117+
log to standard error instead of files
118+
-net-device string
119+
Device prefix to search for in net (e.g, hsi) (default "hsi")
120+
-pci-name string
121+
PCI device name (e.g, pci:cxi_ss1 (default "pci:cxi_ss1")
122+
-pulse int
123+
time between health check polling in seconds. Set to 0 to disable.
124+
-stderrthreshold value
125+
logs at or above this threshold go to stderr (default 2)
126+
-v value
127+
log level for V logs
128+
-vmodule value
129+
comma-separated list of pattern=N settings for file-filtered logging
130+
```
131+
132+
Create the MiniCluster (non-interactive):
133+
134+
```bash
135+
kubectl apply -f example/flux-minicluster.yaml
136+
```
137+
138+
And watch lammps run!
139+
140+
<details>
141+
142+
<summary>LAMMPS Log</summary>
143+
144+
```console
145+
[sochat1@hetchy1001:deploy]$ kubectl logs lmp-0-8tjgt -f
146+
Defaulted container "lmp" out of: lmp, flux-view (init)
147+
🟧️ wait-fs: 2025/10/01 07:15:11 wait-fs.go:40: /mnt/flux/flux-operator-done.txt
148+
🟧️ wait-fs: 2025/10/01 07:15:11 wait-fs.go:49: Found existing path /mnt/flux/flux-operator-done.txt
149+
150+
Hello user root
151+
152+
🌟️ Curve Certificate
153+
curve.cert
154+
# **** Generated on 2023-04-26 22:54:42 by CZMQ ****
155+
# ZeroMQ CURVE **Secret** Certificate
156+
# DO NOT PROVIDE THIS FILE TO OTHER USERS nor change its permissions.
157+
158+
metadata
159+
name = "flux-cert-generator"
160+
keygen.hostname = "lmp-0"
161+
curve
162+
public-key = "5*NS#QbaV-ean:38}mN+I1FrcetR9cuFRLDhC?Hf"
163+
secret-key = "goN&y=}!Vn(nt7G4Zo-MCpiU[TwYW&3#X&t<:!qJ"
164+
165+
📦 Resources
166+
flux R encode --hosts=lmp-[0-1] --local
167+
{"version": 1, "execution": {"R_lite": [{"rank": "0-1", "children": {"core": "0-63"}}], "starttime": 0.0, "expiration": 0.0, "nodelist": ["lmp-[0-1]"]}}
168+
👋 Hello, I'm lmp-0
169+
The main host is lmp-0
170+
The working directory is /opt/lammps/examples/reaxff/HNS, contents include:
171+
README.txt ffield.reax.hns log.30Nov23.reaxff.hns.g++.1
172+
data.hns-equil in.reaxff.hns log.30Nov23.reaxff.hns.g++.4
173+
🚩️ Flux Option Flags defined
174+
Command provided is: lmp -v x 8 -v y 8 -v z 8 -in in.reaxff.hns -nocite
175+
Flags for flux are -N 2 -n128
176+
177+
🌀 Submit Mode: flux start -o --config /mnt/flux/view/etc/flux/config -Scron.directory=/etc/flux/system/cron.d -Stbon.fanout=256 -Srundir=/mnt/flux/view/run/flux -Sstatedir=/mnt/flux/view/var/lib/flux -Slocal-uri=local:///mnt/flux/view/run/flux/local -Stbon.connect_timeout=5s -Slog-stderr-level=6 -Slog-stderr-mode=local flux submit -N 2 -n128 --quiet --watch lmp -v x 8 -v y 8 -v z 8 -in in.reaxff.hns -nocite
178+
Flags for flux are -N 2 -n128
179+
broker.info[0]: start: none->join 0.433807ms
180+
broker.info[0]: parent-none: join->init 0.017554ms
181+
cron.info[0]: synchronizing cron tasks to event heartbeat.pulse
182+
job-manager.info[0]: restart: 0 jobs
183+
job-manager.info[0]: restart: 0 running jobs
184+
job-manager.info[0]: restart: checkpoint.job-manager not found
185+
broker.info[0]: rc1.0: running /etc/flux/rc1.d/01-sched-fluxion
186+
sched-fluxion-resource.info[0]: version 0.45.0
187+
sched-fluxion-resource.warning[0]: create_reader: allowlist unsupported
188+
sched-fluxion-resource.info[0]: populate_resource_db: loaded resources from core's resource.acquire
189+
sched-fluxion-qmanager.info[0]: version 0.45.0
190+
broker.info[0]: rc1.0: running /etc/flux/rc1.d/02-cron
191+
broker.info[0]: rc1.0: tab: cron-1 created: scheduled in 71088.204s at Thu Oct 2 03:00:00 2025
192+
broker.info[0]: rc1.0: /etc/flux/rc1 Exited (rc=0) 0.4s
193+
broker.info[0]: rc1-success: init->quorum 0.397933s
194+
broker.info[0]: online: lmp-0 (ranks 0)
195+
broker.info[0]: online: lmp-[0-1] (ranks 0-1)
196+
broker.info[0]: quorum-full: quorum->run 0.448331s
197+
LAMMPS (22 Jul 2025 - Development - patch_22Jul2025-382-g1db2e93763)
198+
OMP_NUM_THREADS environment is not set. Defaulting to 1 thread.
199+
using 1 OpenMP thread(s) per MPI task
200+
Reading data file ...
201+
triclinic box = (0 0 0) to (22.326 11.1412 13.778966) with tilt (0 -5.02603 0)
202+
8 by 4 by 4 MPI processor grid
203+
reading atoms ...
204+
304 atoms
205+
reading velocities ...
206+
304 velocities
207+
read_data CPU = 0.052 seconds
208+
Replication is creating a 8x8x8 = 512 times larger system...
209+
triclinic box = (0 0 0) to (178.608 89.1296 110.23173) with tilt (0 -40.20824 0)
210+
8 by 4 by 4 MPI processor grid
211+
bounding box image = (0 -1 -1) to (0 1 1)
212+
bounding box extra memory = 0.03 MB
213+
average # of replicas added to proc = 19.79 out of 512 (3.87%)
214+
155648 atoms
215+
replicate CPU = 0.003 seconds
216+
Neighbor list info ...
217+
update: every = 20 steps, delay = 0 steps, check = no
218+
max neighbors/atom: 2000, page size: 100000
219+
master list distance cutoff = 11
220+
ghost atom cutoff = 11
221+
binsize = 5.5, bins = 40 17 21
222+
2 neighbor lists, perpetual/occasional/extra = 2 0 0
223+
(1) pair reaxff, perpetual
224+
attributes: half, newton off, ghost
225+
pair build: half/bin/ghost/newtoff
226+
stencil: full/ghost/bin/3d
227+
bin: standard
228+
(2) fix qeq/reax, perpetual, copy from (1)
229+
attributes: half, newton off
230+
pair build: copy
231+
stencil: none
232+
bin: none
233+
Setting up Verlet run ...
234+
Unit style : real
235+
Current step : 0
236+
Time step : 0.1
237+
Per MPI rank memory allocation (min/avg/max) = 143.9 | 143.9 | 143.9 Mbytes
238+
Step Temp PotEng Press E_vdwl E_coul Volume
239+
0 300 -113.27833 438.99618 -111.57687 -1.7014647 1754807.5
240+
10 300.64265 -113.28007 771.21336 -111.57866 -1.7014067 1754807.5
241+
20 302.23163 -113.28471 1617.9776 -111.58344 -1.7012699 1754807.5
242+
30 302.52602 -113.28543 4311.9345 -111.58441 -1.701021 1754807.5
243+
40 301.00893 -113.28084 6495.276 -111.58016 -1.7006791 1754807.5
244+
50 298.22387 -113.27248 6671.9892 -111.57218 -1.7003023 1754807.5
245+
60 295.54892 -113.26445 6412.5588 -111.56453 -1.699926 1754807.5
246+
70 294.96528 -113.26266 7033.6801 -111.56311 -1.6995494 1754807.5
247+
80 297.40591 -113.26991 8436.3516 -111.57073 -1.6991764 1754807.5
248+
90 301.11971 -113.28098 9412.0446 -111.58214 -1.6988469 1754807.5
249+
100 302.41516 -113.28478 10326.738 -111.58617 -1.6986109 1754807.5
250+
Loop time of 21.7324 on 128 procs for 100 steps with 155648 atoms
251+
252+
Performance: 0.040 ns/day, 603.677 hours/ns, 4.601 timesteps/s, 716.204 katom-step/s
253+
98.4% CPU use with 128 MPI tasks x 1 OpenMP threads
254+
255+
MPI task timing breakdown:
256+
Section | min time | avg time | max time |%varavg| %total
257+
---------------------------------------------------------------
258+
Pair | 10.754 | 12.247 | 13.543 | 15.6 | 56.36
259+
Neigh | 0.21365 | 0.21834 | 0.2225 | 0.7 | 1.00
260+
Comm | 0.69795 | 2.0177 | 3.5951 | 39.7 | 9.28
261+
Output | 0.0037101 | 0.026436 | 0.043183 | 10.4 | 0.12
262+
Modify | 7.1327 | 7.2212 | 7.3356 | 2.3 | 33.23
263+
Other | | 0.001477 | | | 0.01
264+
265+
Nlocal: 1216 ave 1222 max 1207 min
266+
Histogram: 1 0 4 5 27 14 39 18 16 4
267+
Nghost: 7592.56 ave 7610 max 7578 min
268+
Histogram: 3 9 13 23 31 19 16 10 3 1
269+
Neighs: 432968 ave 434953 max 429964 min
270+
Histogram: 1 0 6 8 21 26 25 24 13 4
271+
272+
Total # of neighbors = 55419905
273+
Ave neighs/atom = 356.05922
274+
Neighbor list builds = 5
275+
Dangerous builds not checked
276+
Total wall time: 0:00:22
277+
broker.info[0]: rc2.0: flux submit -N 2 -n128 --quiet --watch lmp -v x 8 -v y 8 -v z 8 -in in.reaxff.hns -nocite Exited (rc=0) 27.2s
278+
broker.info[0]: rc2-success: run->cleanup 27.2071s
279+
broker.info[0]: cleanup.0: flux queue stop --quiet --all --nocheckpoint Exited (rc=0) 0.1s
280+
broker.info[0]: cleanup.1: flux resource acquire-mute Exited (rc=0) 0.1s
281+
broker.info[0]: cleanup.2: flux cancel --user=all --quiet --states RUN Exited (rc=0) 0.1s
282+
broker.info[0]: cleanup.3: flux queue idle --quiet Exited (rc=0) 0.1s
283+
broker.info[0]: cleanup-success: cleanup->shutdown 0.451006s
284+
broker.info[0]: children-complete: shutdown->finalize 57.1783ms
285+
broker.info[0]: rc3.0: running /etc/flux/rc3.d/01-sched-fluxion
286+
broker.info[0]: rc3.0: /etc/flux/rc3 Exited (rc=0) 0.1s
287+
broker.info[0]: rc3-success: finalize->goodbye 96.4793ms
288+
broker.info[0]: goodbye: goodbye->exit 0.04662ms
289+
```
290+
291+
</details>
292+
293+
You can set `logging.quiet: true` to only see the LAMMPS logs. See device plugin logs if needed. There is one pod deployed per node.
294+
295+
```bash
296+
$ kubectl logs -n kube-system hpecxi-device-plugin-daemonset-gc2lx -f
297+
```
298+
299+
<details>
300+
301+
<summary>Daemonset Pod Log (Plugin Device)</summary>
302+
303+
```console
304+
I1001 07:02:08.992426 1 main.go:43] HPE Slingshot device plugin for Kubernetes
305+
I1001 07:02:08.992543 1 main.go:43] /go/bin/cxi-k8s-device-plugin version 0.0.1-beta
306+
I1001 07:02:08.992549 1 main.go:61] 🌊 Configuration:
307+
I1001 07:02:08.992553 1 main.go:62] Net Device Prefix: hsi
308+
I1001 07:02:08.992558 1 main.go:63] CXI Driver Root: /sys/module/cxi_ss1/drivers
309+
I1001 07:02:08.992562 1 main.go:64] Libfabric Path: /opt/cray/libfabric/2.1/lib64
310+
I1001 07:02:08.992566 1 main.go:65] Libcxi Path: /usr/lib64
311+
I1001 07:02:08.992570 1 main.go:66] PCI Name: pci:cxi_ss1
312+
I1001 07:02:08.992576 1 manager.go:42] Starting device plugin manager
313+
I1001 07:02:08.992607 1 manager.go:46] Registering for system signal notifications
314+
I1001 07:02:08.992776 1 manager.go:52] Registering for notifications of filesystem changes in device plugin directory
315+
I1001 07:02:08.992839 1 manager.go:60] Starting Discovery on new plugins
316+
I1001 07:02:08.992861 1 manager.go:66] Handling incoming signals
317+
I1001 07:02:08.992871 1 manager.go:71] Received new list of plugins: [cxi]
318+
I1001 07:02:08.992897 1 manager.go:110] Adding a new plugin "cxi"
319+
I1001 07:02:08.992915 1 plugin.go:64] cxi: Starting plugin server
320+
I1001 07:02:08.992927 1 plugin.go:94] cxi: Starting the DPI gRPC server
321+
I1001 07:02:08.993092 1 plugin.go:112] cxi: Serving requests...
322+
I1001 07:02:08.993100 1 plugin.go:128] cxi: Registering the DPI with Kubelet
323+
I1001 07:02:08.993422 1 plugin.go:140] cxi: Registration for endpoint beta.hpe.com_cxi
324+
I1001 07:02:08.995004 1 hpecxi.go:63] [/sys/module/cxi_ss1/drivers/pci:cxi_ss1/0000:01:00.0 /sys/module/cxi_ss1/drivers/pci:cxi_ss1/0000:c2:00.0]
325+
I1001 07:02:08.995019 1 hpecxi.go:67] /sys/module/cxi_ss1/drivers/pci:cxi_ss1/0000:01:00.0
326+
I1001 07:02:08.995061 1 hpecxi.go:67] /sys/module/cxi_ss1/drivers/pci:cxi_ss1/0000:c2:00.0
327+
I1001 07:02:08.995099 1 hpecxi.go:81] Found device hsi1
328+
I1001 07:02:08.995103 1 hpecxi.go:81] Found device hsi0
329+
I1001 07:02:08.995106 1 plugin.go:124] Found 2 HPE Slingshot NICs
330+
I1001 07:02:37.506990 1 plugin.go:185] Mounting /usr/lib64/libcxi.a
331+
I1001 07:02:37.507023 1 plugin.go:185] Mounting /usr/lib64/libcxi.la
332+
I1001 07:02:37.507027 1 plugin.go:185] Mounting /usr/lib64/libcxi.so
333+
I1001 07:02:37.507030 1 plugin.go:185] Mounting /usr/lib64/libcxi.so.1
334+
I1001 07:02:37.507032 1 plugin.go:185] Mounting /usr/lib64/libcxi.so.1.5.0
335+
I1001 07:02:37.507034 1 plugin.go:185] Mounting /usr/lib64/libcxiutils.a
336+
I1001 07:02:37.507037 1 plugin.go:185] Mounting /usr/lib64/libcxiutils.la
337+
I1001 07:02:37.507039 1 plugin.go:185] Mounting /usr/lib64/libcxiutils.so
338+
I1001 07:02:37.507041 1 plugin.go:185] Mounting /usr/lib64/libcxiutils.so.0
339+
I1001 07:02:37.507044 1 plugin.go:185] Mounting /usr/lib64/libcxiutils.so.0.0.0
340+
I1001 07:02:37.507046 1 plugin.go:185] Mounting /usr/lib64/libcxiutils.a
341+
I1001 07:02:37.507048 1 plugin.go:185] Mounting /usr/lib64/libcxiutils.la
342+
I1001 07:02:37.507050 1 plugin.go:185] Mounting /usr/lib64/libcxiutils.so
343+
I1001 07:02:37.507052 1 plugin.go:185] Mounting /usr/lib64/libcxiutils.so.0
344+
I1001 07:02:37.507055 1 plugin.go:185] Mounting /usr/lib64/libcxiutils.so.0.0.0
345+
I1001 07:02:37.507057 1 plugin.go:185] Mounting /opt/cray/libfabric/2.1/lib64/libfabric
346+
I1001 07:02:37.507059 1 plugin.go:185] Mounting /opt/cray/libfabric/2.1/lib64/libfabric.a
347+
I1001 07:02:37.507062 1 plugin.go:185] Mounting /opt/cray/libfabric/2.1/lib64/libfabric.so
348+
I1001 07:02:37.507064 1 plugin.go:185] Mounting /opt/cray/libfabric/2.1/lib64/libfabric.so.1
349+
I1001 07:02:37.507066 1 plugin.go:185] Mounting /opt/cray/libfabric/2.1/lib64/libfabric.so.1.18.2
350+
I1001 07:02:37.507070 1 plugin.go:197] Allocating cxi1
351+
```
352+
353+
</details>
354+
355+
Don't forget to clean up!
356+
71357
> #### Make sure the IPAM definitions in `./deploy/NetworkAttachmentDefinition` follow your cluster network requirements.

cmd/main.go

Lines changed: 26 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import (
66
"os"
77
"time"
88

9+
"github.com/HewlettPackard/cxi-k8s-device-plugin/pkg/hpecxi"
910
"github.com/HewlettPackard/cxi-k8s-device-plugin/pkg/plugin"
1011

1112
"github.com/kubevirt/device-plugin-manager/pkg/dpm"
@@ -29,21 +30,41 @@ func main() {
2930
flag.PrintDefaults()
3031
}
3132
var pulse int
32-
devicePrefix := "hsi"
33+
var libfabricPath, libcxiPath, pciName, netDevicePrefix, cxiDriverRoot string
3334
flag.IntVar(&pulse, "pulse", 0, "time between health check polling in seconds. Set to 0 to disable.")
34-
flag.StringVar(&devicePrefix, "prefix", "hsi", "Device prefix to search for in net")
35+
flag.StringVar(&netDevicePrefix, "net-device", hpecxi.NetDevicePrefix, "Device prefix to search for in net (e.g, hsi)")
36+
flag.StringVar(&libfabricPath, "libfabric", hpecxi.LibfabricPath, "Directory path to lib64 with libfabric")
37+
flag.StringVar(&libcxiPath, "libcxi", hpecxi.LibcxiPath, "Directory path to lib64 with libfabric")
38+
flag.StringVar(&pciName, "pci-name", hpecxi.PCIName, "PCI device name (e.g, pci:cxi_ss1")
39+
flag.StringVar(&cxiDriverRoot, "cxi-driver-root", hpecxi.CxiDriverRoot, "/sys/modules/<x>/devices root")
3540
flag.Parse()
3641

3742
for _, v := range versions {
3843
klog.Infof("%s", v)
3944
}
4045

41-
l := plugin.HPECXILister{
42-
ResUpdateChan: make(chan dpm.PluginNameList),
43-
Heartbeat: make(chan bool),
46+
// Configuration for paths, naming
47+
cfg := &hpecxi.HPECXIConfig{
48+
CxiDriverRoot: cxiDriverRoot,
49+
LibfabricPath: libfabricPath,
50+
LibcxiPath: libcxiPath,
51+
NetDevicePrefix: netDevicePrefix,
52+
PCIName: pciName,
4453
}
54+
55+
// Create a new plugin manager for the lister
56+
mgr := hpecxi.NewManager(cfg)
57+
l := plugin.NewHPECXILister(mgr)
4558
manager := dpm.NewManager(&l)
4659

60+
// Tell user the configuration found
61+
klog.Info("🌊 Configuration:")
62+
klog.Infof(" Net Device Prefix: %s\n", cfg.NetDevicePrefix)
63+
klog.Infof(" CXI Driver Root: %s\n", cfg.CxiDriverRoot)
64+
klog.Infof(" Libfabric Path: %s\n", cfg.LibfabricPath)
65+
klog.Infof(" Libcxi Path: %s\n", cfg.LibcxiPath)
66+
klog.Infof(" PCI Name: %s\n", cfg.PCIName)
67+
4768
if pulse > 0 {
4869
go func() {
4970
klog.Infof("Heart beating every %d seconds", pulse)

deploy/hpecxi-device-plugin-ds.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ spec:
2323
value: "true"
2424
effect: NoSchedule
2525
containers:
26-
- image: ghcr.io/hewlettpackard/cxi-k8s-device-plugin:0.0.1-beta
26+
- image: ghcr.io/converged-computing/cxi-k8s-device-plugin:10
2727
imagePullPolicy: Always
2828
name: hpecxi-dp-cntr
2929
securityContext:

0 commit comments

Comments
 (0)