Merge pull request #10 from TymonLee/main_add_vgpu_monitor

add vGPU monitor container
Project-HAMi · Aug 12, 2024 · 1a14603 · 1a14603
2 parents 18eb946 + 567494c
commit 1a14603
Show file tree

Hide file tree

Showing 15 changed files with 1,135 additions and 32 deletions.
diff --git a/README.md b/README.md
@@ -188,6 +188,13 @@ volcano-scheduler-metrics records every GPU usage and limitation, visit the foll
 curl {volcano scheduler cluster ip}:8080/metrics
 ```
 
+You can also collect the **GPU utilization**, **GPU memory usage**, **pods' GPU memory limitations** and **pods' GPU memory usage** metrics on nodes by visiting the following addresses:
+
+```
+curl {volcano device plugin cluster ip}:9394/metrics
+```
+![img](./doc/vgpu_device_plugin_metrics.png)
+
 # Issues and Contributing
 [Checkout the Contributing document!](CONTRIBUTING.md)
 

diff --git a/cmd/vGPUmonitor/build.sh b/cmd/vGPUmonitor/build.sh
@@ -0,0 +1,17 @@
+#!/usr/bin/env bash
+# Copyright 2024 The HAMi Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+protoc --go_out=. --go_opt=paths=source_relative --go-grpc_out=. --go-grpc_opt=paths=source_relative noderpc/noderpc.proto
+go build
diff --git a/cmd/vGPUmonitor/cudevshr.go b/cmd/vGPUmonitor/cudevshr.go
@@ -0,0 +1,103 @@
+/*
+Copyright 2024 The HAMi Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package main
+
+import (
+	"errors"
+	"fmt"
+	"os"
+	"syscall"
+	"unsafe"
+
+	"golang.org/x/exp/mmap"
+)
+
+type deviceMemory struct {
+	contextSize uint64
+	moduleSize  uint64
+	bufferSize  uint64
+	offset      uint64
+	total       uint64
+}
+
+type shrregProcSlotT struct {
+	pid         int32
+	hostpid     int32
+	used        [16]deviceMemory
+	monitorused [16]uint64
+	status      int32
+}
+
+type uuid struct {
+	uuid [96]byte
+}
+
+type semT struct {
+	sem [32]byte
+}
+
+type sharedRegionT struct {
+	initializedFlag int32
+	smInitFlag      int32
+	ownerPid        uint32
+	sem             semT
+	num             uint64
+	uuids           [16]uuid
+
+	limit   [16]uint64
+	smLimit [16]uint64
+	procs   [1024]shrregProcSlotT
+
+	procnum           int32
+	utilizationSwitch int32
+	recentKernel      int32
+	priority          int32
+}
+
+type nvidiaCollector struct {
+	// Exposed for testing
+	cudevshrPath string
+	at           *mmap.ReaderAt
+	cudaCache    *sharedRegionT
+}
+
+func mmapcachefile(filename string, nc *nvidiaCollector) error {
+	var m = &sharedRegionT{}
+	f, err := os.OpenFile(filename, os.O_RDWR, 0666)
+	if err != nil {
+		fmt.Println("openfile error=", err.Error())
+		return err
+	}
+	data, err := syscall.Mmap(int(f.Fd()), 0, int(unsafe.Sizeof(*m)), syscall.PROT_WRITE|syscall.PROT_READ, syscall.MAP_SHARED)
+	if err != nil {
+		return err
+	}
+	var cachestr *sharedRegionT = *(**sharedRegionT)(unsafe.Pointer(&data))
+	fmt.Println("sizeof=", unsafe.Sizeof(*m), "cachestr=", cachestr.utilizationSwitch, cachestr.recentKernel)
+	nc.cudaCache = cachestr
+	return nil
+}
+
+func getvGPUMemoryInfo(nc *nvidiaCollector) (*sharedRegionT, error) {
+	if len(nc.cudevshrPath) > 0 {
+		if nc.cudaCache == nil {
+			mmapcachefile(nc.cudevshrPath, nc)
+		}
+		return nc.cudaCache, nil
+	}
+	return &sharedRegionT{}, errors.New("not found path")
+}
diff --git a/cmd/vGPUmonitor/feedback.go b/cmd/vGPUmonitor/feedback.go
@@ -0,0 +1,135 @@
+/*
+Copyright 2024 The HAMi Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package main
+
+import (
+	"time"
+
+	"github.com/NVIDIA/go-nvml/pkg/nvml"
+	"k8s.io/klog/v2"
+)
+
+type UtilizationPerDevice []int
+
+var srPodList map[string]podusage
+
+func init() {
+	srPodList = make(map[string]podusage)
+}
+
+func CheckBlocking(utSwitchOn map[string]UtilizationPerDevice, p int, pu podusage) bool {
+	for _, devuuid := range pu.sr.uuids {
+		_, ok := utSwitchOn[string(devuuid.uuid[:])]
+		if ok {
+			for i := 0; i < p; i++ {
+				if utSwitchOn[string(devuuid.uuid[:])][i] > 0 {
+					return true
+				}
+			}
+			return false
+		}
+	}
+	return false
+}
+
+// Check whether task with higher priority use GPU or there are other tasks with the same priority.
+func CheckPriority(utSwitchOn map[string]UtilizationPerDevice, p int, pu podusage) bool {
+	for _, devuuid := range pu.sr.uuids {
+		_, ok := utSwitchOn[string(devuuid.uuid[:])]
+		if ok {
+			for i := 0; i < p; i++ {
+				if utSwitchOn[string(devuuid.uuid[:])][i] > 0 {
+					return true
+				}
+			}
+			if utSwitchOn[string(devuuid.uuid[:])][p] > 1 {
+				return true
+			}
+		}
+	}
+	return false
+}
+
+func Observe(srlist *map[string]podusage) error {
+	utSwitchOn := map[string]UtilizationPerDevice{}
+
+	for idx, val := range *srlist {
+		if val.sr == nil {
+			continue
+		}
+		if val.sr.recentKernel > 0 {
+			(*srlist)[idx].sr.recentKernel--
+			if (*srlist)[idx].sr.recentKernel > 0 {
+				for _, devuuid := range val.sr.uuids {
+					// Null device condition
+					if devuuid.uuid[0] == 0 {
+						continue
+					}
+					if len(utSwitchOn[string(devuuid.uuid[:])]) == 0 {
+						utSwitchOn[string(devuuid.uuid[:])] = []int{0, 0}
+					}
+					utSwitchOn[string(devuuid.uuid[:])][val.sr.priority]++
+				}
+			}
+		}
+	}
+	for idx, val := range *srlist {
+		if val.sr == nil {
+			continue
+		}
+		if CheckBlocking(utSwitchOn, int(val.sr.priority), val) {
+			if (*srlist)[idx].sr.recentKernel >= 0 {
+				klog.Infof("utSwitchon=%v", utSwitchOn)
+				klog.Infof("Setting Blocking to on %v", idx)
+				(*srlist)[idx].sr.recentKernel = -1
+			}
+		} else {
+			if (*srlist)[idx].sr.recentKernel < 0 {
+				klog.Infof("utSwitchon=%v", utSwitchOn)
+				klog.Infof("Setting Blocking to off %v", idx)
+				(*srlist)[idx].sr.recentKernel = 0
+			}
+		}
+		if CheckPriority(utSwitchOn, int(val.sr.priority), val) {
+			if (*srlist)[idx].sr.utilizationSwitch != 1 {
+				klog.Infof("utSwitchon=%v", utSwitchOn)
+				klog.Infof("Setting UtilizationSwitch to on %v", idx)
+				(*srlist)[idx].sr.utilizationSwitch = 1
+			}
+		} else {
+			if (*srlist)[idx].sr.utilizationSwitch != 0 {
+				klog.Infof("utSwitchon=%v", utSwitchOn)
+				klog.Infof("Setting UtilizationSwitch to off %v", idx)
+				(*srlist)[idx].sr.utilizationSwitch = 0
+			}
+		}
+	}
+	return nil
+}
+
+func watchAndFeedback() {
+	nvml.Init()
+	for {
+		time.Sleep(time.Second * 5)
+		err := monitorPath(srPodList)
+		if err != nil {
+			klog.Errorf("monitorPath failed %v", err.Error())
+		}
+		klog.Infof("WatchAndFeedback srPodList=%v", srPodList)
+		Observe(&srPodList)
+	}
+}
diff --git a/cmd/vGPUmonitor/main.go b/cmd/vGPUmonitor/main.go
@@ -0,0 +1,34 @@
+/*
+Copyright 2024 The HAMi Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package main
+
+import (
+	"k8s.io/klog/v2"
+)
+
+func main() {
+	if err := ValidateEnvVars(); err != nil {
+		klog.Fatalf("Failed to validate environment variables: %v", err)
+	}
+	errchannel := make(chan error)
+	go initMetrics()
+	go watchAndFeedback()
+	for {
+		err := <-errchannel
+		klog.Errorf("failed to serve: %v", err)
+	}
+}