From 8516598640c49cf6857b821b9499d9297ab0d26c Mon Sep 17 00:00:00 2001
From: Etienne Perot <eperot@google.com>
Date: Sun, 15 Dec 2024 21:33:08 -0800
Subject: [PATCH] Re-add PyTorch benchmarks into PyTorch image. Update CUDA
 version.

The PyTorch benchmarks were removed in
https://github.com/google/gvisor/commit/9304ed401fd9604bc36c0436a1132a8b4e3f0851#diff-fd8d6db82d75e1038ed6136c9930c17d6985ff5d22f2ed9e5e8910661de14228
but the Kubernetes PyTorch benchmarks actually depended on them.

PiperOrigin-RevId: 706569220
---
 images/gpu/pytorch/Dockerfile.x86_64       | 94 +++++++++++++++++-----
 test/kubernetes/benchmarks/pytorch.go      | 64 +++------------
 test/kubernetes/benchmarks/pytorch_test.go |  6 --
 3 files changed, 81 insertions(+), 83 deletions(-)

diff --git a/images/gpu/pytorch/Dockerfile.x86_64 b/images/gpu/pytorch/Dockerfile.x86_64
index fed65ef7e5..7890f9f6fb 100644
--- a/images/gpu/pytorch/Dockerfile.x86_64
+++ b/images/gpu/pytorch/Dockerfile.x86_64
@@ -1,29 +1,42 @@
-FROM nvidia/cuda:12.2.0-devel-ubuntu22.04
-
-RUN apt-get update && apt-get install --yes \
-      python3 \
-      python3-distutils \
-      python3-pip \
-      clang \
-      wget \
-      vim \
-      git
-
-RUN python3 -m pip install --ignore-installed \
-      "clang~=$(clang --version | grep -oP 'clang version [.0-9]+' | cut -d' ' -f3)" \
-      torch \
-      torchvision \
-      lightning \
-      numpy \
-      memory_profiler
+FROM nvidia/cuda:12.4.0-devel-ubuntu22.04
+
+# Used for determining the correct pip index URL below.
+ENV CUDA_VERSION=12.4
 
 ENV PYTORCH_DATASETS_DIR=/pytorch-data
 ENV TORCH_HOME=/pytorch-home
+RUN mkdir -p "$TORCH_HOME" && \
+    mkdir -p "$PYTORCH_DATASETS_DIR"
+
+RUN apt-get update && \
+    apt-get install --yes \
+        libgl1-mesa-glx libglib2.0-0 \
+        pkg-config \
+        python3 \
+        python3-distutils \
+        python3-pip \
+        clang \
+        wget \
+        vim \
+        git
+
+RUN PIP_INDEX_URL="https://download.pytorch.org/whl/cu$(echo "$CUDA_VERSION" | sed 's~\.~~g')" && \
+    python3 -m pip install --ignore-installed \
+        boto3 \
+        "clang~=$(clang --version | grep -oP 'clang version [.0-9]+' | cut -d' ' -f3)" \
+        lightning \
+        matplotlib \
+        memory_profiler \
+        numba && \
+    python3 -m pip install --ignore-installed \
+        torch \
+        torchvision \
+        torchaudio \
+        numpy \
+        --index-url "$PIP_INDEX_URL"
+
 COPY download_pytorch_datasets.py /tmp/
-# Some PyTorch examples hardcode the data directory to "data", so
-# make a symlink for that too.
-RUN mkdir "$PYTORCH_DATASETS_DIR" && \
-    python3 /tmp/download_pytorch_datasets.py && \
+RUN python3 /tmp/download_pytorch_datasets.py && \
     rm /tmp/download_pytorch_datasets.py
 
 RUN PYTORCH_EXAMPLES_COMMIT=30b310a977a82dbfc3d8e4a820f3b14d876d3bd2 && \
@@ -38,3 +51,40 @@ RUN PYTORCH_EXAMPLES_COMMIT=30b310a977a82dbfc3d8e4a820f3b14d876d3bd2 && \
 
 COPY *.py /
 RUN rm /download_pytorch_datasets.py
+
+RUN PYTORCH_BENCHMARKS_COMMIT=675fb8f537d302a4fef3ed2a67349209e65046ac && \
+    mkdir /pytorch-benchmark && \
+    cd /pytorch-benchmark && \
+    git init && \
+    git remote add origin https://github.com/pytorch/benchmark.git && \
+    git fetch --depth 1 origin "$PYTORCH_BENCHMARKS_COMMIT" && \
+    git checkout FETCH_HEAD
+
+# Note that mobilenet_v2 does not have a requirements.txt file.
+RUN cd /pytorch-benchmark && \
+    python3 -m pip install --ignore-installed \
+        -r requirements.txt \
+        -r torchbenchmark/models/LearningToPaint/requirements.txt \
+        -r torchbenchmark/models/fastNLP_Bert/requirements.txt \
+        -r torchbenchmark/models/hf_BigBird/requirements.txt \
+        -r torchbenchmark/models/speech_transformer/requirements.txt
+
+# These benchmarks are chosen based on diversity of the type of model and their
+# profile with respect to using the GPU and moving data. For more context, see
+# this paper: https://arxiv.org/pdf/2304.14226.pdf
+RUN cd /pytorch-benchmark && \
+    python3 install.py \
+        LearningToPaint \
+        fastNLP_Bert \
+        hf_BigBird \
+        speech_transformer \
+        mobilenet_v2
+
+# Some of these benchmarks download a dataset at runtime.
+# Run them once on CPU just to get this predownloaded into the image.
+RUN cd /pytorch-benchmark && \
+    python3 run.py LearningToPaint --device cpu && \
+    python3 run.py fastNLP_Bert --device cpu && \
+    python3 run.py hf_BigBird --device cpu && \
+    python3 run.py speech_transformer --device cpu && \
+    python3 run.py mobilenet_v2 --device cpu
diff --git a/test/kubernetes/benchmarks/pytorch.go b/test/kubernetes/benchmarks/pytorch.go
index 92fe7e45ef..2ddc55709c 100644
--- a/test/kubernetes/benchmarks/pytorch.go
+++ b/test/kubernetes/benchmarks/pytorch.go
@@ -53,19 +53,9 @@ const (
 	pytorchImage = k8s.ImageRepoPrefix + "gpu/pytorch_x86_64:latest"
 )
 
-type pytorchMode string
-
-// pytorchMode is the pytorch mode used, either script mode (jit) or eager mode.
-// See: https://towardsdatascience.com/pytorch-jit-and-torchscript-c2a77bac0fff
-const (
-	jit   = pytorchMode("jit")
-	eager = pytorchMode("eager")
-)
-
 type pytorchTest struct {
 	module string
 	test   pytorchTestType
-	mode   pytorchMode
 }
 
 // Sets of tests.
@@ -81,12 +71,10 @@ var (
 		{
 			module: "fastNLP_Bert",
 			test:   train,
-			mode:   eager,
 		},
 		{
 			module: "fastNLP_Bert",
 			test:   eval,
-			mode:   eager,
 		},
 	}
 
@@ -100,12 +88,10 @@ var (
 		{
 			module: "hf_BigBird",
 			test:   train,
-			mode:   eager,
 		},
 		{
 			module: "hf_BigBird",
 			test:   eval,
-			mode:   eager,
 		},
 	}
 
@@ -119,12 +105,10 @@ var (
 		{
 			module: "speech_transformer",
 			test:   train,
-			mode:   eager,
 		},
 		{
 			module: "speech_transformer",
 			test:   eval,
-			mode:   eager,
 		},
 	}
 
@@ -138,12 +122,10 @@ var (
 		{
 			module: "LearningToPaint",
 			test:   train,
-			mode:   jit,
 		},
 		{
 			module: "LearningToPaint",
 			test:   eval,
-			mode:   jit,
 		},
 	}
 
@@ -156,29 +138,10 @@ var (
 		{
 			module: "mobilenet_v2",
 			test:   train,
-			mode:   jit,
 		},
 		{
 			module: "mobilenet_v2",
 			test:   eval,
-			mode:   jit,
-		},
-	}
-
-	// BackgroundMatting uses the Background_Matting module classified as "Computer Vision: Pattern Recognition".
-	// BackgroundMatting has a lot of GPU idle time. See Figure 2 on page 5: https://arxiv.org/pdf/2304.14226.pdf
-	//
-	// https://github.com/pytorch/benchmark/tree/main/torchbenchmark/models/Background_Matting (see README)
-	BackgroundMatting = []pytorchTest{
-		{
-			module: "Background_Matting",
-			test:   train,
-			mode:   eager,
-		},
-		{
-			module: "Background_Matting",
-			test:   eval,
-			mode:   eager,
 		},
 	}
 )
@@ -188,7 +151,7 @@ var (
 func (p pytorchTest) Name() string {
 	// Kubernetes pod names cannot contain "_".
 	module := strings.ReplaceAll(strings.ToLower(p.module), "_", "-")
-	return fmt.Sprintf("%s-%s-%s", module, p.test, p.mode)
+	return fmt.Sprintf("%s-%s", module, p.test)
 }
 
 var snakeCase = regexp.MustCompile("_.")
@@ -206,16 +169,7 @@ func (p pytorchTest) BenchName() string {
 		return strings.ToUpper(strings.TrimPrefix(s, "_"))
 	})
 	test := strings.ToUpper(string(p.test)[:1]) + string(p.test[1:])
-	var mode string
-	switch p.mode {
-	case eager:
-		mode = "Eager"
-	case jit:
-		mode = "JIT"
-	default:
-		panic(fmt.Sprintf("Unknown mode: %v", p.mode))
-	}
-	return fmt.Sprintf("%s/%s/%s", moduleName, test, mode)
+	return fmt.Sprintf("%s/%s", moduleName, test)
 }
 
 func (p pytorchTest) toPod(namespace *testcluster.Namespace, image string) (*v13.Pod, error) {
@@ -235,12 +189,12 @@ func (p pytorchTest) toPod(namespace *testcluster.Namespace, image string) (*v13
 
 func (p pytorchTest) command() []string {
 	return []string{
-		"python3",
-		"run.py",
-		p.module,
-		"--device", "cuda",
-		"--test", string(p.test),
-		"--mode", string(p.mode),
+		"sh",
+		"-c",
+		strings.Join([]string{
+			"cd /pytorch-benchmark",
+			fmt.Sprintf("python3 run.py %s --device cuda --test %s", p.module, p.test),
+		}, " && "),
 	}
 }
 
@@ -350,7 +304,7 @@ func parseStandardOutput(output string) ([]benchmetric.MetricValue, error) {
 	}, nil
 }
 
-var gpuTimeRegex = regexp.MustCompile(`GPU\sTime:\s*(\d+\.\d+)\smilliseconds`)
+var gpuTimeRegex = regexp.MustCompile(`GPU\sTime\sper\sbatch:\s*(\d+\.\d+)\smilliseconds`)
 
 func parseGPUTime(output string) (float64, error) {
 	match := gpuTimeRegex.FindStringSubmatch(output)
diff --git a/test/kubernetes/benchmarks/pytorch_test.go b/test/kubernetes/benchmarks/pytorch_test.go
index ea43ab323b..d1787aba7b 100644
--- a/test/kubernetes/benchmarks/pytorch_test.go
+++ b/test/kubernetes/benchmarks/pytorch_test.go
@@ -47,11 +47,6 @@ func TestMobileNetV2(t *testing.T) {
 	runTests(ctx, t, MobileNetV2)
 }
 
-func TestBackgroundMatting(t *testing.T) {
-	ctx := context.Background()
-	runTests(ctx, t, BackgroundMatting)
-}
-
 func runTests(ctx context.Context, t *testing.T, tests []pytorchTest) {
 	k8sCtx, err := k8sctx.Context(ctx)
 	if err != nil {
@@ -72,6 +67,5 @@ func TestMain(m *testing.M) {
 		"TestSpeechTransformer": TestSpeechTransformer,
 		"TestLearningToPaint":   TestLearningToPaint,
 		"TestMobileNetV2":       TestMobileNetV2,
-		"TestBackgroundMatting": TestBackgroundMatting,
 	})
 }