From dc0a69f2f79002ea9500ad60d23b9a81e02f517d Mon Sep 17 00:00:00 2001
From: Jeremy Arnold
Date: Thu, 26 Sep 2024 05:27:26 +0000
Subject: [PATCH 1/4] Pin Triton to a specific commit for AMD Llama2 submission

The Dockerfile used for building vLLM on ROCm points to the Triton main
branch by default. This results in a build that is not repeatable, and
recent Triton updates have introduced incompatibilities which cause the
build to fail. Update the build_llama2.sh script to build vLLM with a
specific commit of Triton; the revision used here is the same revision
that was used for the MLPerf 4.1 submission.

Also make the second stage of the build (adding the MLPerf-specific
code on top of the generic vLLM image) conditional on having a
successful vLLM build. Without this change, vLLM build failures would
result in a confusing error message.
---
 closed/AMD/docker/build_llama2.sh | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)
 mode change 100644 => 100755 closed/AMD/docker/build_llama2.sh

diff --git a/closed/AMD/docker/build_llama2.sh b/closed/AMD/docker/build_llama2.sh
old mode 100644
new mode 100755
index 97d9362b4..55af02860
--- a/closed/AMD/docker/build_llama2.sh
+++ b/closed/AMD/docker/build_llama2.sh
@@ -3,6 +3,7 @@ BASE_IMAGE=rocm/pytorch:rocm6.1.2_ubuntu20.04_py3.9_pytorch_staging
 VLLM_REV=799388d722e22ecb14d1011faaba54c4882cc8f5 # MLPerf-4.1
 HIPBLASLT_BRANCH=8b71e7a8d26ba95774fdc372883ee0be57af3d28
 FA_BRANCH=23a2b1c2f21de2289db83de7d42e125586368e66 # ck_tile - FA 2.5.9
+TRITON_BRANCH=e4a0d93ff1a367c7d4eeebbcd7079ed267e6b06f
 RELEASE_TAG=${RELEASE_TAG:-latest}
 
 git clone https://github.com/ROCm/vllm
@@ -11,9 +12,7 @@
 git checkout main
 git pull
 git checkout ${VLLM_REV}
 git cherry-pick b9013696b23dde372cccecdbaf69f0c852008844 # optimizations for process output step, PR #104
-
-docker build --build-arg BASE_IMAGE=${BASE_IMAGE} --build-arg HIPBLASLT_BRANCH=${HIPBLASLT_BRANCH} --build-arg FA_BRANCH=${FA_BRANCH} -f Dockerfile.rocm -t vllm_dev:${VLLM_REV} .
-
 popd
-docker build --build-arg BASE_IMAGE=vllm_dev:${VLLM_REV} -f Dockerfile.llama2 -t mlperf/llama_inference:${RELEASE_TAG} ..
+docker build --build-arg BASE_IMAGE=${BASE_IMAGE} --build-arg HIPBLASLT_BRANCH=${HIPBLASLT_BRANCH} --build-arg FA_BRANCH=${FA_BRANCH} --build-arg TRITON_BRANCH=${TRITON_BRANCH} -f vllm/Dockerfile.rocm -t vllm_dev:${VLLM_REV} vllm \
+&& docker build --build-arg BASE_IMAGE=vllm_dev:${VLLM_REV} -f Dockerfile.llama2 -t mlperf/llama_inference:${RELEASE_TAG} ..

From 7697d7fe7a2b65a70de6643c6a83c27a83eb4ac6 Mon Sep 17 00:00:00 2001
From: Jeremy Arnold
Date: Sun, 29 Sep 2024 02:50:52 +0000
Subject: [PATCH 2/4] Use specific package versions in Dockerfile

The Dockerfile for AMD's Llama-2 results didn't use specific versions
when installing pip packages, resulting in failures when newer versions
were released. This update pins those versions to those that were used
for the submission runs.
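One way to produce such a pin list is to capture the versions from a
container that has already produced good runs. A minimal sketch,
assuming the packages are installed in that container (the grep
pattern below is illustrative, covering only the packages this
Dockerfile installs):

``` bash
# Inside a known-good container, record the exact installed versions
# of the packages that Dockerfile.llama2 installs:
pip freeze | grep -iE '^(absl-py|datasets|evaluate|nltk|numpy|py-libnuma|rouge[-_]score)=='
# Paste the resulting name==version lines back into the Dockerfile so
# future builds resolve to the same versions.
```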
---
 closed/AMD/docker/Dockerfile.llama2 | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/closed/AMD/docker/Dockerfile.llama2 b/closed/AMD/docker/Dockerfile.llama2
index 347fe8d87..9ab520932 100644
--- a/closed/AMD/docker/Dockerfile.llama2
+++ b/closed/AMD/docker/Dockerfile.llama2
@@ -7,13 +7,13 @@ RUN apt update \
 && rm -rf /var/lib/apt/lists/*
 
 RUN pip install \
-    absl-py \
-    datasets \
-    evaluate \
-    nltk \
+    absl-py==2.1.0 \
+    datasets==2.20.0 \
+    evaluate==0.4.2 \
+    nltk==3.8.1 \
     numpy==1.26.4 \
-    py-libnuma \
-    rouge_score
+    py-libnuma==1.2 \
+    rouge_score==0.1.2
 
 WORKDIR /app
 RUN git clone --recurse-submodules https://github.com/mlcommons/inference.git --branch v4.1 --depth 1 mlperf_inference \

From 8e59f1a9d88050a4d04f7165b8aea99fd53fd16f Mon Sep 17 00:00:00 2001
From: Jeremy Arnold
Date: Sun, 29 Sep 2024 02:53:23 +0000
Subject: [PATCH 3/4] AMD: Remove call to package submission tool

The run_scenarios.sh script used in AMD's Llama-2 submission includes a
call to a submission packaging tool, but this tool was not part of the
submission package. While AMD looks forward to including this tool in a
future submission to make it easier for others to submit MLPerf results
with AMD GPUs, the packaging tool is not yet ready for broader use. We
are removing the call from the run_scenarios.sh script to eliminate an
error message (which doesn't affect the actual runs).
---
 .../AMD/code/llama2-70b-99.9/test_VllmFp8/run_scenarios.sh | 6 ------
 closed/AMD/code/llama2-70b-99/test_VllmFp8/run_scenarios.sh | 6 ------
 2 files changed, 12 deletions(-)

diff --git a/closed/AMD/code/llama2-70b-99.9/test_VllmFp8/run_scenarios.sh b/closed/AMD/code/llama2-70b-99.9/test_VllmFp8/run_scenarios.sh
index 7fb1acd42..ee06285a3 100644
--- a/closed/AMD/code/llama2-70b-99.9/test_VllmFp8/run_scenarios.sh
+++ b/closed/AMD/code/llama2-70b-99.9/test_VllmFp8/run_scenarios.sh
@@ -20,9 +20,3 @@ echo "Done Server"
 
 echo "Done Benchmarks"
 echo "TS_START_BENCHMARKS=${TS_START_BENCHMARKS}"
-
-echo "Packaging and checking submission results"
-python ../submission/package_submission.py \
-    --base-package-dir ${PACKAGE_DRAFT_DIR} \
-    --system-name ${SYSTEM_NAME} \
-    --input-dir ${RESULTS_DIR}
\ No newline at end of file
diff --git a/closed/AMD/code/llama2-70b-99/test_VllmFp8/run_scenarios.sh b/closed/AMD/code/llama2-70b-99/test_VllmFp8/run_scenarios.sh
index 7fb1acd42..ee06285a3 100644
--- a/closed/AMD/code/llama2-70b-99/test_VllmFp8/run_scenarios.sh
+++ b/closed/AMD/code/llama2-70b-99/test_VllmFp8/run_scenarios.sh
@@ -20,9 +20,3 @@ echo "Done Server"
 
 echo "Done Benchmarks"
 echo "TS_START_BENCHMARKS=${TS_START_BENCHMARKS}"
-
-echo "Packaging and checking submission results"
-python ../submission/package_submission.py \
-    --base-package-dir ${PACKAGE_DRAFT_DIR} \
-    --system-name ${SYSTEM_NAME} \
-    --input-dir ${RESULTS_DIR}
\ No newline at end of file

From d252efacc71ce08ba3468462e4693e760ae29c77 Mon Sep 17 00:00:00 2001
From: Jeremy Arnold
Date: Sun, 29 Sep 2024 02:57:52 +0000
Subject: [PATCH 4/4] AMD: Use bash to call run scripts

AMD's internal repo had execute permissions enabled on the scripts used
to launch the workload, but the executable permission was lost in the
submission package. Switch to using bash to execute these scripts so
that they work properly without being executable.
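An alternative to prefixing every call with bash would be to restore
the executable bit after unpacking the package. A rough sketch, with
illustrative paths (the git command mirrors what PATCH 1/4 does for
build_llama2.sh via its mode change):

``` bash
# Re-mark the run scripts as executable after unpacking:
find closed/AMD/code -name '*.sh' -exec chmod +x {} +
# To record the bit in the repository itself:
git update-index --chmod=+x closed/AMD/docker/build_llama2.sh
```

Using bash instead keeps the scripts working even if the permissions
are stripped again when the package is redistributed.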
---
 closed/AMD/code/llama2-70b-99.9/README.md                  | 8 ++++----
 .../code/llama2-70b-99.9/test_VllmFp8/run_scenarios.sh     | 4 ++--
 .../llama2-70b-99.9/test_VllmFp8/run_tests_Offline.sh      | 8 ++++----
 .../code/llama2-70b-99.9/test_VllmFp8/run_tests_Server.sh  | 8 ++++----
 closed/AMD/code/llama2-70b-99/README.md                    | 8 ++++----
 .../AMD/code/llama2-70b-99/test_VllmFp8/run_scenarios.sh   | 4 ++--
 .../code/llama2-70b-99/test_VllmFp8/run_tests_Offline.sh   | 8 ++++----
 .../code/llama2-70b-99/test_VllmFp8/run_tests_Server.sh    | 8 ++++----
 8 files changed, 28 insertions(+), 28 deletions(-)

diff --git a/closed/AMD/code/llama2-70b-99.9/README.md b/closed/AMD/code/llama2-70b-99.9/README.md
index b13c4cf92..62e3bcd69 100644
--- a/closed/AMD/code/llama2-70b-99.9/README.md
+++ b/closed/AMD/code/llama2-70b-99.9/README.md
@@ -48,17 +48,17 @@ KV cache scales for the quantized model weights are used and were downloaded fro
 To generate results for the full submission, running the command below in an inference container. Logs can be found in `/lab-hist/mlperf-results/$datetime1/$datetime2`.
 ``` bash
 cd /lab-mlperf-inference/code/llama2-70b-99.9/test_VllmFp8
-./run_scenarios.sh
+bash ./run_scenarios.sh
 ```
 
 To generate results for the Offline scenario only, run the command below in an inference container. Logs can be found in `/lab-hist/mlperf-results/$datetime1/$datetime2/Offline`.
 ``` bash
 cd /lab-mlperf-inference/code/llama2-70b-99.9/test_VllmFp8
-./run_tests_Offline.sh
+bash ./run_tests_Offline.sh
 ```
 
 To generate results for the Server scenario only, run the command below in an inference container. Logs can be found in `/lab-hist/mlperf-results/$datetime1/$datetime2/Server`.
 ``` bash
 cd /lab-mlperf-inference/code/llama2-70b-99.9/test_VllmFp8
-./run_tests_Server.sh
-```
\ No newline at end of file
+bash ./run_tests_Server.sh
+```
diff --git a/closed/AMD/code/llama2-70b-99.9/test_VllmFp8/run_scenarios.sh b/closed/AMD/code/llama2-70b-99.9/test_VllmFp8/run_scenarios.sh
index ee06285a3..9faa6bfa4 100644
--- a/closed/AMD/code/llama2-70b-99.9/test_VllmFp8/run_scenarios.sh
+++ b/closed/AMD/code/llama2-70b-99.9/test_VllmFp8/run_scenarios.sh
@@ -11,11 +11,11 @@ export RESULTS_DIR=${LAB_CLOG}/${TS_START_BENCHMARKS}
 echo "TS_START_BENCHMARKS=${TS_START_BENCHMARKS}"
 
 echo "Running Offline"
-./run_tests_Offline.sh
+bash run_tests_Offline.sh
 echo "Done Offline"
 
 echo "Running Server"
-./run_tests_Server.sh
+bash run_tests_Server.sh
 echo "Done Server"
 
 echo "Done Benchmarks"
diff --git a/closed/AMD/code/llama2-70b-99.9/test_VllmFp8/run_tests_Offline.sh b/closed/AMD/code/llama2-70b-99.9/test_VllmFp8/run_tests_Offline.sh
index b44bd9ef5..e866a4e24 100644
--- a/closed/AMD/code/llama2-70b-99.9/test_VllmFp8/run_tests_Offline.sh
+++ b/closed/AMD/code/llama2-70b-99.9/test_VllmFp8/run_tests_Offline.sh
@@ -12,11 +12,11 @@ echo "TS_START_BENCHMARKS=${TS_START_BENCHMARKS}"
 for i in $(seq 1 ${NUM_ITERS})
 do
     echo "Running $SCENARIO - Performance run $i/$NUM_ITERS"
-    ITER=$i ./test_VllmFp8_Offline_perf.sh
+    ITER=$i bash test_VllmFp8_Offline_perf.sh
 done
 echo "Running $SCENARIO - Accuracy"
-./test_VllmFp8_Offline_acc.sh
+bash test_VllmFp8_Offline_acc.sh
 echo "Running $SCENARIO - Audit"
-./test_VllmFp8_Offline_audit.sh
+bash test_VllmFp8_Offline_audit.sh
 echo "Done"
-echo "TS_START_BENCHMARKS=${TS_START_BENCHMARKS}"
\ No newline at end of file
+echo "TS_START_BENCHMARKS=${TS_START_BENCHMARKS}"
diff --git a/closed/AMD/code/llama2-70b-99.9/test_VllmFp8/run_tests_Server.sh b/closed/AMD/code/llama2-70b-99.9/test_VllmFp8/run_tests_Server.sh
index 52e07dea2..b5723caf6 100644
--- a/closed/AMD/code/llama2-70b-99.9/test_VllmFp8/run_tests_Server.sh
+++ b/closed/AMD/code/llama2-70b-99.9/test_VllmFp8/run_tests_Server.sh
@@ -11,11 +11,11 @@ echo "TS_START_BENCHMARKS=${TS_START_BENCHMARKS}"
 for i in $(seq 1 ${NUM_ITERS})
 do
     echo "Running $SCENARIO - Performance run $i/$NUM_ITERS"
-    ITER=$i ./test_VllmFp8_SyncServer_perf.sh
+    ITER=$i bash test_VllmFp8_SyncServer_perf.sh
 done
 echo "Running $SCENARIO - Accuracy"
-./test_VllmFp8_SyncServer_acc.sh
+bash test_VllmFp8_SyncServer_acc.sh
 echo "Running $SCENARIO - Audit"
-./test_VllmFp8_SyncServer_audit.sh
+bash test_VllmFp8_SyncServer_audit.sh
 echo "Done SyncServer"
-echo "TS_START_BENCHMARKS=${TS_START_BENCHMARKS}"
\ No newline at end of file
+echo "TS_START_BENCHMARKS=${TS_START_BENCHMARKS}"
diff --git a/closed/AMD/code/llama2-70b-99/README.md b/closed/AMD/code/llama2-70b-99/README.md
index b13c4cf92..62e3bcd69 100644
--- a/closed/AMD/code/llama2-70b-99/README.md
+++ b/closed/AMD/code/llama2-70b-99/README.md
@@ -48,17 +48,17 @@ KV cache scales for the quantized model weights are used and were downloaded fro
 To generate results for the full submission, running the command below in an inference container. Logs can be found in `/lab-hist/mlperf-results/$datetime1/$datetime2`.
 ``` bash
 cd /lab-mlperf-inference/code/llama2-70b-99.9/test_VllmFp8
-./run_scenarios.sh
+bash ./run_scenarios.sh
 ```
 
 To generate results for the Offline scenario only, run the command below in an inference container. Logs can be found in `/lab-hist/mlperf-results/$datetime1/$datetime2/Offline`.
 ``` bash
 cd /lab-mlperf-inference/code/llama2-70b-99.9/test_VllmFp8
-./run_tests_Offline.sh
+bash ./run_tests_Offline.sh
 ```
 
 To generate results for the Server scenario only, run the command below in an inference container. Logs can be found in `/lab-hist/mlperf-results/$datetime1/$datetime2/Server`.
 ``` bash
 cd /lab-mlperf-inference/code/llama2-70b-99.9/test_VllmFp8
-./run_tests_Server.sh
-```
\ No newline at end of file
+bash ./run_tests_Server.sh
+```
diff --git a/closed/AMD/code/llama2-70b-99/test_VllmFp8/run_scenarios.sh b/closed/AMD/code/llama2-70b-99/test_VllmFp8/run_scenarios.sh
index ee06285a3..9faa6bfa4 100644
--- a/closed/AMD/code/llama2-70b-99/test_VllmFp8/run_scenarios.sh
+++ b/closed/AMD/code/llama2-70b-99/test_VllmFp8/run_scenarios.sh
@@ -11,11 +11,11 @@ export RESULTS_DIR=${LAB_CLOG}/${TS_START_BENCHMARKS}
 echo "TS_START_BENCHMARKS=${TS_START_BENCHMARKS}"
 
 echo "Running Offline"
-./run_tests_Offline.sh
+bash run_tests_Offline.sh
 echo "Done Offline"
 
 echo "Running Server"
-./run_tests_Server.sh
+bash run_tests_Server.sh
 echo "Done Server"
 
 echo "Done Benchmarks"
diff --git a/closed/AMD/code/llama2-70b-99/test_VllmFp8/run_tests_Offline.sh b/closed/AMD/code/llama2-70b-99/test_VllmFp8/run_tests_Offline.sh
index b44bd9ef5..e866a4e24 100644
--- a/closed/AMD/code/llama2-70b-99/test_VllmFp8/run_tests_Offline.sh
+++ b/closed/AMD/code/llama2-70b-99/test_VllmFp8/run_tests_Offline.sh
@@ -12,11 +12,11 @@ echo "TS_START_BENCHMARKS=${TS_START_BENCHMARKS}"
 for i in $(seq 1 ${NUM_ITERS})
 do
     echo "Running $SCENARIO - Performance run $i/$NUM_ITERS"
-    ITER=$i ./test_VllmFp8_Offline_perf.sh
+    ITER=$i bash test_VllmFp8_Offline_perf.sh
 done
 echo "Running $SCENARIO - Accuracy"
-./test_VllmFp8_Offline_acc.sh
+bash test_VllmFp8_Offline_acc.sh
 echo "Running $SCENARIO - Audit"
-./test_VllmFp8_Offline_audit.sh
+bash test_VllmFp8_Offline_audit.sh
 echo "Done"
-echo "TS_START_BENCHMARKS=${TS_START_BENCHMARKS}"
\ No newline at end of file
+echo "TS_START_BENCHMARKS=${TS_START_BENCHMARKS}"
diff --git a/closed/AMD/code/llama2-70b-99/test_VllmFp8/run_tests_Server.sh b/closed/AMD/code/llama2-70b-99/test_VllmFp8/run_tests_Server.sh
index 52e07dea2..b5723caf6 100644
--- a/closed/AMD/code/llama2-70b-99/test_VllmFp8/run_tests_Server.sh
+++ b/closed/AMD/code/llama2-70b-99/test_VllmFp8/run_tests_Server.sh
@@ -11,11 +11,11 @@ echo "TS_START_BENCHMARKS=${TS_START_BENCHMARKS}"
 for i in $(seq 1 ${NUM_ITERS})
 do
     echo "Running $SCENARIO - Performance run $i/$NUM_ITERS"
-    ITER=$i ./test_VllmFp8_SyncServer_perf.sh
+    ITER=$i bash test_VllmFp8_SyncServer_perf.sh
 done
 echo "Running $SCENARIO - Accuracy"
-./test_VllmFp8_SyncServer_acc.sh
+bash test_VllmFp8_SyncServer_acc.sh
 echo "Running $SCENARIO - Audit"
-./test_VllmFp8_SyncServer_audit.sh
+bash test_VllmFp8_SyncServer_audit.sh
 echo "Done SyncServer"
-echo "TS_START_BENCHMARKS=${TS_START_BENCHMARKS}"
\ No newline at end of file
+echo "TS_START_BENCHMARKS=${TS_START_BENCHMARKS}"
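
For anyone reproducing these fixes, the series can be applied with
git am. A sketch, assuming each message above has been saved to its
own file (the file names are illustrative):

``` bash
# Apply all four patches on top of the MLPerf v4.1 submission tree:
git am 0001-pin-triton.patch 0002-pin-pip-versions.patch \
       0003-remove-package-tool.patch 0004-use-bash.patch
# If a patch fails to apply, inspect and resolve, then continue:
#   git am --show-current-patch
#   git am --continue    # or: git am --abort
```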