
Commit 5dd5899

Qualcomm AI Engine Direct - Support LLM Perplexity Evaluation on CI
1 parent 0b748bf

File tree: 6 files changed (+192 −127 lines)


.ci/scripts/test_qnn_static_llama.sh

Lines changed: 0 additions & 69 deletions
This file was deleted.

.ci/scripts/test_qnn_static_llm.sh

Lines changed: 94 additions & 0 deletions
@@ -0,0 +1,94 @@
#!/bin/bash
# Copyright (c) Qualcomm Innovation Center, Inc.
# All rights reserved
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

set -euxo pipefail

source "$(dirname "${BASH_SOURCE[0]}")/utils.sh"

TASK_NAME=$1
if [[ -z "${TASK_NAME:-}" ]]; then
  echo "Missing task name, exiting..."
  exit 1
fi


# Download QNN_SDK. If already downloaded, export environment path
source "$(dirname "${BASH_SOURCE[0]}")/../../backends/qualcomm/scripts/install_qnn_sdk.sh"
install_qnn

export EXECUTORCH_ROOT="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")/.." && pwd)"
export LD_LIBRARY_PATH="${QNN_SDK_ROOT}/lib/x86_64-linux-clang"
export PYTHONPATH=".."
cp schema/program.fbs exir/_serialize/program.fbs
cp schema/scalar_type.fbs exir/_serialize/scalar_type.fbs
cp -f build-x86/backends/qualcomm/PyQnnManagerAdaptor.cpython-310-x86_64-linux-gnu.so backends/qualcomm/python
cp -f build-x86/backends/qualcomm/PyQnnWrapperAdaptor.cpython-310-x86_64-linux-gnu.so backends/qualcomm/python

if [[ -z "${PYTHON_EXECUTABLE:-}" ]]; then
  PYTHON_EXECUTABLE=python3
fi

which "${PYTHON_EXECUTABLE}"

# Static llama CI itself does not require graphviz, but test_qnn_delegate.py does
pip install graphviz

set +e

echo "Executing task: $TASK_NAME"
if [[ "${TASK_NAME}" == "stories_110m" ]]; then
  # Download stories llama110m artifacts
  download_stories_model_artifacts
  echo "Creating tokenizer.bin"
  $PYTHON_EXECUTABLE -m pytorch_tokenizers.tools.llama2c.convert -t tokenizer.model -o tokenizer.bin

  # Compile only, as weight sharing is not applicable on x86.
  $PYTHON_EXECUTABLE backends/qualcomm/tests/test_qnn_delegate.py -k TestExampleLLMScript.test_llama_stories_110m --model SM8650 --build_folder build-android/ --executorch_root . --artifact_dir ./stories_110m_pte_size --llama_artifacts . --compile_only
  exit_code1=$?

  # Check accuracy with weight sharing disabled, since x86 does not support weight sharing.
  $PYTHON_EXECUTABLE backends/qualcomm/tests/test_qnn_delegate.py -k TestExampleLLMScript.test_llama_stories_110m --model SM8650 --build_folder build-x86/ --executorch_root . --artifact_dir ./stories_110m_accuracy --llama_artifacts . --enable_x86_64
  exit_code2=$?

  # Check the exit codes and print messages
  if [ $exit_code1 -ne 0 ]; then
    echo "Static Llama compile-only (weight sharing) test failed. $exit_code1."
  fi

  if [ $exit_code2 -ne 0 ]; then
    echo "Static Llama accuracy test failed. $exit_code2."
  fi

  if [ $exit_code1 -ne 0 ] || [ $exit_code2 -ne 0 ]; then
    exit 1
  else
    exit 0
  fi

elif [[ "${TASK_NAME}" == "stories_260k_bc" ]]; then

  # Check backward compatibility (BC)
  bash backends/qualcomm/bc/test_qnn_static_llama_bc.sh
  exit_code1=$?
  if [ $exit_code1 -ne 0 ]; then
    exit 1
  else
    exit 0
  fi

elif [[ "${TASK_NAME}" == "smollm2_135m" ]]; then
  $PYTHON_EXECUTABLE backends/qualcomm/tests/test_qnn_delegate.py -k TestExampleLLMScript.test_static_smollm2 --model SM8650 --build_folder build-x86/ --executorch_root . --artifact_dir ./static_smollm2 --enable_x86_64
  exit_code1=$?
  if [ $exit_code1 -ne 0 ]; then
    exit 1
  else
    exit 0
  fi
else
  echo "Unsupported task: $TASK_NAME"
  exit 1
fi
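
Because the individual tests run under set +e, each branch collects exit codes itself and aggregates them before exiting. The dispatcher is driven entirely by its one positional task name; a minimal local invocation might look like this (a sketch that assumes an ExecuTorch checkout where the build-x86/ and build-android/ artifacts already exist, as they do on the CI image):

  # Run one of the three supported tasks by name
  bash .ci/scripts/test_qnn_static_llm.sh smollm2_135m

  # Any other name falls through to the "Unsupported task" branch and exits 1
  bash .ci/scripts/test_qnn_static_llm.sh not_a_task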

.github/workflows/pull.yml

Lines changed: 7 additions & 6 deletions
@@ -558,20 +558,22 @@ jobs:
       # Test llama2
       PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh -model stories110M -build_tool "${BUILD_TOOL}" -mode "${MODE}" -dtype "${DTYPE}" -pt2e_quantize "${PT2E_QUANTIZE}"

-  test-static-llama-qnn-linux:
-    name: test-static-llama-qnn-linux
+  test-static-llm-qnn-linux:
+    name: test-static-llm-qnn-linux
     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
     permissions:
       id-token: write
       contents: read
     strategy:
+      matrix:
+        task: [stories_110m, stories_260k_bc, smollm2_135m]
       fail-fast: false
     with:
-      runner: linux.2xlarge
+      runner: linux.24xlarge
       docker-image: ci-image:executorch-ubuntu-22.04-qnn-sdk
       submodules: 'recursive'
       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
-      timeout: 180
+      timeout: 90
       script: |
         # The generic Linux job chooses to use base env, not the one setup by the image
         CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
@@ -588,8 +590,7 @@ jobs:
         # Setup install_requirements for llama
         PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh

-        # Test static llama weight sharing and accuracy
-        PYTHON_EXECUTABLE=python bash .ci/scripts/test_qnn_static_llama.sh
+        PYTHON_EXECUTABLE=python bash .ci/scripts/test_qnn_static_llm.sh ${{ matrix.task }}

   test-qnn-models-linux:
     name: test-qnn-models-linux
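
The new matrix entry fans this job out into three parallel runs, one per task, and each run hands its task name to the dispatcher script. Conceptually (a local sketch only, not the actual CI runner semantics), the matrix is equivalent to:

  # Roughly what the CI matrix expands to
  for task in stories_110m stories_260k_bc smollm2_135m; do
    PYTHON_EXECUTABLE=python bash .ci/scripts/test_qnn_static_llm.sh "$task"
  done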

backends/qualcomm/tests/test_qnn_delegate.py

Lines changed: 3 additions & 3 deletions
@@ -5580,8 +5580,6 @@ def test_static_smollm2(self):
             "kv",
             "--temperature",
             "0",
-            "--prefill_ar_len",
-            "128",
             "--max_seq_len",
             "1024",
             "--eval_perplexity",
@@ -5609,8 +5607,10 @@
         if "Error" in msg:
             self.fail(msg["Error"])
         else:
+            print("Perplexity score: ", msg["wiki_ppl"])
             self.assertLessEqual(msg["wiki_ppl"], 25)
-            self.assertGreaterEqual(msg["inference_speed"], 200)
+            if not self.enable_x86_64:
+                self.assertGreaterEqual(msg["inference_speed"], 200)

     def test_static_smollm3(self):
         if not self.required_envs():
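
The inference-speed assertion is now gated on not self.enable_x86_64, so only the perplexity bound applies when the test runs on x86. To reproduce the perplexity check outside CI, the same test can be invoked directly; this mirrors the smollm2_135m branch of the dispatcher script (it assumes the x86 build artifacts are present in build-x86/):

  python backends/qualcomm/tests/test_qnn_delegate.py \
    -k TestExampleLLMScript.test_static_smollm2 \
    --model SM8650 --build_folder build-x86/ --executorch_root . \
    --artifact_dir ./static_smollm2 --enable_x86_64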
