
Commit f348cee

Create scripts for export and e2e run
1 parent ea90759 commit f348cee

File tree

4 files changed: +368 −200 lines changed
.ci/scripts/export_model_cuda_artifact.sh

Lines changed: 140 additions & 0 deletions

#!/bin/bash
# (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary.

# Export model to CUDA format with optional quantization

show_help() {
  cat << EOF
Usage: export_model_cuda_artifact.sh <hf_model> [quant_name] [output_dir]

Export a HuggingFace model to CUDA format with optional quantization.

Arguments:
  hf_model     HuggingFace model ID (required)
               Supported models:
                 - mistralai/Voxtral-Mini-3B-2507
                 - openai/whisper-small
                 - google/gemma-3-4b-it

  quant_name   Quantization type (optional, default: non-quantized)
               Options:
                 - non-quantized
                 - quantized-int4-tile-packed
                 - quantized-int4-weight-only

  output_dir   Output directory for artifacts (optional, default: current directory)

Examples:
  export_model_cuda_artifact.sh "openai/whisper-small"
  export_model_cuda_artifact.sh "mistralai/Voxtral-Mini-3B-2507" "quantized-int4-tile-packed"
  export_model_cuda_artifact.sh "google/gemma-3-4b-it" "non-quantized" "./output"
EOF
}

if [ "${1:-}" = "-h" ] || [ "${1:-}" = "--help" ]; then
  show_help
  exit 0
fi

if [ -z "${1:-}" ]; then
  echo "Error: hf_model argument is required"
  echo "Run with -h or --help for usage information"
  exit 1
fi

set -eux

HF_MODEL="$1"
QUANT_NAME="${2:-non-quantized}"
OUTPUT_DIR="${3:-.}"

# Determine model configuration based on HF model ID
case "$HF_MODEL" in
  mistralai/Voxtral-Mini-3B-2507)
    MODEL_NAME="voxtral"
    TASK="multimodal-text-to-text"
    MAX_SEQ_LEN="1024"
    EXTRA_PIP="mistral-common librosa"
    PREPROCESSOR_FEATURE_SIZE="128"
    PREPROCESSOR_OUTPUT="voxtral_preprocessor.pte"
    ;;
  openai/whisper-small)
    MODEL_NAME="whisper"
    TASK="automatic-speech-recognition"
    MAX_SEQ_LEN=""
    EXTRA_PIP="librosa"
    PREPROCESSOR_FEATURE_SIZE="80"
    PREPROCESSOR_OUTPUT="whisper_preprocessor.pte"
    ;;
  google/gemma-3-4b-it)
    MODEL_NAME="gemma3"
    TASK="multimodal-text-to-text"
    MAX_SEQ_LEN="64"
    EXTRA_PIP=""
    PREPROCESSOR_FEATURE_SIZE=""
    PREPROCESSOR_OUTPUT=""
    ;;
  *)
    echo "Error: Unsupported model '$HF_MODEL'"
    echo "Supported models: mistralai/Voxtral-Mini-3B-2507, openai/whisper-small, google/gemma-3-4b-it"
    exit 1
    ;;
esac

# Determine quantization args based on quant name
case "$QUANT_NAME" in
  non-quantized)
    EXTRA_ARGS=""
    ;;
  quantized-int4-tile-packed)
    EXTRA_ARGS="--qlinear 4w --qlinear_encoder 4w --qlinear_packing_format tile_packed_to_4d --qlinear_encoder_packing_format tile_packed_to_4d"
    ;;
  quantized-int4-weight-only)
    EXTRA_ARGS="--qlinear_encoder 4w"
    ;;
  *)
    echo "Error: Unsupported quantization '$QUANT_NAME'"
    echo "Supported quantizations: non-quantized, quantized-int4-tile-packed, quantized-int4-weight-only"
    exit 1
    ;;
esac

echo "::group::Export $MODEL_NAME"
MAX_SEQ_LEN_ARG=""
if [ -n "$MAX_SEQ_LEN" ]; then
  MAX_SEQ_LEN_ARG="--max_seq_len $MAX_SEQ_LEN"
fi
optimum-cli export executorch \
  --model "$HF_MODEL" \
  --task "$TASK" \
  --recipe "cuda" \
  --dtype bfloat16 \
  --device cuda \
  ${MAX_SEQ_LEN_ARG} \
  ${EXTRA_ARGS} \
  --output_dir ./

if [ -n "$PREPROCESSOR_OUTPUT" ]; then
  python -m executorch.extension.audio.mel_spectrogram \
    --feature_size "$PREPROCESSOR_FEATURE_SIZE" \
    --stack_output \
    --max_audio_len 300 \
    --output_file "$PREPROCESSOR_OUTPUT"
fi

test -f model.pte
test -f aoti_cuda_blob.ptd
if [ -n "$PREPROCESSOR_OUTPUT" ]; then
  test -f "$PREPROCESSOR_OUTPUT"
fi
echo "::endgroup::"

echo "::group::Store $MODEL_NAME Artifacts"
mkdir -p "${OUTPUT_DIR}"
cp model.pte "${OUTPUT_DIR}/"
cp aoti_cuda_blob.ptd "${OUTPUT_DIR}/"
if [ -n "$PREPROCESSOR_OUTPUT" ]; then
  cp "$PREPROCESSOR_OUTPUT" "${OUTPUT_DIR}/"
fi
ls -al "${OUTPUT_DIR}"
echo "::endgroup::"

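For concreteness, here is a sketch of the command the script assembles for a tile-packed Voxtral export, with every flag value taken from the case arms above (the exact flag set may vary with the installed optimum-executorch version):

optimum-cli export executorch \
  --model "mistralai/Voxtral-Mini-3B-2507" \
  --task "multimodal-text-to-text" \
  --recipe "cuda" \
  --dtype bfloat16 \
  --device cuda \
  --max_seq_len 1024 \
  --qlinear 4w --qlinear_encoder 4w \
  --qlinear_packing_format tile_packed_to_4d \
  --qlinear_encoder_packing_format tile_packed_to_4d \
  --output_dir ./
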
.ci/scripts/test_model_cuda_e2e.sh

Lines changed: 201 additions & 0 deletions

#!/bin/bash
# (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary.

# Test CUDA model end-to-end

show_help() {
  cat << EOF
Usage: test_model_cuda_e2e.sh <hf_model> <quant_name> [model_dir]

Build and run end-to-end tests for CUDA models.

Arguments:
  hf_model     HuggingFace model ID (required)
               Supported models:
                 - mistralai/Voxtral-Mini-3B-2507
                 - openai/whisper-small
                 - google/gemma-3-4b-it

  quant_name   Quantization type (required)
               Options:
                 - non-quantized
                 - quantized-int4-tile-packed
                 - quantized-int4-weight-only

  model_dir    Directory containing model artifacts (optional, default: current directory)
               Expected files: model.pte, aoti_cuda_blob.ptd
               Tokenizers and test files will be downloaded to this directory

Examples:
  test_model_cuda_e2e.sh "openai/whisper-small" "non-quantized"
  test_model_cuda_e2e.sh "mistralai/Voxtral-Mini-3B-2507" "quantized-int4-tile-packed" "./model_output"
EOF
}

if [ "${1:-}" = "-h" ] || [ "${1:-}" = "--help" ]; then
  show_help
  exit 0
fi

if [ -z "${1:-}" ]; then
  echo "Error: hf_model argument is required"
  echo "Run with -h or --help for usage information"
  exit 1
fi

if [ -z "${2:-}" ]; then
  echo "Error: quant_name argument is required"
  echo "Run with -h or --help for usage information"
  exit 1
fi

set -eux

HF_MODEL="$1"
QUANT_NAME="$2"
# Download tokenizers, audio, and image files to this directory
MODEL_DIR="${3:-.}"

echo "Testing model: $HF_MODEL (quantization: $QUANT_NAME)"

# Make sure model.pte and aoti_cuda_blob.ptd exist
if [ ! -f "$MODEL_DIR/model.pte" ]; then
  echo "Error: model.pte not found in $MODEL_DIR"
  exit 1
fi
if [ ! -f "$MODEL_DIR/aoti_cuda_blob.ptd" ]; then
  echo "Error: aoti_cuda_blob.ptd not found in $MODEL_DIR"
  exit 1
fi
# Locate EXECUTORCH_ROOT from the directory of this script
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
EXECUTORCH_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"

pushd "$EXECUTORCH_ROOT"

# Determine model configuration based on HF model ID
case "$HF_MODEL" in
  mistralai/Voxtral-Mini-3B-2507)
    MODEL_NAME="voxtral"
    RUNNER_TARGET="voxtral_runner"
    RUNNER_PATH="voxtral"
    EXPECTED_OUTPUT="poem"
    PREPROCESSOR="voxtral_preprocessor.pte"
    TOKENIZER_URL="https://huggingface.co/mistralai/Voxtral-Mini-3B-2507/resolve/main"
    TOKENIZER_FILE="tekken.json"
    AUDIO_URL="https://github.com/voxserv/audio_quality_testing_samples/raw/refs/heads/master/testaudio/16000/test01_20s.wav"
    AUDIO_FILE="poem.wav"
    IMAGE_PATH=""
    ;;
  openai/whisper-small)
    MODEL_NAME="whisper"
    RUNNER_TARGET="whisper_runner"
    RUNNER_PATH="whisper"
    EXPECTED_OUTPUT="Mr. Quilter"
    PREPROCESSOR="whisper_preprocessor.pte"
    TOKENIZER_URL="https://huggingface.co/openai/whisper-small/resolve/main"
    TOKENIZER_FILE=""
    AUDIO_URL=""
    AUDIO_FILE="output.wav"
    IMAGE_PATH=""
    ;;
  google/gemma-3-4b-it)
    MODEL_NAME="gemma3"
    RUNNER_TARGET="gemma3_e2e_runner"
    RUNNER_PATH="gemma3"
    EXPECTED_OUTPUT="chip"
    PREPROCESSOR=""
    TOKENIZER_URL="https://huggingface.co/google/gemma-3-4b-it/resolve/main"
    TOKENIZER_FILE=""
    AUDIO_URL=""
    AUDIO_FILE=""
    IMAGE_PATH="docs/source/_static/img/et-logo.png"
    ;;
  *)
    echo "Error: Unsupported model '$HF_MODEL'"
    echo "Supported models: mistralai/Voxtral-Mini-3B-2507, openai/whisper-small, google/gemma-3-4b-it"
    exit 1
    ;;
esac

echo "::group::Setup ExecuTorch Requirements"
./install_requirements.sh
pip list
echo "::endgroup::"

echo "::group::Prepare $MODEL_NAME Artifacts"

# Download tokenizer files
if [ "$TOKENIZER_FILE" != "" ]; then
  curl -L "$TOKENIZER_URL/$TOKENIZER_FILE" -o "$MODEL_DIR/$TOKENIZER_FILE"
else
  curl -L "$TOKENIZER_URL/tokenizer.json" -o "$MODEL_DIR/tokenizer.json"
  curl -L "$TOKENIZER_URL/tokenizer_config.json" -o "$MODEL_DIR/tokenizer_config.json"
  curl -L "$TOKENIZER_URL/special_tokens_map.json" -o "$MODEL_DIR/special_tokens_map.json"
fi

# Download test files
if [ "$AUDIO_URL" != "" ]; then
  curl -L "$AUDIO_URL" -o "${MODEL_DIR}/$AUDIO_FILE"
elif [ "$MODEL_NAME" = "whisper" ]; then
  python -c "from datasets import load_dataset;import soundfile as sf;sample = load_dataset('distil-whisper/librispeech_long', 'clean', split='validation')[0]['audio'];sf.write('${MODEL_DIR}/$AUDIO_FILE', sample['array'][:sample['sampling_rate']*30], sample['sampling_rate'])"
fi

ls -al
echo "::endgroup::"

echo "::group::Build $MODEL_NAME Runner"
cmake --preset llm \
  -DEXECUTORCH_BUILD_CUDA=ON \
  -DCMAKE_INSTALL_PREFIX=cmake-out \
  -DCMAKE_BUILD_TYPE=Release \
  -Bcmake-out -S.
cmake --build cmake-out -j"$(nproc)" --target install --config Release

cmake -DEXECUTORCH_BUILD_CUDA=ON \
  -DCMAKE_BUILD_TYPE=Release \
  -Sexamples/models/$RUNNER_PATH \
  -Bcmake-out/examples/models/$RUNNER_PATH/
cmake --build cmake-out/examples/models/$RUNNER_PATH --target $RUNNER_TARGET --config Release
echo "::endgroup::"

echo "::group::Run $MODEL_NAME Runner"
set +e
export LD_LIBRARY_PATH=/opt/conda/lib:$LD_LIBRARY_PATH

# Build runner command with common arguments
RUNNER_BIN="cmake-out/examples/models/$RUNNER_PATH/$RUNNER_TARGET"
RUNNER_ARGS="--model_path ${MODEL_DIR}/model.pte --data_path ${MODEL_DIR}/aoti_cuda_blob.ptd --temperature 0"

# Add model-specific arguments
case "$MODEL_NAME" in
  voxtral)
    RUNNER_ARGS="$RUNNER_ARGS --tokenizer_path ${MODEL_DIR}/$TOKENIZER_FILE --audio_path ${MODEL_DIR}/$AUDIO_FILE --processor_path ${MODEL_DIR}/$PREPROCESSOR"
    ;;
  whisper)
    RUNNER_ARGS="$RUNNER_ARGS --tokenizer_path ${MODEL_DIR}/ --audio_path ${MODEL_DIR}/$AUDIO_FILE --processor_path ${MODEL_DIR}/$PREPROCESSOR"
    ;;
  gemma3)
    RUNNER_ARGS="$RUNNER_ARGS --tokenizer_path ${MODEL_DIR}/ --image_path $IMAGE_PATH"
    ;;
esac

OUTPUT=$($RUNNER_BIN $RUNNER_ARGS 2>&1)
EXIT_CODE=$?
set -e

if ! echo "$OUTPUT" | grep -iq "$EXPECTED_OUTPUT"; then
  echo "Expected output '$EXPECTED_OUTPUT' not found in output"
  exit 1
else
  echo "Success: '$EXPECTED_OUTPUT' found in output"
fi

if [ $EXIT_CODE -ne 0 ]; then
  echo "Unexpected exit code: $EXIT_CODE"
  exit $EXIT_CODE
fi
echo "::endgroup::"

popd

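Taken together, the two scripts form a simple pipeline: the export script writes model.pte, aoti_cuda_blob.ptd, and (for the audio models) a preprocessor .pte into output_dir, and the e2e script consumes those same files from model_dir. A minimal sketch of chaining them, assuming both scripts live under .ci/scripts/ in an ExecuTorch checkout and run on a CUDA-capable machine:

# Export artifacts for Whisper, then build and run the e2e test against them.
./.ci/scripts/export_model_cuda_artifact.sh "openai/whisper-small" "non-quantized" "./model_output"
./.ci/scripts/test_model_cuda_e2e.sh "openai/whisper-small" "non-quantized" "./model_output"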