Skip to content

Commit 5d5615f

Browse files
authored
Merge branch 'main' into Jack-Khuu-patch-23
2 parents a3d8cd8 + 2766a95 commit 5d5615f

27 files changed

+543
-278
lines changed

.ci/scripts/check_gibberish

+12
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,18 @@ else
2424
fi
2525
fi
2626

27+
#######################################################################
28+
#
29+
# check whether the aspell spell checker is available
30+
31+
if command -v aspell &> /dev/null; then
32+
echo "Checking $TMPFILE for gibberish"
33+
else
34+
echo "Aspell is not installed or not in PATH."
35+
echo "Gibberish unchecked in $TMPFILE"
36+
exit 0
37+
fi
38+
2739
#######################################################################
2840
#
2941
# run spell check on the extracted sequence

.ci/scripts/run-docs

+66-140
Original file line numberDiff line numberDiff line change
@@ -1,145 +1,71 @@
1-
# /bin/bash -x
1+
#!/bin/bash -x
22

3-
if [ "X$1" == "X" ]; then
3+
# Check if an argument was provided
4+
if [ -z "$1" ]; then
45
echo "Must specify document to run"
56
exit 1
67
fi
78

8-
if [ "$1" == "readme" ]; then
9-
echo "::group::Create script to run README"
10-
python3 torchchat/utils/scripts/updown.py --create-sections --file README.md --replace 'llama3.1:stories15M,-l 3:-l 2' --suppress huggingface-cli,HF_TOKEN > ./run-readme.sh
11-
# for good measure, if something happened to updown processor,
12-
# and it did not error out, fail with an exit 1
13-
echo "exit 1" >> ./run-readme.sh
14-
echo "::endgroup::"
15-
16-
echo "::group::Run README"
17-
echo "*******************************************"
18-
cat ./run-readme.sh
19-
echo "*******************************************"
20-
bash -x ./run-readme.sh
21-
echo "::endgroup::"
22-
23-
exit 0
24-
fi
25-
26-
if [ "$1" == "quantization" ]; then
27-
echo "::group::Create script to run quantization"
28-
python3 torchchat/utils/scripts/updown.py --create-sections --file docs/quantization.md --replace llama3:stories15M --suppress huggingface-cli,HF_TOKEN > ./run-quantization.sh
29-
# for good measure, if something happened to updown processor,
30-
# and it did not error out, fail with an exit 1
31-
echo "exit 1" >> ./run-quantization.sh
32-
echo "::endgroup::"
33-
34-
echo "::group::Run quantization"
35-
echo "*******************************************"
36-
cat ./run-quantization.sh
37-
echo "*******************************************"
38-
bash -x ./run-quantization.sh
39-
echo "::endgroup::"
40-
41-
exit 0
42-
fi
43-
44-
if [ "$1" == "gguf" ]; then
45-
echo "::group::Create script to run gguf"
46-
python3 torchchat/utils/scripts/updown.py --file docs/GGUF.md --replace 'llama3:stories15M,-l 3:-l 2' --suppress huggingface-cli,HF_TOKEN > ./run-gguf.sh
47-
# for good measure, if something happened to updown processor,
48-
# and it did not error out, fail with an exit 1
49-
echo "exit 1" >> ./run-gguf.sh
50-
echo "::endgroup::"
51-
52-
echo "::group::Run gguf"
53-
echo "*******************************************"
54-
cat ./run-gguf.sh
55-
echo "*******************************************"
56-
bash -x ./run-gguf.sh
57-
echo "::endgroup::"
58-
fi
59-
60-
61-
if [ "$1" == "advanced" ]; then
62-
echo "::group::Create script to run advanced"
63-
python3 torchchat/utils/scripts/updown.py --file docs/ADVANCED-USERS.md --replace 'llama3:stories15M,-l 3:-l 2' --suppress huggingface-cli,HF_TOKEN > ./run-advanced.sh
64-
# for good measure, if something happened to updown processor,
65-
# and it did not error out, fail with an exit 1
66-
echo "exit 1" >> ./run-advanced.sh
67-
echo "::endgroup::"
68-
69-
echo "::group::Run advanced"
70-
echo "*******************************************"
71-
cat ./run-advanced.sh
72-
echo "*******************************************"
73-
bash -x ./run-advanced.sh
74-
echo "::endgroup::"
75-
fi
76-
77-
if [ "$1" == "evaluation" ]; then
78-
echo "::group::Create script to run evaluation"
79-
python3 torchchat/utils/scripts/updown.py --file torchchat/utils/docs/evaluation.md --replace 'llama3:stories15M,-l 3:-l 2' --suppress huggingface-cli,HF_TOKEN > ./run-evaluation.sh
80-
# for good measure, if something happened to updown processor,
81-
# and it did not error out, fail with an exit 1
82-
echo "exit 1" >> ./run-evaluation.sh
83-
echo "::endgroup::"
84-
85-
echo "::group::Run evaluation"
86-
echo "*******************************************"
87-
cat ./run-evaluation.sh
88-
echo "*******************************************"
89-
bash -x ./run-evaluation.sh
90-
fi
91-
92-
if [ "$1" == "multimodal" ]; then
93-
94-
# Expecting that this might fail this test as-is, because
95-
# it's the first on-pr test depending on github secrets for access with HF token access
96-
97-
echo "::group::Create script to run multimodal"
98-
python3 torchchat/utils/scripts/updown.py --file docs/multimodal.md > ./run-multimodal.sh
99-
# for good measure, if something happened to updown processor,
100-
# and it did not error out, fail with an exit 1
101-
echo "exit 1" >> ./run-multimodal.sh
102-
echo "::endgroup::"
103-
104-
echo "::group::Run multimodal"
105-
echo "*******************************************"
106-
cat ./run-multimodal.sh
107-
echo "*******************************************"
108-
bash -x ./run-multimodal.sh
109-
echo "::endgroup::"
110-
fi
111-
112-
if [ "$1" == "native" ]; then
113-
114-
echo "::group::Create script to run native-execution"
115-
python3 torchchat/utils/scripts/updown.py --file docs/native-execution.md > ./run-native.sh
116-
# for good measure, if something happened to updown processor,
117-
# and it did not error out, fail with an exit 1
118-
echo "exit 1" >> ./run-native.sh
119-
echo "::endgroup::"
120-
121-
echo "::group::Run native-execution"
122-
echo "*******************************************"
123-
cat ./run-native.sh
124-
echo "*******************************************"
125-
bash -x ./run-native.sh
126-
echo "::endgroup::"
127-
fi
128-
129-
if [ "$1" == "distributed" ]; then
130-
131-
echo "::group::Create script to run distributed"
132-
python3 torchchat/utils/scripts/updown.py --file docs/distributed.md --replace 'llama3.1:stories110M,-l 3:-l 2' --suppress huggingface-cli,HF_TOKEN > ./run-distributed.sh
133-
python3 torchchat/utils/scripts/updown.py --file docs/distributed.md --suppress huggingface-cli,HF_TOKEN > ./run-distributed.sh
134-
# for good measure, if something happened to updown processor,
135-
# and it did not error out, fail with an exit 1
136-
echo "exit 1" >> ./run-distributed.sh
137-
echo "::endgroup::"
138-
139-
echo "::group::Run distributed"
140-
echo "*******************************************"
141-
cat ./run-distributed.sh
142-
echo "*******************************************"
143-
bash -x ./run-distributed.sh
144-
echo "::endgroup::"
145-
fi
9+
# Pre-initialize variables
10+
filepath=""
11+
# cuda supports padding, so no need to replace quantization for now.
12+
# otherwise add: 'cuda.json:cuda-32.json' to replace rules
13+
parameters="--replace llama3:stories15M,-l3:-l2,mobile.json:mobile-32.json --suppress huggingface-cli,HF_TOKEN"
14+
script_name="./run-${1}.sh" # Dynamically initialize script name
15+
16+
# Use a case statement to handle the $1 argument
17+
case "$1" in
18+
"readme")
19+
filepath="README.md"
20+
parameters="--replace llama3.1:stories15M,-l3:-l2,mobile.json:mobile-32.json --suppress huggingface-cli,HF_TOKEN"
21+
;;
22+
"quantization")
23+
filepath="docs/quantization.md"
24+
;;
25+
"gguf")
26+
filepath="docs/GGUF.md"
27+
;;
28+
"advanced")
29+
filepath="docs/ADVANCED-USERS.md"
30+
;;
31+
"evaluation")
32+
filepath="torchchat/utils/docs/evaluation.md"
33+
;;
34+
"multimodal")
35+
filepath="docs/multimodal.md"
36+
parameters="" # Clear parameters
37+
;;
38+
"native")
39+
filepath="docs/native-execution.md"
40+
parameters="" # Clear parameters
41+
;;
42+
"distributed")
43+
filepath="docs/distributed.md"
44+
parameters="--replace llama3.1:stories110M,-l3:-l2 --suppress huggingface-cli,HF_TOKEN" # Use stories110M to avoid need for authentication
45+
;;
46+
"local")
47+
filepath="docs/local-model.md"
48+
parameters="" # Clear parameters
49+
;;
50+
51+
*)
52+
echo "Unknown option: $1"
53+
exit 1
54+
;;
55+
esac
56+
57+
# Generate the script
58+
echo "::group::Create script to run $1"
59+
python3 torchchat/utils/scripts/updown.py --file "$filepath" $parameters > "$script_name"
60+
# if something happened to updown processor, and it did not error out, fail with an exit 1
61+
echo "exit 1" >> "$script_name"
62+
echo "::endgroup::"
63+
64+
# Run the script
65+
echo "::group::Run $1"
66+
echo "*******************************************"
67+
cat "$script_name"
68+
echo "*******************************************"
69+
set -x
70+
. "$script_name"
71+
echo "::endgroup::"

.github/workflows/more-tests.yml

+67-2
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ jobs:
1919
gpu-arch-version: "12.4"
2020
timeout: 60
2121
script: |
22+
set -xeou pipefail
2223
echo "::group::Print machine info"
2324
uname -a
2425
echo "::endgroup::"
@@ -39,9 +40,10 @@ jobs:
3940
echo "::endgroup::"
4041
4142
echo "::group::Run inference"
42-
export MODEL_PATH=checkpoints/stories15M/stories15M.pt
43+
export MODEL_DIR=checkpoints/stories15M/
44+
export MODEL_PATH=${MODEL_DIR}/stories15M.pt
4345
export MODEL_NAME=stories15M
44-
export MODEL_DIR=/tmp
46+
4547
4648
for DTYPE in bfloat16 float16 float32; do
4749
###################################################################
@@ -83,3 +85,66 @@ jobs:
8385
echo "tests complete"
8486
echo "******************************************"
8587
echo "::endgroup::"
88+
89+
90+
test-sdpa-backends-export:
91+
permissions:
92+
id-token: write
93+
contents: read
94+
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
95+
with:
96+
runner: linux.g5.4xlarge.nvidia.gpu
97+
gpu-arch-type: cuda
98+
gpu-arch-version: "12.4"
99+
timeout: 60
100+
script: |
101+
set -xeou pipefail
102+
echo "::group::Print machine info"
103+
uname -a
104+
echo "::endgroup::"
105+
106+
echo "::group::Download checkpoints"
107+
# Install requirements
108+
./install/install_requirements.sh cuda
109+
pip3 list
110+
python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")'
111+
echo "::endgroup::"
112+
113+
echo "::group::Download checkpoints"
114+
mkdir -p checkpoints/stories15M
115+
pushd checkpoints/stories15M
116+
wget https://huggingface.co/karpathy/tinyllamas/resolve/main/stories15M.pt
117+
wget https://github.com/karpathy/llama2.c/raw/master/tokenizer.model
118+
popd
119+
echo "::endgroup::"
120+
121+
echo "::group::Run inference"
122+
export MODEL_DIR=checkpoints/stories15M/
123+
export MODEL_PATH=${MODEL_DIR}/stories15M.pt
124+
export MODEL_NAME=stories15M
125+
126+
./torchchat/utils/scripts/build_native.sh aoti
127+
128+
for DEVICE in cpu cuda; do
129+
# depending on how the parameter passing works, may only be able to do bfloat16 for aoti_run, similar to runner-cuda-dtype.yml
130+
# (although the runner environment should not have an opinion on what we use in the artifact, and we might suitably abstract that)
131+
for DTYPE in bfloat16 float16 float32; do
132+
for SDPA in 'math' 'flash_attention' 'efficient_attention' 'cudnn_attention'; do
133+
echo "***************************************************************"
134+
echo "*** $DEVICE $DTYPE $SDPA"
135+
###################################################################
136+
# Export DSO and run with Python
137+
python torchchat.py export --output-dso dso.so --checkpoint-path ${MODEL_PATH} --attention-backend ${SDPA} --device ${DEVICE} --dtype ${DTYPE}
138+
python torchchat.py generate --dso-path dso.so --checkpoint-path ${MODEL_PATH} --attention-backend ${SDPA} --device ${DEVICE} --dtype ${DTYPE} --temperature 0 --prompt "Once upon a time"
139+
###################################################################
140+
# Export AOTI and run with aoti_run
141+
python torchchat.py export --output-aoti /tmp/model.pt2 --checkpoint-path ${MODEL_PATH} --attention-backend ${SDPA} --device ${DEVICE} --dtype ${DTYPE}
142+
./cmake-out/aoti_run /tmp/model.pt2 -z ${MODEL_DIR}/tokenizer.model -i "Once upon a time"
143+
###################################################################
144+
done
145+
done
146+
done
147+
148+
echo "tests complete"
149+
echo "******************************************"
150+
echo "::endgroup::"

0 commit comments

Comments
 (0)