
Commit 384a728

Update pull.yml to test snapshot saving and loading (CPU-only) (#1486)

* Update pull.yml to test snapshot saving and loading
* Update pull.yml: fixed typos
* Update pull.yml: use cuda-32.json, because somebody would rather fail a job than accept a partial group
* Update pull.yml: remove fp16 and fp32 int4-quantized models for now. @jerryzh168 Not sure why these dtypes are not compatible with int4 quantization?
* Update pull.yml: add a DEVICE specification for the snapshot test and use device cpu
1 parent b57b2be commit 384a728
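
For reference, the snapshot round-trip exercised by the new bfloat16 step reduces to the two commands below (taken from the diff, with the device loop unrolled for DEVICE=cpu; REPO_NAME is set earlier in the workflow):

    # Quantize the model per cuda-32.json and save the exported state as a snapshot file
    python3 torchchat.py export --device cpu --output-snap model.tc --dtype bfloat16 --quantize torchchat/quant_config/cuda-32.json --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
    # Reload the saved snapshot and run generation from it
    python3 torchchat.py generate --device cpu --snap model.tc --dtype bfloat16 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"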

1 file changed (+25, −4)

.github/workflows/pull.yml

@@ -291,6 +291,16 @@ jobs:
           bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cuda" "aoti-bfloat16"
           echo "::endgroup::"
 
+          echo "::group::Run inference with quantize file"
+          for DEVICE in cpu; do # cuda
+            # cuda - fails because `AttributeError: 'Linear' object has no attribute '_linear_extra_repr'`
+            # follow up with torchao as a separate PR
+            echo "saving snapshot for device ${DEVICE} and dtype bfloat16, and reloading as snapshot"
+            python3 torchchat.py export --device ${DEVICE} --output-snap model.tc --dtype bfloat16 --quantize torchchat/quant_config/cuda-32.json --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
+            python3 torchchat.py generate --device ${DEVICE} --snap model.tc --dtype bfloat16 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
+          done
+          echo "::endgroup::"
+
   test-gpu-aoti-float32:
     permissions:
       id-token: write
@@ -335,6 +345,11 @@ jobs:
           fi
           echo "::endgroup::"
 
+          # echo "::group::Run inference with quantize file"
+          # python3 torchchat.py export --output-snap model.tc --dtype float32 --quantize torchchat/quant_config/cuda-32.json --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
+          # python3 torchchat.py generate --snap model.tc --dtype float32 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
+          # echo "::endgroup::"
+
   test-gpu-aoti-float16:
     permissions:
       id-token: write
@@ -376,10 +391,15 @@ jobs:
           echo "::group::Run inference with quantize file"
           if [ $(uname -s) == Darwin ]; then
             python3 torchchat.py export --output-aoti-package-path /tmp/model.pt2 --quantize torchchat/quant_config/cuda.json --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
-            python3 torchchat.py generate --aoti-package-path /tmp/model.pt2 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"~
+            python3 torchchat.py generate --aoti-package-path /tmp/model.pt2 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
           fi
           echo "::endgroup::"
 
+          # echo "::group::Run inference with quantize file"
+          # python3 torchchat.py export --output-snap model.tc --dtype float16 --quantize torchchat/quant_config/cuda-32.json --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
+          # python3 torchchat.py generate --snap model.tc --dtype float16 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
+          # echo "::endgroup::"
+
   test-gpu-eval-sanity-check:
     permissions:
       id-token: write
@@ -495,10 +515,11 @@ jobs:
           python3 torchchat.py generate --checkpoint-path ${MODEL_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
 
           echo "******************************************"
-          echo "*** --quantize torchchat/quant_config/mobile.json ***"
+          echo "*** can't test --quantize torchchat/quant_config/mobile.json ***"
+          echo "*** testing --quantize torchchat/quant_config/mobile-32.json ***"
           echo "******************************************"
-          # python torchchat.py export --quantize torchchat/quant_config/mobile.json --checkpoint-path ${MODEL_PATH} --output-pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
-          # python3 torchchat.py generate --checkpoint-path ${MODEL_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
+          python torchchat.py export --quantize torchchat/quant_config/mobile-32.json --checkpoint-path ${MODEL_PATH} --output-pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
+          python3 torchchat.py generate --checkpoint-path ${MODEL_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
 
 
           echo "******************************************"
