Bump vllm to 0.5.3.post1 and add 9.0 (h100) target (#26)
This commit pins vLLM to the latest release, 0.5.3.post1, which should be fully compatible with Llama 3.1. It also adds H100s (compute capability 9.0) to the hardware targets. Finally, it installs the latest async cog and pget releases.
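For reference, the 9.0 entry added to TORCH_CUDA_ARCH_LIST below is the compute capability of H100 (Hopper); 8.0 covers A100 and 8.6 covers A40/A6000-class Ampere cards. A minimal sketch (assuming a CUDA-enabled PyTorch build; the helper name is illustrative) for checking which capability the running GPU actually reports:

import torch

def report_compute_capability() -> None:
    # Print each visible GPU's compute capability.
    # 8.0 = A100, 8.6 = A40/A6000-class Ampere, 9.0 = H100 (Hopper).
    if not torch.cuda.is_available():
        print("No CUDA device visible")
        return
    for i in range(torch.cuda.device_count()):
        major, minor = torch.cuda.get_device_capability(i)
        print(f"{torch.cuda.get_device_name(i)}: sm_{major}{minor} ({major}.{minor})")

report_compute_capability()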
joehoover authored Jul 31, 2024
1 parent eddf717 commit 521c7c5
Showing 2 changed files with 3 additions and 34 deletions.
7 changes: 3 additions & 4 deletions cog.yaml
@@ -10,11 +10,10 @@ build:
python_requirements: requirements.txt

run:
- --mount=type=cache,target=/root/.cache/pip TORCH_CUDA_ARCH_LIST="8.0;8.6" CUDA_HOME=/usr/local/cuda pip install --ignore-installed vllm==0.4.2
- --mount=type=cache,target=/root/.cache/pip pip install cog==0.10.0a11
- curl -o /usr/local/bin/pget -L "https://github.com/replicate/pget/releases/download/v0.8.1/pget_linux_x86_64" && chmod +x /usr/local/bin/pget
- --mount=type=cache,target=/root/.cache/pip TORCH_CUDA_ARCH_LIST="8.0;8.6;9.0" CUDA_HOME=/usr/local/cuda pip install --ignore-installed vllm==0.5.3.post1
- --mount=type=cache,target=/root/.cache/pip pip install cog==0.10.0a18
- curl -o /usr/local/bin/pget -L "https://github.com/replicate/pget/releases/download/v0.8.2/pget_linux_x86_64" && chmod +x /usr/local/bin/pget
- sed -i "s/from vllm.model_executor.layers.quantization.schema import QuantParamSchema/# from vllm.model_executor.layers.quantization.schema import QuantParamSchema/" /root/.pyenv/versions/3.11.9/lib/python3.11/site-packages/vllm/model_executor/model_loader/weight_utils.py
- ln -sf $(which echo) $(which pip)

predict: "predict.py:Predictor"
train: "train.py:train"
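Two of the run commands above are workarounds rather than installs: the sed call comments out a vLLM import in weight_utils.py, and ln -sf $(which echo) $(which pip) points the pip executable at echo so that any later pip invocation inside the image becomes a no-op (presumably to keep the pinned packages from being overwritten). Since the commit also moves to the async cog pre-release, the following is a rough sketch of what a streaming async predictor on top of vLLM 0.5.3's AsyncLLMEngine can look like; the model path, input names, and defaults are illustrative assumptions, not this repo's actual predict.py.

# Hypothetical sketch only; not this repository's predict.py.
from cog import BasePredictor, ConcatenateIterator, Input
from vllm import AsyncEngineArgs, AsyncLLMEngine, SamplingParams


class Predictor(BasePredictor):
    def setup(self) -> None:
        # Build the async engine once; "./weights" is an assumed local path
        # where model weights would already have been downloaded (e.g. with pget).
        self.engine = AsyncLLMEngine.from_engine_args(
            AsyncEngineArgs(model="./weights", dtype="auto")
        )

    async def predict(
        self,
        prompt: str = Input(description="Prompt to complete"),
        max_tokens: int = Input(description="Max new tokens", default=128),
        temperature: float = Input(description="Sampling temperature", default=0.7),
    ) -> ConcatenateIterator[str]:
        params = SamplingParams(max_tokens=max_tokens, temperature=temperature)
        previous = ""
        # engine.generate is an async generator of cumulative RequestOutput
        # objects; yield only the newly generated text at each step.
        async for output in self.engine.generate(prompt, params, request_id="0"):
            text = output.outputs[0].text
            yield text[len(previous):]
            previous = text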
30 changes: 0 additions & 30 deletions tests/end_to_end/test_predict.py

This file was deleted.
