Bump vllm to 0.5.3.post1 and add 9.0 (h100) target (#26)
This commit pins vLLM to the latest release, 0.5.3.post1, which should be fully compatible with Llama 3.1. It also adds H100s (compute capability 9.0) to the hardware targets. Finally, it installs the latest async cog and pget releases.
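For reference, the 9.0 entry added to TORCH_CUDA_ARCH_LIST below is the compute capability of H100 (Hopper); 8.0 covers A100 and 8.6 covers A40/A6000-class Ampere cards. A minimal sketch (assuming a CUDA-enabled PyTorch build; the helper name is illustrative) for checking which capability the running GPU actually reports:

import torch

def report_compute_capability() -> None:
    # Print each visible GPU's compute capability.
    # 8.0 = A100, 8.6 = A40/A6000-class Ampere, 9.0 = H100 (Hopper).
    if not torch.cuda.is_available():
        print("No CUDA device visible")
        return
    for i in range(torch.cuda.device_count()):
        major, minor = torch.cuda.get_device_capability(i)
        print(f"{torch.cuda.get_device_name(i)}: sm_{major}{minor} ({major}.{minor})")

report_compute_capability()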
joehoover authored Jul 31, 2024
1 parent eddf717 commit 521c7c5
Showing 2 changed files with 3 additions and 34 deletions.
7 changes: 3 additions & 4 deletions cog.yaml
@@ -10,11 +10,10 @@ build:
python_requirements: requirements.txt

run:
- --mount=type=cache,target=/root/.cache/pip TORCH_CUDA_ARCH_LIST="8.0;8.6" CUDA_HOME=/usr/local/cuda pip install --ignore-installed vllm==0.4.2
- --mount=type=cache,target=/root/.cache/pip pip install cog==0.10.0a11
- curl -o /usr/local/bin/pget -L "https://github.com/replicate/pget/releases/download/v0.8.1/pget_linux_x86_64" && chmod +x /usr/local/bin/pget
- --mount=type=cache,target=/root/.cache/pip TORCH_CUDA_ARCH_LIST="8.0;8.6;9.0" CUDA_HOME=/usr/local/cuda pip install --ignore-installed vllm==0.5.3.post1
- --mount=type=cache,target=/root/.cache/pip pip install cog==0.10.0a18
- curl -o /usr/local/bin/pget -L "https://github.com/replicate/pget/releases/download/v0.8.2/pget_linux_x86_64" && chmod +x /usr/local/bin/pget
- sed -i "s/from vllm.model_executor.layers.quantization.schema import QuantParamSchema/# from vllm.model_executor.layers.quantization.schema import QuantParamSchema/" /root/.pyenv/versions/3.11.9/lib/python3.11/site-packages/vllm/model_executor/model_loader/weight_utils.py
- ln -sf $(which echo) $(which pip)

predict: "predict.py:Predictor"
train: "train.py:train"
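Two of the run commands above are workarounds rather than installs: the sed call comments out a vLLM import in weight_utils.py, and ln -sf $(which echo) $(which pip) points the pip executable at echo so that any later pip invocation inside the image becomes a no-op (presumably to keep the pinned packages from being overwritten). Since the commit also moves to the async cog pre-release, the following is a rough sketch of what a streaming async predictor on top of vLLM 0.5.3's AsyncLLMEngine can look like; the model path, input names, and defaults are illustrative assumptions, not this repo's actual predict.py.

# Hypothetical sketch only; not this repository's predict.py.
from cog import BasePredictor, ConcatenateIterator, Input
from vllm import AsyncEngineArgs, AsyncLLMEngine, SamplingParams


class Predictor(BasePredictor):
    def setup(self) -> None:
        # Build the async engine once; "./weights" is an assumed local path
        # where model weights would already have been downloaded (e.g. with pget).
        self.engine = AsyncLLMEngine.from_engine_args(
            AsyncEngineArgs(model="./weights", dtype="auto")
        )

    async def predict(
        self,
        prompt: str = Input(description="Prompt to complete"),
        max_tokens: int = Input(description="Max new tokens", default=128),
        temperature: float = Input(description="Sampling temperature", default=0.7),
    ) -> ConcatenateIterator[str]:
        params = SamplingParams(max_tokens=max_tokens, temperature=temperature)
        previous = ""
        # engine.generate is an async generator of cumulative RequestOutput
        # objects; yield only the newly generated text at each step.
        async for output in self.engine.generate(prompt, params, request_id="0"):
            text = output.outputs[0].text
            yield text[len(previous):]
            previous = text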
30 changes: 0 additions & 30 deletions tests/end_to_end/test_predict.py

This file was deleted.
