fix(docker): Fix GGML_CUDA param (abetlen#1633)

Smartappli · web-flow · commit ac0217404c72 · 2024-07-31T12:13:24.000-04:00
diff --git a/docker/cuda_simple/Dockerfile b/docker/cuda_simple/Dockerfile
@@ -15,13 +15,13 @@ COPY . .
 
 # setting build related env vars
 ENV CUDA_DOCKER_ARCH=all
-ENV LLAMA_CUBLAS=1
+ENV GGML_CUDA=1
 
-# Install depencencies
+# Install dependencies
 RUN python3 -m pip install --upgrade pip pytest cmake scikit-build setuptools fastapi uvicorn sse-starlette pydantic-settings starlette-context
 
 # Install llama-cpp-python (build with cuda)
-RUN CMAKE_ARGS="-DLLAMA_CUBLAS=on" pip install llama-cpp-python
+RUN CMAKE_ARGS="-DGGML_CUDA=on" pip install llama-cpp-python
 
 # Run the server
 CMD python3 -m llama_cpp.server