Skip to content

Commit

Permalink
undo scipy changes
Browse files Browse the repository at this point in the history
  • Loading branch information
dafeliton committed Dec 19, 2024
1 parent e84cb93 commit 5c6563d
Showing 1 changed file with 57 additions and 53 deletions.
110 changes: 57 additions & 53 deletions images/scipy-ml-notebook/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,23 +1,25 @@
ARG BASE_TAG=latest
ARG BASE_TAG=2025.1-main
FROM ghcr.io/ucsd-ets/datascience-notebook:${BASE_TAG}

USER root

# Package versions (adjust as needed)
ARG CUDA_VERSION=12.1
ARG CUDNN_VERSION=8.9.7.29
ARG TENSORFLOW_VERSION=2.17.0
ARG KERAS_VERSION=3.5.0
ARG TENSORRT_VERSION=8.6.1
ARG TORCH_VERSION=2.3.1
ARG PROTOBUF_VERSION=3.20.3
# tensorflow, pytorch stable versions
# https://pytorch.org/get-started/previous-versions/
# https://www.tensorflow.org/install/source#linux

# Python/Mamba deps
## Package versions
## TensorFlow 2.13 cannot be installed alongside torch 2.2.1: the two require conflicting versions of typing-extensions.
ARG CUDA_VERSION=12.1 CUDNN_VERSION=8.9.2.26 LIBNVINFER=7.2.2 LIBNVINFER_MAJOR_VERSION=7 \
TENSORFLOW_VERSION=2.17.0 KERAS_VERSION=3.5.0 TENSORRT_VERSION=8.6.1 TORCH_VERSION=2.3.1 \
PROTOBUF_VERSION=3.20.3

# apt deps
RUN apt-get update && \
apt-get install -y libtinfo6 build-essential && \
apt-get install -y \
libtinfo6 build-essential && \
apt-get clean && rm -rf /var/lib/apt/lists/*

# Symbolic link for Stata 17 dependency on libncurses5
## Symbolic link for Stata 17 dependency on libncurses5
# Create /usr/lib/x86_64-linux-gnu/libncurses.so.5 as a symlink. The link target
# is relative, so it resolves to libncurses.so.6 in that same directory.
RUN ln -s libncurses.so.6 /usr/lib/x86_64-linux-gnu/libncurses.so.5

# Jupyter setup
Expand All @@ -35,72 +37,74 @@ ADD manual_tests /opt/manual_tests

# Give every user read, write and execute permission on the datahub profile
# scripts and on /tmp/activate.sh.
# NOTE(review): mode 777 also lets any user modify these scripts - confirm that
# world-writable is intended rather than 755.
RUN chmod 777 /etc/datahub-profile.d/*.sh /tmp/activate.sh

# Switch to non-root user for installing packages via mamba/pip
USER jovyan

# Install nvdashboard for GPU monitoring
RUN mamba install -c rapidsai-nightly -c conda-forge jupyterlab-nvdashboard && \
mamba clean -a -y

# Install CUDA toolkit, NCCL, cuDNN via Conda
RUN mamba install -c "nvidia/label/cuda-12.1.1" \
cuda-nvcc \
cuda-toolkit=${CUDA_VERSION} \
cuda-version=${CUDA_VERSION} \
RUN mamba install -c rapidsai-nightly -c conda-forge jupyterlab-nvdashboard

# CUDA setup w/mamba
## TODO: Investigate this command; it seems to install duplicate NVIDIA CUDA packages (PyPI + conda-forge).
# cuda-toolkit is a skeleton package on CUDA 12, unlike CUDA <= 11
RUN mamba install -c "nvidia/label/cuda-12.1.1" cuda-nvcc \
cuda-toolkit=$CUDA_VERSION \
cuda-version=$CUDA_VERSION \
nccl \
cudnn=${CUDNN_VERSION} \
-y && \
fix-permissions $CONDA_DIR && \
fix-permissions /home/$NB_USER && \
mamba clean -a -y

# Install protobuf via pip to ensure a specific version
RUN pip install --no-cache-dir protobuf==${PROTOBUF_VERSION}

# Install other Python packages that are simpler via pip
RUN pip install --no-cache-dir opencv-contrib-python-headless opencv-python && \
fix-permissions $CONDA_DIR && \
# Install scipy pip packages
## install protobuf to avoid weird base type error. seems like if we don't then it'll be installed twice.
## https://github.com/spesmilo/electrum/issues/7825
## pip cache purge didn't work here for some reason, so --no-cache-dir is used instead.
RUN pip install --no-cache-dir protobuf==$PROTOBUF_VERSION
## cuda-python installed to have parity with tensorflow and cudnn
## Install pillow<7 due to dependency issue https://github.com/pytorch/vision/issues/1712
## tensorrt installed to fix not having libnvinfer that has caused tensorflow issues.
RUN pip install opencv-contrib-python-headless \
opencv-python && \
fix-permissions $CONDA_DIR && \
fix-permissions /home/$NB_USER && \
pip cache purge

# Install common packages via conda-forge
RUN mamba install -c conda-forge pyqt pycocotools pillow scapy && \
fix-permissions $CONDA_DIR && \
fix-permissions /home/$NB_USER && \
mamba clean --all

# Install PyTorch and GPU support from Conda
# Use pytorch & nvidia channels to ensure proper CUDA integration
RUN mamba install pytorch==${TORCH_VERSION} torchvision torchaudio pytorch-cuda=12.1 -c pytorch -c nvidia -y && \
fix-permissions $CONDA_DIR && \
fix-permissions /home/$NB_USER && \
mamba clean -a -y

# Install TensorFlow, Keras, and TF datasets from conda-forge if available
# Note: Check if these versions are available and GPU-accelerated on conda-forge.
RUN mamba install tensorflow==${TENSORFLOW_VERSION} keras==${KERAS_VERSION} tensorflow-datasets -c conda-forge -y && \
fix-permissions $CONDA_DIR && \
fix-permissions /home/$NB_USER && \
mamba clean -a -y

# Additional ML packages via pip
RUN pip install --no-cache-dir transformers datasets accelerate huggingface-hub timm && \
fix-permissions $CONDA_DIR && \
fix-permissions /home/$NB_USER && \
mamba clean -a -y && \
pip cache purge
# Install CUDA/Torch/Tensorflow/Keras w/pip
# TF Compatibility Matrix: https://www.tensorflow.org/install/source?hl=en#gpu
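## No purge is required when --no-cache-dir is used; `pip cache purge` will actually break the build here!
## NOTE(review): the RUN below calls `pip cache purge` and does not pass --no-cache-dir - confirm this comment still applies.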
## Beware of potentially needing to update these if we update the drivers.
## Check tensorrt_env_vars.sh if you have to bump tensorrt!
RUN pip install nvidia-cudnn-cu12==$CUDNN_VERSION torch==$TORCH_VERSION torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121 && \
pip install tensorflow==$TENSORFLOW_VERSION tensorflow-datasets tensorrt==$TENSORRT_VERSION keras==$KERAS_VERSION tf-keras==$TENSORFLOW_VERSION && \
fix-permissions $CONDA_DIR && \
fix-permissions /home/$NB_USER && \
mamba clean -a -y && \
pip cache purge

RUN pip install transformers datasets accelerate huggingface-hub timm && \
fix-permissions $CONDA_DIR && \
fix-permissions /home/$NB_USER && \
mamba clean -a -y && \
pip cache purge

# Run the remaining instructions as the user/group given by NB_UID and NB_GID.
USER $NB_UID:$NB_GID
# Append /usr/local/nvidia/bin and /opt/conda/bin to PATH. They are added at the
# end, so entries already on PATH take precedence.
ENV PATH=${PATH}:/usr/local/nvidia/bin:/opt/conda/bin

# CUDA fixes for CONDA
## Copy libdevice.10.bc from $CONDA_DIR/nvvm/libdevice into
## $CONDA_DIR/lib/nvvm/libdevice, creating the destination directory first
## (mkdir -p is a no-op if it already exists).
RUN mkdir -p $CONDA_DIR/lib/nvvm/libdevice && \
cp $CONDA_DIR/nvvm/libdevice/libdevice.10.bc $CONDA_DIR/lib/nvvm/libdevice/
## Source path that was used with CUDA 11, kept for reference:
#CUDA 11: cp $CONDA_DIR/lib/libdevice.10.bc $CONDA_DIR/lib/nvvm/libdevice/

# TensorRT fix for TensorFlow (if needed)
# Adjust paths as necessary, depending on how tensorrt is installed.
#RUN ln -s /opt/conda/lib/python3.11/site-packages/tensorrt_libs/libnvinfer_plugin.so.8 /opt/conda/lib/python3.11/site-packages/tensorrt_libs/libnvinfer_plugin.so.${TENSORRT_VERSION} && \
# ln -s /opt/conda/lib/python3.11/site-packages/tensorrt_libs/libnvinfer.so.8 /opt/conda/lib/python3.11/site-packages/tensorrt_libs/libnvinfer.so.${TENSORRT_VERSION}
# TensorRT fix for tensorflow
## https://github.com/tensorflow/tensorflow/issues/61468 (could not find TensorRT)
## Creates libnvinfer_plugin.so.$TENSORRT_VERSION and libnvinfer.so.$TENSORRT_VERSION
## as symlinks to the corresponding .so.8 files in tensorrt_libs.
## The paths are hard-coded to /opt/conda/lib/python3.11 and to the .so.8 suffix,
## so both must be revisited when Python or TensorRT is bumped.
## This will most definitely have to be changed after 8.6.1...
RUN ln -s /opt/conda/lib/python3.11/site-packages/tensorrt_libs/libnvinfer_plugin.so.8 /opt/conda/lib/python3.11/site-packages/tensorrt_libs/libnvinfer_plugin.so.$TENSORRT_VERSION && \
ln -s /opt/conda/lib/python3.11/site-packages/tensorrt_libs/libnvinfer.so.8 /opt/conda/lib/python3.11/site-packages/tensorrt_libs/libnvinfer.so.$TENSORRT_VERSION

# Run datahub scripts
## Sources /tmp/activate.sh in the shell of this build step.
## NOTE(review): shell state set by a sourced script (variables, cwd) does not
## carry over to later instructions; only files the script writes persist in
## the image. Confirm activate.sh relies on filesystem side effects only.
RUN . /tmp/activate.sh

0 comments on commit 5c6563d

Please sign in to comment.