Commit 2ad5f87

Merge branch 'main' into tpu-1vm-image
2 parents: 5978db8 + 389cecd

13 files changed: +161 -32 lines

Dockerfile.tmpl (+27 -17)

@@ -12,8 +12,10 @@ ARG TORCHVISION_VERSION
 FROM gcr.io/kaggle-images/python-lightgbm-whl:${GPU_BASE_IMAGE_NAME}-${BASE_IMAGE_TAG}-${LIGHTGBM_VERSION} AS lightgbm_whl
 FROM gcr.io/kaggle-images/python-torch-whl:${GPU_BASE_IMAGE_NAME}-${BASE_IMAGE_TAG}-${TORCH_VERSION} AS torch_whl
 FROM ${BASE_IMAGE_REPO}/${GPU_BASE_IMAGE_NAME}:${BASE_IMAGE_TAG}
-ENV CUDA_MAJOR_VERSION=11
-ENV CUDA_MINOR_VERSION=0
+ARG CUDA_MAJOR_VERSION
+ARG CUDA_MINOR_VERSION
+ENV CUDA_MAJOR_VERSION=${CUDA_MAJOR_VERSION}
+ENV CUDA_MINOR_VERSION=${CUDA_MINOR_VERSION}
 # NVIDIA binaries from the host are mounted to /opt/bin.
 ENV PATH=/opt/bin:${PATH}
 # Add CUDA stubs to LD_LIBRARY_PATH to support building the GPU image on a CPU machine.
@@ -51,6 +53,13 @@ RUN pip uninstall -y horovod && \
     /tmp/clean-layer.sh
 {{ end }}

+{{ if eq .Accelerator "gpu" }}
+# b/230864778: Temporarily swap the NVIDIA GPG key. Remove once new base image with new GPG key is released.
+RUN rm /etc/apt/sources.list.d/cuda.list && \
+    apt-key del 7fa2af80 && \
+    apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/7fa2af80.pub
+{{ end }}
+
 # Use a fixed apt-get repo to stop intermittent failures due to flaky httpredir connections,
 # as described by Lionel Chan at http://stackoverflow.com/a/37426929/5881346
 RUN sed -i "s/httpredir.debian.org/debian.uchicago.edu/" /etc/apt/sources.list && \
@@ -72,8 +81,7 @@ RUN conda config --add channels nvidia && \
     conda config --add channels rapidsai && \
     # Base image channel order: conda-forge (highest priority), defaults.
     # End state: rapidsai (highest priority), nvidia, conda-forge, defaults.
-    # b/216162758 Pin mkl which last version breaks spacy.
-    conda install mkl=2021.4.0 cartopy=0.19 imagemagick=7.1 pyproj==3.1.0 && \
+    conda install mkl cartopy=0.19 imagemagick=7.1 pyproj==3.1.0 && \
     /tmp/clean-layer.sh

 {{ if eq .Accelerator "gpu" }}
@@ -93,11 +101,12 @@ RUN conda install implicit && \
 # Install PyTorch
 {{ if eq .Accelerator "gpu" }}
 COPY --from=torch_whl /tmp/whl/*.whl /tmp/torch/
-RUN pip install /tmp/torch/*.whl && \
+RUN conda install -c pytorch magma-cuda${CUDA_MAJOR_VERSION}${CUDA_MINOR_VERSION} && \
+    pip install /tmp/torch/*.whl && \
     rm -rf /tmp/torch && \
     /tmp/clean-layer.sh
 {{ else }}
-RUN pip install torch==$TORCH_VERSION+cpu torchvision==$TORCHVISION_VERSION+cpu torchaudio==$TORCHAUDIO_VERSION torchtext==$TORCHTEXT_VERSION -f https://download.pytorch.org/whl/torch_stable.html && \
+RUN pip install torch==$TORCH_VERSION+cpu torchvision==$TORCHVISION_VERSION+cpu torchaudio==$TORCHAUDIO_VERSION+cpu torchtext==$TORCHTEXT_VERSION -f https://download.pytorch.org/whl/torch_stable.html && \
     /tmp/clean-layer.sh
 {{ end }}

@@ -155,7 +164,7 @@ RUN pip install pycuda && \

 RUN pip install pysal && \
     pip install seaborn python-dateutil dask python-igraph && \
-    pip install pyyaml joblib husl geopy ml_metrics mne pyshp && \
+    pip install pyyaml joblib husl geopy mne pyshp && \
     pip install pandas && \
     pip install flax && \
     # Install h2o from source.
@@ -212,6 +221,8 @@ RUN pip install ibis-framework && \

 RUN pip install scipy && \
     pip install scikit-learn && \
+    # Scikit-learn accelerated library for x86
+    pip install scikit-learn-intelex && \
     # HDF5 support
     pip install h5py && \
     pip install biopython && \
@@ -277,8 +288,6 @@ RUN pip install mpld3 && \
     pip install pyldavis==3.2.2 && \
     pip install mlxtend && \
     pip install altair && \
-    # b/183944405 pystan 3.x is not compatible with fbprophet.
-    pip install pystan==2.19.1.1 && \
     pip install ImageHash && \
     pip install ecos && \
     pip install CVXcanon && \
@@ -301,7 +310,7 @@ RUN pip install mpld3 && \
     pip install pyexcel-ods && \
     pip install sklearn-pandas && \
     pip install stemming && \
-    pip install fbprophet && \
+    pip install prophet && \
     pip install holoviews && \
     pip install geoviews && \
     pip install hypertools && \
@@ -314,9 +323,7 @@ RUN pip install mpld3 && \
     pip install lightfm && \
     pip install folium && \
     pip install scikit-plot && \
-    # dipy requires the optional fury dependency for visualizations.
-    # b/217761018 pinned fury to fix test
-    pip install fury==0.7.1 dipy && \
+    pip install fury dipy && \
     pip install plotnine && \
     pip install scikit-surprise && \
     pip install pymongo && \
@@ -391,17 +398,19 @@ RUN pip install bleach && \
     pip install ipywidgets && \
     pip install isoweek && \
     pip install jedi && \
-    pip install Jinja2 && \
     pip install jsonschema && \
     pip install jupyter-client && \
     pip install jupyter-console && \
     pip install jupyter-core && \
+    pip install jupyterlab-lsp && \
     pip install MarkupSafe && \
     pip install mistune && \
-    pip install nbconvert && \
+    # b/227194111 install latest version of nbconvert until the base image includes nbconvert >= 6.4.5
+    pip install --upgrade nbconvert Jinja2 && \
     pip install nbformat && \
     pip install notebook && \
     pip install papermill && \
+    pip install python-lsp-server[all] && \
     pip install olefile && \
     # b/198300835 kornia 0.5.10 is not compatible with our version of numpy.
     pip install kornia==0.5.8 && \
@@ -488,15 +497,16 @@ RUN pip install flashtext && \
     pip install bqplot && \
     pip install earthengine-api && \
     pip install transformers && \
+    # b/232247930 >= 2.2.0 requires pyarrow >= 6.0.0 which conflicts with dependencies for rapidsai 0.21.*
+    pip install datasets==2.1.0 && \
     pip install dlib && \
     pip install kaggle-environments && \
     pip install geopandas && \
     pip install nnabla && \
     pip install vowpalwabbit && \
     pip install pydub && \
     pip install pydegensac && \
-    # b/215182966 torchmetrics 0.7.0 is causing an issue with pytorch-lightning.
-    pip install torchmetrics==0.6.2 && \
+    pip install torchmetrics && \
     pip install pytorch-lightning && \
     pip install datatable && \
     pip install sympy && \
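
Since the CUDA version now flows from build args into ENV and selects the matching magma-cuda package (e.g. magma-cuda110 for CUDA 11.0), a quick in-image check can confirm the prebuilt torch wheel and the image agree on the CUDA release. A minimal sketch, assuming it runs inside the built GPU image; it is not part of this commit:

    # Sketch: confirm the installed torch build targets the CUDA release baked
    # into the image via CUDA_MAJOR_VERSION / CUDA_MINOR_VERSION.
    import os
    import torch

    expected_major = os.environ["CUDA_MAJOR_VERSION"]                   # e.g. "11"
    expected = f"{expected_major}.{os.environ['CUDA_MINOR_VERSION']}"   # e.g. "11.0"
    print("image CUDA:", expected, "| torch built for CUDA:", torch.version.cuda)
    assert torch.version.cuda and torch.version.cuda.startswith(expected_major)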

Jenkinsfile (+2 -0)

@@ -37,6 +37,8 @@ pipeline {
            --build-arg TORCHAUDIO_VERSION=$TORCHAUDIO_VERSION \
            --build-arg TORCHTEXT_VERSION=$TORCHTEXT_VERSION \
            --build-arg TORCHVISION_VERSION=$TORCHVISION_VERSION \
+           --build-arg CUDA_MAJOR_VERSION=$CUDA_MAJOR_VERSION \
+           --build-arg CUDA_MINOR_VERSION=$CUDA_MINOR_VERSION \
            --push
        '''
    }

config.txt (+8 -5)

@@ -1,9 +1,12 @@
 BASE_IMAGE_REPO=gcr.io/deeplearning-platform-release
-BASE_IMAGE_TAG=m88
+BASE_IMAGE_TAG=m91
 CPU_BASE_IMAGE_NAME=tf2-cpu.2-6
 GPU_BASE_IMAGE_NAME=tf2-gpu.2-6
 LIGHTGBM_VERSION=3.3.1
-TORCH_VERSION=1.9.1
-TORCHAUDIO_VERSION=0.9.1
-TORCHTEXT_VERSION=0.10.1
-TORCHVISION_VERSION=0.10.1
+TORCH_VERSION=1.11.0
+# TODO(b/215031404#comment4) Remove zlib sed command after upgrade to >= 0.11.1
+TORCHAUDIO_VERSION=0.11.0
+TORCHTEXT_VERSION=0.12.0
+TORCHVISION_VERSION=0.12.0
+CUDA_MAJOR_VERSION=11
+CUDA_MINOR_VERSION=0

packages/torch.Dockerfile (+5 -0)

@@ -6,12 +6,15 @@ ARG PACKAGE_VERSION
 ARG TORCHAUDIO_VERSION
 ARG TORCHTEXT_VERSION
 ARG TORCHVISION_VERSION
+ARG CUDA_MAJOR_VERSION
+ARG CUDA_MINOR_VERSION

 # TORCHVISION_VERSION is mandatory
 RUN test -n "$TORCHVISION_VERSION"

 # Build instructions: https://github.com/pytorch/pytorch#from-source
 RUN conda install astunparse numpy ninja pyyaml mkl mkl-include setuptools==59.5.0 cmake cffi typing_extensions future six requests dataclasses
+RUN conda install -c pytorch magma-cuda${CUDA_MAJOR_VERSION}${CUDA_MINOR_VERSION}

 # By default, it uses the version from version.txt which includes the `a0` (alpha zero) suffix and part of the git hash.
 # This causes dependency conflicts like these: https://paste.googleplex.com/4786486378496000

@@ -41,6 +44,8 @@ RUN cd /usr/local/src && \
     git checkout tags/v$TORCHAUDIO_VERSION && \
     git submodule sync && \
     git submodule update --init --recursive --jobs 0 && \
+    # TODO(b/215031404#comment4) Remove after upgrade next release (0.11.1)
+    sed -i s?https://zlib.net/zlib-1.2.11.tar.gz?https://sourceforge.net/projects/libpng/files/zlib/1.2.11/zlib-1.2.11.tar.gz? third_party/zlib/CMakeLists.txt && \
     python setup.py bdist_wheel

 # Build torchtext

tests/test_catalyst.py (+13 -5)

@@ -141,10 +141,18 @@ def test_mnist(self):
             logdir=logdir,
             num_epochs=num_epochs,
             verbose=False,
-            callbacks=[CheckpointCallback(save_n_best=3, use_runner_logdir=True)]
+            callbacks=[CheckpointCallback(
+                logdir,
+                topk=3,
+                save_best=True,
+                loader_key="valid",
+                metric_key="loss",
+                minimize=True)]
         )
-
-        with open('./logs/_metrics.json') as f:
+
+        with open('./logs/model.storage.json') as f:
             metrics = json.load(f)
-            self.assertTrue(metrics['train.3']['valid']['loss'] < metrics['train.1']['valid']['loss'])
-            self.assertTrue(metrics['best']['valid']['loss'] < 0.35)
+            storage = metrics['storage']
+            self.assertEqual(3, len(storage))
+            self.assertTrue(storage[0]['metric'] < storage[2]['metric'])
+            self.assertTrue(storage[0]['metric'] < 0.35)

tests/test_hf_datasets.py (+16)

@@ -0,0 +1,16 @@
+import unittest
+
+from datasets import Dataset
+
+
+class TestHuggingFaceDatasets(unittest.TestCase):
+
+    def test_map(self):
+        def some_func(batch):
+            batch['label'] = 'foo'
+            return batch
+
+        df = Dataset.from_dict({'text': ['Kaggle rocks!']})
+        mapped_df = df.map(some_func)
+
+        self.assertEqual('foo', mapped_df[0]['label'])
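
For reference, the same Dataset.map API in the pinned datasets==2.1.0 also supports batched mapping; a minimal illustrative sketch (not part of the commit, names are arbitrary):

    from datasets import Dataset

    ds = Dataset.from_dict({'text': ['Kaggle rocks!', 'Notebooks too']})

    def add_label(batch):
        # With batched=True, batch['text'] is a list of strings.
        batch['label'] = ['foo'] * len(batch['text'])
        return batch

    labeled = ds.map(add_label, batched=True)
    print(labeled[0]['label'])  # 'foo'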

tests/test_jupyter_nbconvert.py (+18 -2)

@@ -3,7 +3,7 @@
 import subprocess

 class TestJupyterNbconvert(unittest.TestCase):
-    def test_nbconvert(self):
+    def test_nbconvert_to_notebook(self):
         result = subprocess.run([
             'jupyter',
             'nbconvert',

@@ -17,4 +17,20 @@ def test_nbconvert(self):
         ], stdout=subprocess.PIPE)

         self.assertEqual(0, result.returncode)
-        self.assertTrue(b'999' in result.stdout)
+        self.assertTrue(b'999' in result.stdout)
+
+    def test_nbconvert_to_html(self):
+        result = subprocess.run([
+            'jupyter',
+            'nbconvert',
+            '--to',
+            'html',
+            '--template',
+            '/opt/kaggle/nbconvert-extensions.tpl',
+            '--execute',
+            '--stdout',
+            '/input/tests/data/notebook.ipynb',
+        ], stdout=subprocess.PIPE)
+
+        self.assertEqual(0, result.returncode)
+        self.assertTrue(b'999' in result.stdout)

tests/test_jupyterlab_lsp.py (+31)

@@ -0,0 +1,31 @@
+import os
+import unittest
+
+from jupyter_server.serverapp import ServerApp
+
+# Adapted from:
+# https://github.com/jupyter-lsp/jupyterlab-lsp/blob/ce76fab170feea506faf9ef47e4bd6a468c24313/python_packages/jupyter_lsp/jupyter_lsp/tests/test_extension.py
+class TestJupyterLabLsp(unittest.TestCase):
+    def test_serverextension_path(self):
+        import jupyter_lsp
+        paths = jupyter_lsp._jupyter_server_extension_paths()
+        for path in paths:
+            self.assertTrue(__import__(path["module"]))
+
+
+    def test_serverextension(self):
+        app = ServerApp()
+        app.initialize(
+            ["--ServerApp.jpserver_extensions={'jupyter_lsp.serverextension': True}"],
+            new_httpserver=False,
+        )
+        self.assertTrue(app.language_server_manager)
+
+        found_lsp = False
+        for r in app.web_app.default_router.rules:
+            for rr in r.target.rules:
+                if "/lsp/" in str(rr.matcher.regex):
+                    found_lsp = True
+
+        self.assertTrue(found_lsp, "didn't install the /lsp/ route")
+

tests/test_fbprophet.py renamed to tests/test_prophet.py (+2 -2)

@@ -3,9 +3,9 @@
 import numpy as np
 import pandas as pd

-from fbprophet import Prophet
+from prophet import Prophet

-class TestFbProphet(unittest.TestCase):
+class TestProphet(unittest.TestCase):
     def test_fit(self):
         train = pd.DataFrame({
             'ds': np.array(['2012-05-18', '2012-05-20']),

tests/test_python_lsp_server.py (+18)

@@ -0,0 +1,18 @@
+import subprocess
+import unittest
+
+
+class TestPythonLspServer(unittest.TestCase):
+    def test_initialize(self):
+        server = subprocess.Popen(
+            'python -m pylsp --check-parent-process',
+            shell=True,
+            stdin=subprocess.PIPE,
+            stdout=subprocess.PIPE)
+
+        response = server.communicate(input=
+            b'Content-Length: 67\r\n'
+            b'Content-Type: application/vscode-jsonrpc; charset=utf8\r\n'
+            b'\r\n'
+            b'{"id": "a", "jsonrpc": "2.0", "method": "initialize", "params": {}}')[0]
+        assert 'capabilities' in response.decode('utf-8')
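
The hard-coded Content-Length: 67 above is the byte length of the JSON body. A small sketch (illustrative only, not part of the commit) that builds the same LSP frame and derives that number, assuming json.dumps default separators:

    import json

    body = json.dumps({"id": "a", "jsonrpc": "2.0",
                       "method": "initialize", "params": {}}).encode("utf-8")
    frame = (b"Content-Length: %d\r\n" % len(body)
             + b"Content-Type: application/vscode-jsonrpc; charset=utf8\r\n\r\n"
             + body)
    print(len(body))  # 67, matching the constant in the test above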

tests/test_pytorch.py (+8)

@@ -15,6 +15,14 @@ def test_nn(self):
         data_torch = autograd.Variable(torch.randn(2, 5))
         linear_torch(data_torch)

+    @gpu_test
+    def test_linalg(self):
+        A = torch.randn(3, 3).t().to('cuda')
+        B = torch.randn(3).t().to('cuda')
+
+        result = torch.linalg.solve(A, B)
+        self.assertEqual(3, result.shape[0])
+
     @gpu_test
     def test_gpu_computation(self):
         cuda = torch.device('cuda')

tests/test_sklearnex.py (+12)

@@ -0,0 +1,12 @@
+import unittest
+
+import numpy as np
+
+class TestSklearnex(unittest.TestCase):
+    def test_dbscan(self):
+        from sklearnex.cluster import DBSCAN
+        X = np.array([[1., 2.], [2., 2.], [2., 3.],
+                      [8., 7.], [8., 8.], [25., 80.]], dtype=np.float32)
+
+        clustering = DBSCAN(eps=3, min_samples=2).fit(X)
+        np.testing.assert_array_equal(np.array([0, 0, 0, 1, 1, -1]), clustering.labels_)
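
Besides importing accelerated estimators directly from sklearnex.cluster as above, scikit-learn-intelex can also patch scikit-learn globally; a minimal sketch (not part of the commit):

    from sklearnex import patch_sklearn
    patch_sklearn()  # later sklearn imports resolve to accelerated implementations where available

    from sklearn.cluster import DBSCAN  # now dispatches to scikit-learn-intelex when supported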

tests/test_tsfresh.py (+1 -1)

@@ -12,7 +12,7 @@ def test_extract_feature(self):
             'time': np.array([0,1,2,0,1,2]),
             'x': np.array([3,4,5,7,8,10])
         })
-        extracted_features = extract_features(ts, column_id='id', column_sort='time')
+        extracted_features = extract_features(ts, column_id='id', column_sort='time', n_jobs=1)
         self.assertEqual(2, len(extracted_features))
