Commit 2ad5f87

Merge branch 'main' into tpu-1vm-image
2 parents: 5978db8 + 389cecd

13 files changed: +161 -32 lines

Dockerfile.tmpl (+27 -17)

@@ -12,8 +12,10 @@ ARG TORCHVISION_VERSION
 FROM gcr.io/kaggle-images/python-lightgbm-whl:${GPU_BASE_IMAGE_NAME}-${BASE_IMAGE_TAG}-${LIGHTGBM_VERSION} AS lightgbm_whl
 FROM gcr.io/kaggle-images/python-torch-whl:${GPU_BASE_IMAGE_NAME}-${BASE_IMAGE_TAG}-${TORCH_VERSION} AS torch_whl
 FROM ${BASE_IMAGE_REPO}/${GPU_BASE_IMAGE_NAME}:${BASE_IMAGE_TAG}
-ENV CUDA_MAJOR_VERSION=11
-ENV CUDA_MINOR_VERSION=0
+ARG CUDA_MAJOR_VERSION
+ARG CUDA_MINOR_VERSION
+ENV CUDA_MAJOR_VERSION=${CUDA_MAJOR_VERSION}
+ENV CUDA_MINOR_VERSION=${CUDA_MINOR_VERSION}
 # NVIDIA binaries from the host are mounted to /opt/bin.
 ENV PATH=/opt/bin:${PATH}
 # Add CUDA stubs to LD_LIBRARY_PATH to support building the GPU image on a CPU machine.
@@ -51,6 +53,13 @@ RUN pip uninstall -y horovod && \
     /tmp/clean-layer.sh
 {{ end }}

+{{ if eq .Accelerator "gpu" }}
+# b/230864778: Temporarily swap the NVIDIA GPG key. Remove once new base image with new GPG key is released.
+RUN rm /etc/apt/sources.list.d/cuda.list && \
+    apt-key del 7fa2af80 && \
+    apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/7fa2af80.pub
+{{ end }}
+
 # Use a fixed apt-get repo to stop intermittent failures due to flaky httpredir connections,
 # as described by Lionel Chan at http://stackoverflow.com/a/37426929/5881346
 RUN sed -i "s/httpredir.debian.org/debian.uchicago.edu/" /etc/apt/sources.list && \
@@ -72,8 +81,7 @@ RUN conda config --add channels nvidia && \
     conda config --add channels rapidsai && \
     # Base image channel order: conda-forge (highest priority), defaults.
     # End state: rapidsai (highest priority), nvidia, conda-forge, defaults.
-    # b/216162758 Pin mkl which last version breaks spacy.
-    conda install mkl=2021.4.0 cartopy=0.19 imagemagick=7.1 pyproj==3.1.0 && \
+    conda install mkl cartopy=0.19 imagemagick=7.1 pyproj==3.1.0 && \
     /tmp/clean-layer.sh

 {{ if eq .Accelerator "gpu" }}
@@ -93,11 +101,12 @@ RUN conda install implicit && \
 # Install PyTorch
 {{ if eq .Accelerator "gpu" }}
 COPY --from=torch_whl /tmp/whl/*.whl /tmp/torch/
-RUN pip install /tmp/torch/*.whl && \
+RUN conda install -c pytorch magma-cuda${CUDA_MAJOR_VERSION}${CUDA_MINOR_VERSION} && \
+    pip install /tmp/torch/*.whl && \
     rm -rf /tmp/torch && \
     /tmp/clean-layer.sh
 {{ else }}
-RUN pip install torch==$TORCH_VERSION+cpu torchvision==$TORCHVISION_VERSION+cpu torchaudio==$TORCHAUDIO_VERSION torchtext==$TORCHTEXT_VERSION -f https://download.pytorch.org/whl/torch_stable.html && \
+RUN pip install torch==$TORCH_VERSION+cpu torchvision==$TORCHVISION_VERSION+cpu torchaudio==$TORCHAUDIO_VERSION+cpu torchtext==$TORCHTEXT_VERSION -f https://download.pytorch.org/whl/torch_stable.html && \
     /tmp/clean-layer.sh
 {{ end }}

@@ -155,7 +164,7 @@ RUN pip install pycuda && \

 RUN pip install pysal && \
     pip install seaborn python-dateutil dask python-igraph && \
-    pip install pyyaml joblib husl geopy ml_metrics mne pyshp && \
+    pip install pyyaml joblib husl geopy mne pyshp && \
     pip install pandas && \
     pip install flax && \
     # Install h2o from source.
@@ -212,6 +221,8 @@ RUN pip install ibis-framework && \

 RUN pip install scipy && \
     pip install scikit-learn && \
+    # Scikit-learn accelerated library for x86
+    pip install scikit-learn-intelex && \
     # HDF5 support
     pip install h5py && \
     pip install biopython && \
@@ -277,8 +288,6 @@ RUN pip install mpld3 && \
     pip install pyldavis==3.2.2 && \
     pip install mlxtend && \
     pip install altair && \
-    # b/183944405 pystan 3.x is not compatible with fbprophet.
-    pip install pystan==2.19.1.1 && \
     pip install ImageHash && \
     pip install ecos && \
     pip install CVXcanon && \
@@ -301,7 +310,7 @@ RUN pip install mpld3 && \
     pip install pyexcel-ods && \
     pip install sklearn-pandas && \
     pip install stemming && \
-    pip install fbprophet && \
+    pip install prophet && \
     pip install holoviews && \
     pip install geoviews && \
     pip install hypertools && \
@@ -314,9 +323,7 @@ RUN pip install mpld3 && \
     pip install lightfm && \
     pip install folium && \
     pip install scikit-plot && \
-    # dipy requires the optional fury dependency for visualizations.
-    # b/217761018 pinned fury to fix test
-    pip install fury==0.7.1 dipy && \
+    pip install fury dipy && \
     pip install plotnine && \
     pip install scikit-surprise && \
     pip install pymongo && \
@@ -391,17 +398,19 @@ RUN pip install bleach && \
     pip install ipywidgets && \
     pip install isoweek && \
     pip install jedi && \
-    pip install Jinja2 && \
     pip install jsonschema && \
     pip install jupyter-client && \
     pip install jupyter-console && \
     pip install jupyter-core && \
+    pip install jupyterlab-lsp && \
     pip install MarkupSafe && \
     pip install mistune && \
-    pip install nbconvert && \
+    # b/227194111 install latest version of nbconvert until the base image includes nbconvert >= 6.4.5
+    pip install --upgrade nbconvert Jinja2 && \
     pip install nbformat && \
     pip install notebook && \
     pip install papermill && \
+    pip install python-lsp-server[all] && \
     pip install olefile && \
     # b/198300835 kornia 0.5.10 is not compatible with our version of numpy.
     pip install kornia==0.5.8 && \
@@ -488,15 +497,16 @@ RUN pip install flashtext && \
     pip install bqplot && \
     pip install earthengine-api && \
     pip install transformers && \
+    # b/232247930 >= 2.2.0 requires pyarrow >= 6.0.0 which conflicts with dependencies for rapidsai 0.21.*
+    pip install datasets==2.1.0 && \
     pip install dlib && \
     pip install kaggle-environments && \
     pip install geopandas && \
     pip install nnabla && \
     pip install vowpalwabbit && \
     pip install pydub && \
     pip install pydegensac && \
-    # b/215182966 torchmetrics 0.7.0 is causing an issue with pytorch-lightning.
-    pip install torchmetrics==0.6.2 && \
+    pip install torchmetrics && \
     pip install pytorch-lightning && \
     pip install datatable && \
     pip install sympy && \
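
Since the CUDA version now flows from build args into ENV and selects the matching magma-cuda package (e.g. magma-cuda110 for CUDA 11.0), a quick in-image check can confirm the prebuilt torch wheel and the image agree on the CUDA release. A minimal sketch, assuming it runs inside the built GPU image; it is not part of this commit:

    # Sketch: confirm the installed torch build targets the CUDA release baked
    # into the image via CUDA_MAJOR_VERSION / CUDA_MINOR_VERSION.
    import os
    import torch

    expected_major = os.environ["CUDA_MAJOR_VERSION"]                   # e.g. "11"
    expected = f"{expected_major}.{os.environ['CUDA_MINOR_VERSION']}"   # e.g. "11.0"
    print("image CUDA:", expected, "| torch built for CUDA:", torch.version.cuda)
    assert torch.version.cuda and torch.version.cuda.startswith(expected_major)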

Jenkinsfile (+2 -0)

@@ -37,6 +37,8 @@ pipeline {
            --build-arg TORCHAUDIO_VERSION=$TORCHAUDIO_VERSION \
            --build-arg TORCHTEXT_VERSION=$TORCHTEXT_VERSION \
            --build-arg TORCHVISION_VERSION=$TORCHVISION_VERSION \
+           --build-arg CUDA_MAJOR_VERSION=$CUDA_MAJOR_VERSION \
+           --build-arg CUDA_MINOR_VERSION=$CUDA_MINOR_VERSION \
            --push
        '''
    }

config.txt (+8 -5)

@@ -1,9 +1,12 @@
 BASE_IMAGE_REPO=gcr.io/deeplearning-platform-release
-BASE_IMAGE_TAG=m88
+BASE_IMAGE_TAG=m91
 CPU_BASE_IMAGE_NAME=tf2-cpu.2-6
 GPU_BASE_IMAGE_NAME=tf2-gpu.2-6
 LIGHTGBM_VERSION=3.3.1
-TORCH_VERSION=1.9.1
-TORCHAUDIO_VERSION=0.9.1
-TORCHTEXT_VERSION=0.10.1
-TORCHVISION_VERSION=0.10.1
+TORCH_VERSION=1.11.0
+# TODO(b/215031404#comment4) Remove zlib sed command after upgrade to >= 0.11.1
+TORCHAUDIO_VERSION=0.11.0
+TORCHTEXT_VERSION=0.12.0
+TORCHVISION_VERSION=0.12.0
+CUDA_MAJOR_VERSION=11
+CUDA_MINOR_VERSION=0

packages/torch.Dockerfile (+5 -0)

@@ -6,12 +6,15 @@ ARG PACKAGE_VERSION
 ARG TORCHAUDIO_VERSION
 ARG TORCHTEXT_VERSION
 ARG TORCHVISION_VERSION
+ARG CUDA_MAJOR_VERSION
+ARG CUDA_MINOR_VERSION

 # TORCHVISION_VERSION is mandatory
 RUN test -n "$TORCHVISION_VERSION"

 # Build instructions: https://github.com/pytorch/pytorch#from-source
 RUN conda install astunparse numpy ninja pyyaml mkl mkl-include setuptools==59.5.0 cmake cffi typing_extensions future six requests dataclasses
+RUN conda install -c pytorch magma-cuda${CUDA_MAJOR_VERSION}${CUDA_MINOR_VERSION}

 # By default, it uses the version from version.txt which includes the `a0` (alpha zero) suffix and part of the git hash.
 # This causes dependency conflicts like these: https://paste.googleplex.com/4786486378496000

@@ -41,6 +44,8 @@ RUN cd /usr/local/src && \
     git checkout tags/v$TORCHAUDIO_VERSION && \
     git submodule sync && \
     git submodule update --init --recursive --jobs 0 && \
+    # TODO(b/215031404#comment4) Remove after upgrade next release (0.11.1)
+    sed -i s?https://zlib.net/zlib-1.2.11.tar.gz?https://sourceforge.net/projects/libpng/files/zlib/1.2.11/zlib-1.2.11.tar.gz? third_party/zlib/CMakeLists.txt && \
     python setup.py bdist_wheel

 # Build torchtext

tests/test_catalyst.py (+13 -5)

@@ -141,10 +141,18 @@ def test_mnist(self):
             logdir=logdir,
             num_epochs=num_epochs,
             verbose=False,
-            callbacks=[CheckpointCallback(save_n_best=3, use_runner_logdir=True)]
+            callbacks=[CheckpointCallback(
+                logdir,
+                topk=3,
+                save_best=True,
+                loader_key="valid",
+                metric_key="loss",
+                minimize=True)]
         )
-
-        with open('./logs/_metrics.json') as f:
+
+        with open('./logs/model.storage.json') as f:
             metrics = json.load(f)
-            self.assertTrue(metrics['train.3']['valid']['loss'] < metrics['train.1']['valid']['loss'])
-            self.assertTrue(metrics['best']['valid']['loss'] < 0.35)
+            storage = metrics['storage']
+            self.assertEqual(3, len(storage))
+            self.assertTrue(storage[0]['metric'] < storage[2]['metric'])
+            self.assertTrue(storage[0]['metric'] < 0.35)

tests/test_hf_datasets.py (+16)

@@ -0,0 +1,16 @@
+import unittest
+
+from datasets import Dataset
+
+
+class TestHuggingFaceDatasets(unittest.TestCase):
+
+    def test_map(self):
+        def some_func(batch):
+            batch['label'] = 'foo'
+            return batch
+
+        df = Dataset.from_dict({'text': ['Kaggle rocks!']})
+        mapped_df = df.map(some_func)
+
+        self.assertEqual('foo', mapped_df[0]['label'])
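
For reference, the same Dataset.map API in the pinned datasets==2.1.0 also supports batched mapping; a minimal illustrative sketch (not part of the commit, names are arbitrary):

    from datasets import Dataset

    ds = Dataset.from_dict({'text': ['Kaggle rocks!', 'Notebooks too']})

    def add_label(batch):
        # With batched=True, batch['text'] is a list of strings.
        batch['label'] = ['foo'] * len(batch['text'])
        return batch

    labeled = ds.map(add_label, batched=True)
    print(labeled[0]['label'])  # 'foo'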

tests/test_jupyter_nbconvert.py (+18 -2)

@@ -3,7 +3,7 @@
 import subprocess

 class TestJupyterNbconvert(unittest.TestCase):
-    def test_nbconvert(self):
+    def test_nbconvert_to_notebook(self):
         result = subprocess.run([
             'jupyter',
             'nbconvert',

@@ -17,4 +17,20 @@ def test_nbconvert(self):
         ], stdout=subprocess.PIPE)

         self.assertEqual(0, result.returncode)
-        self.assertTrue(b'999' in result.stdout)
+        self.assertTrue(b'999' in result.stdout)
+
+    def test_nbconvert_to_html(self):
+        result = subprocess.run([
+            'jupyter',
+            'nbconvert',
+            '--to',
+            'html',
+            '--template',
+            '/opt/kaggle/nbconvert-extensions.tpl',
+            '--execute',
+            '--stdout',
+            '/input/tests/data/notebook.ipynb',
+        ], stdout=subprocess.PIPE)
+
+        self.assertEqual(0, result.returncode)
+        self.assertTrue(b'999' in result.stdout)

tests/test_jupyterlab_lsp.py (+31)

@@ -0,0 +1,31 @@
+import os
+import unittest
+
+from jupyter_server.serverapp import ServerApp
+
+# Adapted from:
+# https://github.com/jupyter-lsp/jupyterlab-lsp/blob/ce76fab170feea506faf9ef47e4bd6a468c24313/python_packages/jupyter_lsp/jupyter_lsp/tests/test_extension.py
+class TestJupyterLabLsp(unittest.TestCase):
+    def test_serverextension_path(self):
+        import jupyter_lsp
+        paths = jupyter_lsp._jupyter_server_extension_paths()
+        for path in paths:
+            self.assertTrue(__import__(path["module"]))
+
+
+    def test_serverextension(self):
+        app = ServerApp()
+        app.initialize(
+            ["--ServerApp.jpserver_extensions={'jupyter_lsp.serverextension': True}"],
+            new_httpserver=False,
+        )
+        self.assertTrue(app.language_server_manager)
+
+        found_lsp = False
+        for r in app.web_app.default_router.rules:
+            for rr in r.target.rules:
+                if "/lsp/" in str(rr.matcher.regex):
+                    found_lsp = True
+
+        self.assertTrue(found_lsp, "didn't install the /lsp/ route")
+

tests/test_fbprophet.py renamed to tests/test_prophet.py (+2 -2)

@@ -3,9 +3,9 @@
 import numpy as np
 import pandas as pd

-from fbprophet import Prophet
+from prophet import Prophet

-class TestFbProphet(unittest.TestCase):
+class TestProphet(unittest.TestCase):
     def test_fit(self):
         train = pd.DataFrame({
             'ds': np.array(['2012-05-18', '2012-05-20']),

tests/test_python_lsp_server.py (+18)

@@ -0,0 +1,18 @@
+import subprocess
+import unittest
+
+
+class TestPythonLspServer(unittest.TestCase):
+    def test_initialize(self):
+        server = subprocess.Popen(
+            'python -m pylsp --check-parent-process',
+            shell=True,
+            stdin=subprocess.PIPE,
+            stdout=subprocess.PIPE)
+
+        response = server.communicate(input=
+            b'Content-Length: 67\r\n'
+            b'Content-Type: application/vscode-jsonrpc; charset=utf8\r\n'
+            b'\r\n'
+            b'{"id": "a", "jsonrpc": "2.0", "method": "initialize", "params": {}}')[0]
+        assert 'capabilities' in response.decode('utf-8')
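
The hard-coded Content-Length: 67 above is the byte length of the JSON body. A small sketch (illustrative only, not part of the commit) that builds the same LSP frame and derives that number, assuming json.dumps default separators:

    import json

    body = json.dumps({"id": "a", "jsonrpc": "2.0",
                       "method": "initialize", "params": {}}).encode("utf-8")
    frame = (b"Content-Length: %d\r\n" % len(body)
             + b"Content-Type: application/vscode-jsonrpc; charset=utf8\r\n\r\n"
             + body)
    print(len(body))  # 67, matching the constant in the test above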

tests/test_pytorch.py (+8)

@@ -15,6 +15,14 @@ def test_nn(self):
         data_torch = autograd.Variable(torch.randn(2, 5))
         linear_torch(data_torch)

+    @gpu_test
+    def test_linalg(self):
+        A = torch.randn(3, 3).t().to('cuda')
+        B = torch.randn(3).t().to('cuda')
+
+        result = torch.linalg.solve(A, B)
+        self.assertEqual(3, result.shape[0])
+
     @gpu_test
     def test_gpu_computation(self):
         cuda = torch.device('cuda')

tests/test_sklearnex.py (+12)

@@ -0,0 +1,12 @@
+import unittest
+
+import numpy as np
+
+class TestSklearnex(unittest.TestCase):
+    def test_dbscan(self):
+        from sklearnex.cluster import DBSCAN
+        X = np.array([[1., 2.], [2., 2.], [2., 3.],
+                      [8., 7.], [8., 8.], [25., 80.]], dtype=np.float32)
+
+        clustering = DBSCAN(eps=3, min_samples=2).fit(X)
+        np.testing.assert_array_equal(np.array([0, 0, 0, 1, 1, -1]), clustering.labels_)
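
Besides importing accelerated estimators directly from sklearnex.cluster as above, scikit-learn-intelex can also patch scikit-learn globally; a minimal sketch (not part of the commit):

    from sklearnex import patch_sklearn
    patch_sklearn()  # later sklearn imports resolve to accelerated implementations where available

    from sklearn.cluster import DBSCAN  # now dispatches to scikit-learn-intelex when supported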

tests/test_tsfresh.py (+1 -1)

@@ -12,7 +12,7 @@ def test_extract_feature(self):
             'time': np.array([0,1,2,0,1,2]),
             'x': np.array([3,4,5,7,8,10])
         })
-        extracted_features = extract_features(ts, column_id='id', column_sort='time')
+        extracted_features = extract_features(ts, column_id='id', column_sort='time', n_jobs=1)
         self.assertEqual(2, len(extracted_features))
