Skip to content

Commit

Permalink
MNT Compatibility with sklearn 1.0 (#864)
Browse files Browse the repository at this point in the history
  • Loading branch information
glemaitre authored Sep 29, 2021
1 parent edf6eae commit f407976
Show file tree
Hide file tree
Showing 30 changed files with 252 additions and 133 deletions.
10 changes: 0 additions & 10 deletions .pep8speaks.yml

This file was deleted.

13 changes: 5 additions & 8 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,19 +1,16 @@
repos:
- repo: https://github.com/python/black
rev: stable
hooks:
- id: black
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v2.3.0
hooks:
- id: check-yaml
- id: end-of-file-fixer
- id: trailing-whitespace
- repo: https://github.com/psf/black
rev: 21.6b0
hooks:
- id: black
- repo: https://gitlab.com/pycqa/flake8
rev: 3.7.8
rev: 3.9.2
hooks:
- id: flake8
types: [file, python]
# only check for unused imports for now, as long as
# the code is not fully PEP8 compatible
args: [--select=F401]
31 changes: 19 additions & 12 deletions azure-pipelines.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,23 +3,26 @@ jobs:
- job: linting
displayName: Linting
pool:
vmImage: ubuntu-18.04
vmImage: ubuntu-20.04
steps:
- bash: echo "##vso[task.prependpath]$CONDA/bin"
displayName: Add conda to PATH
- bash: sudo chown -R $USER $CONDA
displayName: Take ownership of conda installation
- bash: conda create --name flake8_env --yes flake8
displayName: Install flake8
- task: UsePythonVersion@0
inputs:
versionSpec: '3.9'
- bash: |
# Include pytest compatibility with mypy
pip install pytest flake8 mypy==0.782 black==21.6b0
displayName: Install linters
- bash: |
black --check .
displayName: Run black
- bash: |
source activate flake8_env
./build_tools/circle/linting.sh
displayName: Run linting
- template: build_tools/azure/posix.yml
parameters:
name: Linux_Runs
vmImage: ubuntu-18.04
vmImage: ubuntu-20.04
matrix:
pylatest_pip_openblas_pandas:
DISTRIB: 'conda-pip-latest'
Expand All @@ -33,15 +36,14 @@ jobs:
- template: build_tools/azure/posix.yml
parameters:
name: Linux
vmImage: ubuntu-18.04
vmImage: ubuntu-20.04
dependsOn: [linting]
matrix:
# Linux environment to test that scikit-learn can be built against the
# versions of numpy and scipy with ATLAS that ship with Ubuntu (for Bionic
# 18.04 these were numpy 1.13.3 and scipy 0.19; the image is now 20.04)
py36_ubuntu_atlas:
DISTRIB: 'ubuntu'
PYTHON_VERSION: '3.6'
JOBLIB_VERSION: '*'
pylatest_conda_pandas_keras:
DISTRIB: 'conda'
Expand All @@ -61,11 +63,16 @@ jobs:
TENSORFLOW_VERSION: '*'
COVERAGE: 'true'
TEST_DOCSTRINGS: 'true'
sklearn_0_24_conda:
DISTRIB: 'conda'
PYTHON_VERSION: '3.7'
SKLEARN_VERSION: '0.24.2'
INSTALL_MKL: 'true'

- template: build_tools/azure/posix-32.yml
parameters:
name: Linux32
vmImage: ubuntu-18.04
vmImage: ubuntu-20.04
dependsOn: [linting]
matrix:
py36_ubuntu_atlas_32bit:
Expand Down
7 changes: 6 additions & 1 deletion build_tools/azure/install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,15 @@ if [[ "$DISTRIB" == "conda" ]]; then
fi

make_conda $TO_INSTALL
python -m pip install scikit-learn

TO_INSTALL=""

if [[ -n "$SKLEARN_VERSION" ]]; then
TO_INSTALL="$TO_INSTALL scikit-learn=$SKLEARN_VERSION"
else
TO_INSTALL="$TO_INSTALL scikit-learn"
fi

if [[ -n "$PANDAS_VERSION" ]]; then
TO_INSTALL="$TO_INSTALL pandas=$PANDAS_VERSION"
fi
Expand Down
2 changes: 1 addition & 1 deletion doc/install.rst
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ The imbalanced-learn package requires the following dependencies:
* python (>=3.6)
* numpy (>=1.13.3)
* scipy (>=0.19.1)
* scikit-learn (>=0.23)
* scikit-learn (>=0.24)
* keras 2 (optional)
* tensorflow (optional)

Expand Down
10 changes: 8 additions & 2 deletions doc/sphinxext/sphinx_issues.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,11 @@ class IssueRole(object):
EXTERNAL_REPO_REGEX = re.compile(r"^(\w+)/(.+)([#@])([\w]+)$")

def __init__(
self, uri_config_option, format_kwarg, github_uri_template, format_text=None,
self,
uri_config_option,
format_kwarg,
github_uri_template,
format_text=None,
):
self.uri_config_option = uri_config_option
self.format_kwarg = format_kwarg
Expand All @@ -103,7 +107,9 @@ def make_node(self, name, issue_no, config, options=None):
)
path = name_map.get(name)
ref = "https://github.com/{issues_github_path}/{path}/{n}".format(
issues_github_path="{}/{}".format(username, repo), path=path, n=issue,
issues_github_path="{}/{}".format(username, repo),
path=path,
n=issue,
)
formatted_issue = self.format_text(issue).lstrip("#")
text = "{username}/{repo}{symbol}{formatted_issue}".format(**locals())
Expand Down
13 changes: 13 additions & 0 deletions doc/whats_new/v0.8.rst
Original file line number Diff line number Diff line change
@@ -1,5 +1,18 @@
.. _changes_0_8:

Version 0.8.1
=============

**In development**

Changelog

Maintenance
...........

- Make `imbalanced-learn` compatible with `scikit-learn` 1.0.
:pr:`864` by :user:`Guillaume Lemaitre <glemaitre>`.

Version 0.8.0
=============

Expand Down
10 changes: 8 additions & 2 deletions imblearn/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,9 @@ def fit_resample(self, X, y):

output = self._fit_resample(X, y)

y_ = label_binarize(output[1], np.unique(y)) if binarize_y else output[1]
y_ = (
label_binarize(output[1], classes=np.unique(y)) if binarize_y else output[1]
)

X_, y_ = arrays_transformer.transform(output[0], y_)
return (X_, y_) if len(output) == 2 else (X_, y_, output[2])
Expand Down Expand Up @@ -284,7 +286,11 @@ def fit_resample(self, X, y):

if self.validate:

y_ = label_binarize(output[1], np.unique(y)) if binarize_y else output[1]
y_ = (
label_binarize(output[1], classes=np.unique(y))
if binarize_y
else output[1]
)
X_, y_ = arrays_transformer.transform(output[0], y_)
return (X_, y_) if len(output) == 2 else (X_, y_, output[2])

Expand Down
14 changes: 10 additions & 4 deletions imblearn/ensemble/_forest.py
Original file line number Diff line number Diff line change
Expand Up @@ -422,15 +422,13 @@ def fit(self, X, y, sample_weight=None):
)
if sample_weight is not None:
sample_weight = _check_sample_weight(sample_weight, X)
self._n_features = X.shape[1]

if issparse(X):
# Pre-sort indices to avoid that each individual tree of the
# ensemble sorts the indices.
X.sort_indices()

# Remap output
_, self.n_features_ = X.shape

y = np.atleast_1d(y)
if y.ndim == 2 and y.shape[1] == 1:
warn(
Expand Down Expand Up @@ -627,5 +625,13 @@ def _set_oob_score(self, X, y):

self.oob_score_ = oob_score / self.n_outputs_

@property
def n_features_(self):
    """Number of features when fitting the estimator.

    Returns
    -------
    int
        The value of ``n_features_in_`` when that attribute is set on the
        estimator; otherwise the value of ``_n_features``, which ``fit``
        stores from ``X.shape[1]``.

    Raises
    ------
    AttributeError
        If neither attribute has been set on the estimator.
    """
    # The previous implementation called
    # ``getattr(self.n_features_in_, self._n_features)``, i.e. it used an
    # integer as the object and another integer as the attribute *name*,
    # which raises ``TypeError`` on every access. Look the attribute up on
    # ``self`` and fall back to the privately stored count instead.
    try:
        return self.n_features_in_
    except AttributeError:
        return self._n_features

def _more_tags(self):
return {"multioutput": False}
return {
"multioutput": False,
"multilabel": False,
}
38 changes: 30 additions & 8 deletions imblearn/ensemble/tests/test_easy_ensemble.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,10 @@
def test_easy_ensemble_classifier(n_estimators, base_estimator):
# Check classification for various parameter settings.
X, y = make_imbalance(
iris.data, iris.target, sampling_strategy={0: 20, 1: 25, 2: 50}, random_state=0,
iris.data,
iris.target,
sampling_strategy={0: 20, 1: 25, 2: 50},
random_state=0,
)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

Expand All @@ -72,7 +75,10 @@ def test_easy_ensemble_classifier(n_estimators, base_estimator):
def test_base_estimator():
# Check base_estimator and its default values.
X, y = make_imbalance(
iris.data, iris.target, sampling_strategy={0: 20, 1: 25, 2: 50}, random_state=0,
iris.data,
iris.target,
sampling_strategy={0: 20, 1: 25, 2: 50},
random_state=0,
)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

Expand All @@ -91,7 +97,10 @@ def test_base_estimator():

def test_bagging_with_pipeline():
X, y = make_imbalance(
iris.data, iris.target, sampling_strategy={0: 20, 1: 25, 2: 50}, random_state=0,
iris.data,
iris.target,
sampling_strategy={0: 20, 1: 25, 2: 50},
random_state=0,
)
estimator = EasyEnsembleClassifier(
n_estimators=2,
Expand All @@ -109,7 +118,9 @@ def test_warm_start(random_state=42):
for n_estimators in [5, 10]:
if clf_ws is None:
clf_ws = EasyEnsembleClassifier(
n_estimators=n_estimators, random_state=random_state, warm_start=True,
n_estimators=n_estimators,
random_state=random_state,
warm_start=True,
)
else:
clf_ws.set_params(n_estimators=n_estimators)
Expand Down Expand Up @@ -182,7 +193,10 @@ def test_warm_start_equivalence():
)
def test_easy_ensemble_classifier_error(n_estimators, msg_error):
X, y = make_imbalance(
iris.data, iris.target, sampling_strategy={0: 20, 1: 25, 2: 50}, random_state=0,
iris.data,
iris.target,
sampling_strategy={0: 20, 1: 25, 2: 50},
random_state=0,
)
with pytest.raises(ValueError, match=msg_error):
eec = EasyEnsembleClassifier(n_estimators=n_estimators)
Expand All @@ -191,7 +205,10 @@ def test_easy_ensemble_classifier_error(n_estimators, msg_error):

def test_easy_ensemble_classifier_single_estimator():
X, y = make_imbalance(
iris.data, iris.target, sampling_strategy={0: 20, 1: 25, 2: 50}, random_state=0,
iris.data,
iris.target,
sampling_strategy={0: 20, 1: 25, 2: 50},
random_state=0,
)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

Expand All @@ -205,14 +222,19 @@ def test_easy_ensemble_classifier_single_estimator():

def test_easy_ensemble_classifier_grid_search():
X, y = make_imbalance(
iris.data, iris.target, sampling_strategy={0: 20, 1: 25, 2: 50}, random_state=0,
iris.data,
iris.target,
sampling_strategy={0: 20, 1: 25, 2: 50},
random_state=0,
)

parameters = {
"n_estimators": [1, 2],
"base_estimator__n_estimators": [3, 4],
}
grid_search = GridSearchCV(
EasyEnsembleClassifier(base_estimator=AdaBoostClassifier()), parameters, cv=5,
EasyEnsembleClassifier(base_estimator=AdaBoostClassifier()),
parameters,
cv=5,
)
grid_search.fit(X, y)
18 changes: 14 additions & 4 deletions imblearn/ensemble/tests/test_forest.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,10 @@ def imbalanced_dataset():
[
({"n_estimators": "whatever"}, "n_estimators must be an integer"),
({"n_estimators": -100}, "n_estimators must be greater than zero"),
({"bootstrap": False, "oob_score": True}, "Out of bag estimation only",),
(
{"bootstrap": False, "oob_score": True},
"Out of bag estimation only",
),
],
)
def test_balanced_random_forest_error(imbalanced_dataset, forest_params, err_msg):
Expand Down Expand Up @@ -105,7 +108,10 @@ def test_balanced_random_forest_oob(imbalanced_dataset):
X, y, random_state=42, stratify=y
)
est = BalancedRandomForestClassifier(
oob_score=True, random_state=0, n_estimators=1000, min_samples_leaf=2,
oob_score=True,
random_state=0,
n_estimators=1000,
min_samples_leaf=2,
)

est.fit(X_train, y_train)
Expand Down Expand Up @@ -135,12 +141,16 @@ def test_little_tree_with_small_max_samples():

# First fit with no restriction on max samples
est1 = BalancedRandomForestClassifier(
n_estimators=1, random_state=rng, max_samples=None,
n_estimators=1,
random_state=rng,
max_samples=None,
)

# Second fit with max samples restricted to just 2
est2 = BalancedRandomForestClassifier(
n_estimators=1, random_state=rng, max_samples=2,
n_estimators=1,
random_state=rng,
max_samples=2,
)

est1.fit(X, y)
Expand Down
Loading

0 comments on commit f407976

Please sign in to comment.