Skip to content

Commit

Permalink
[enhancement] enable dpnp/dpctl testing in test_patching (#1737)
Browse files Browse the repository at this point in the history
* Update test_patching.py

* Update test_patching.py

* Update test_patching.py

* Update test_patching.py

* Update test_patching.py

* Update test_patching.py

* Update test_patching.py

* Update test_patching.py

* Update knn_regression.py

* Update knn_classification.py

* Update knn_unsupervised.py

* Update knn_classification.py

* Update knn_regression.py

* Update knn_unsupervised.py

* Update knn_classification.py

* Update knn_regression.py

* Update knn_unsupervised.py

* Update knn_unsupervised.py

* Update knn_regression.py

* Update knn_classification.py

* Update knn_regression.py

* Update knn_regression.py

* Update _device_offload.py

* Update _device_offload.py

* Update knn_regression.py

* Update knn_regression.py

* fix predict_log_proba

* add predict_log_proba changes

* correction to _forest

* attempt to fix score in ensemble classifiers

* add wrap_output_data

* centralize to _device_offload

* granular introduction of score (unfortunately)

* fixes for n_jobs

* forgot to change nusvc slightly

* add sycl change if necessary

* Update logistic_regression.py

* wrap score

* fix for pca score_samples

* change else depth

* change score

* onedal_transform to transform

* reduce code

* move wrap_output_data

* add recursion to dpnp conversion

* broken fix

* array -> result

* Update _device_offload.py

* deactivate problematic GPU tests

* errant removal

* correcting mistake

* whitespace

* Update _utils.py

* Update _utils.py

* fix speed issues with dpnp/dpctl and _fit_validation

* forgot NearestNeighbors

* fix onedal fit gpu support

* score to dispatch

* _predict -> predict

* score to dispatch

* queue to None

* add _namespace.py

* black formatting

* isort

* attempt at get_namespace inclusion

* remove dependencies

* nusvc move log check

* svc get_namespace

* remove unnecessary import

* fix namespace collision

* minimize changes

* forgot removal of _predict

* forgot to change to get_namespace

* add to init

* Update test_n_jobs_support.py

* kwarg change

* remove fit change

* forgot an n

* forgot self

* fix test_n_jobs_support again

* Update _namespace.py

* Update run_test.sh

* Update run_test.sh

* remove _k_means_0_23.py

* fixing merge

* fixing merge

* Update knn_classification.py

* fix CI failures for GPU

* remove relative import

* change text

* add boilerplate

* Update _namespace.py

* Update logistic_regression.py

* fix formatting

* forgotten change in check

* remove Lasso, ElasticNet and Ridge from GPU testing

* remove mention of tickets

* bad if statement

* deal with fp64 support problems

* change text

* move to array api due to slicing issues

* formatting

* remove indexing change

* modify _copy_to_usm to handle dpnp/dpctl arrays

* remove unnecessary code

* revert changes back to how it was in approved review
  • Loading branch information
icfaust authored Apr 12, 2024
1 parent 34f167c commit 28e545d
Show file tree
Hide file tree
Showing 16 changed files with 439 additions and 95 deletions.
2 changes: 2 additions & 0 deletions daal4py/sklearn/cluster/k_means.py
Original file line number Diff line number Diff line change
Expand Up @@ -695,3 +695,5 @@ def fit_predict(self, X, y=None, sample_weight=None):
Index of the cluster each sample belongs to.
"""
return super().fit_predict(X, y, sample_weight)

score = support_usm_ndarray()(KMeans_original.score)
35 changes: 31 additions & 4 deletions sklearnex/_device_offload.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

import logging
import sys
from collections.abc import Iterable
from functools import wraps

import numpy as np
Expand Down Expand Up @@ -200,9 +201,35 @@ def _copy_to_usm(queue, array):
raise RuntimeError(
"dpctl need to be installed to work " "with __sycl_usm_array_interface__"
)
mem = MemoryUSMDevice(array.nbytes, queue=queue)
mem.copy_from_host(array.tobytes())
return usm_ndarray(array.shape, array.dtype, buffer=mem)

if hasattr(array, "__array__"):

try:
mem = MemoryUSMDevice(array.nbytes, queue=queue)
mem.copy_from_host(array.tobytes())
return usm_ndarray(array.shape, array.dtype, buffer=mem)
except ValueError as e:
# ValueError will raise if device does not support the dtype
# retry with float32 (needed for fp16 and fp64 support issues)
# try again as float32, if it is a float32 just raise the error.
if array.dtype == np.float32:
raise e
return _copy_to_usm(queue, array.astype(np.float32))
else:
if isinstance(array, Iterable):
array = [_copy_to_usm(queue, i) for i in array]
return array


if dpnp_available:

def _convert_to_dpnp(array):
    """Recursively convert ``usm_ndarray`` results into ``dpnp`` arrays.

    A ``usm_ndarray`` is wrapped zero-copy via ``dpnp.array(..., copy=False)``.
    Mutable iterables (e.g. lists of arrays) are converted element-wise in
    place; any other object is returned unchanged.
    """
    if isinstance(array, usm_ndarray):
        return dpnp.array(array, copy=False)
    if isinstance(array, Iterable):
        # In-place element-wise conversion; requires an indexable container.
        idx = 0
        while idx < len(array):
            array[idx] = _convert_to_dpnp(array[idx])
            idx += 1
    return array


def wrap_output_data(func):
Expand All @@ -217,7 +244,7 @@ def wrapper(self, *args, **kwargs):
if usm_iface is not None:
result = _copy_to_usm(usm_iface["syclobj"], result)
if dpnp_available and isinstance(data[0], dpnp.ndarray):
result = dpnp.array(result, copy=False)
result = _convert_to_dpnp(result)
return result

return wrapper
5 changes: 2 additions & 3 deletions sklearnex/decomposition/pca.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ def fit(self, X, y=None):
self._fit(X)
return self

@wrap_output_data
def _fit(self, X):
if sklearn_check_version("1.2"):
self._validate_params()
Expand Down Expand Up @@ -166,13 +167,11 @@ def _onedal_transform(self, X, queue=None):

return self._onedal_estimator.predict(X, queue=queue)

@wrap_output_data
def fit_transform(self, X, y=None):
U, S, Vt = self._fit(X)
if U is None:
# oneDAL PCA was fit
X_transformed = self._onedal_transform(X)
return X_transformed
return self.transform(X)
else:
# Scikit-learn PCA was fit
U = U[:, : self.n_components_]
Expand Down
56 changes: 40 additions & 16 deletions sklearnex/ensemble/_forest.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
from sklearn.ensemble._forest import ForestRegressor as sklearn_ForestRegressor
from sklearn.ensemble._forest import _get_n_samples_bootstrap
from sklearn.exceptions import DataConversionWarning
from sklearn.metrics import accuracy_score
from sklearn.tree import (
DecisionTreeClassifier,
DecisionTreeRegressor,
Expand All @@ -51,8 +52,8 @@
from onedal.ensemble import RandomForestRegressor as onedal_RandomForestRegressor
from onedal.primitives import get_tree_state_cls, get_tree_state_reg
from onedal.utils import _num_features, _num_samples
from sklearnex.utils import get_namespace

from .._config import get_config
from .._device_offload import dispatch, wrap_output_data
from .._utils import PatchingConditionsChain

Expand Down Expand Up @@ -151,15 +152,6 @@ def _onedal_fit(self, X, y, sample_weight=None, queue=None):

return self

def _fit_proba(self, X, y, sample_weight=None, queue=None):
params = self.get_params()
self.__class__(**params)

# We use stock metaestimators below, so the only way
# to pass a queue is using config_context.
cfg = get_config()
cfg["target_offload"] = queue

def _save_attributes(self):
if self.oob_score:
self.oob_score_ = self._onedal_estimator.oob_score_
Expand All @@ -182,8 +174,6 @@ def _save_attributes(self):
self._validate_estimator()
return self

# TODO:
# move to onedal modul.
def _check_parameters(self):
if isinstance(self.min_samples_leaf, numbers.Integral):
if not 1 <= self.min_samples_leaf:
Expand Down Expand Up @@ -629,9 +619,38 @@ def predict_proba(self, X):
X,
)

def predict_log_proba(self, X):
    """Return the log of the class probabilities predicted for X.

    For a single output the log is taken over the whole probability
    array; for multi-output forests each per-output array is logged
    in place and the container is returned.
    """
    xp, _ = get_namespace(X)
    proba = self.predict_proba(X)

    if self.n_outputs_ == 1:
        return xp.log(proba)

    for k in range(self.n_outputs_):
        proba[k] = xp.log(proba[k])
    return proba

@wrap_output_data
def score(self, X, y, sample_weight=None):
    """Dispatch accuracy scoring to the oneDAL or stock sklearn backend."""
    backends = {
        "onedal": self.__class__._onedal_score,
        "sklearn": sklearn_ForestClassifier.score,
    }
    return dispatch(self, "score", backends, X, y, sample_weight=sample_weight)

fit.__doc__ = sklearn_ForestClassifier.fit.__doc__
predict.__doc__ = sklearn_ForestClassifier.predict.__doc__
predict_proba.__doc__ = sklearn_ForestClassifier.predict_proba.__doc__
predict_log_proba.__doc__ = sklearn_ForestClassifier.predict_log_proba.__doc__
score.__doc__ = sklearn_ForestClassifier.score.__doc__

def _onedal_cpu_supported(self, method_name, *data):
class_name = self.__class__.__name__
Expand All @@ -658,7 +677,7 @@ def _onedal_cpu_supported(self, method_name, *data):
]
)

elif method_name in ["predict", "predict_proba"]:
elif method_name in ["predict", "predict_proba", "score"]:
X = data[0]

patching_status.and_conditions(
Expand Down Expand Up @@ -723,7 +742,7 @@ def _onedal_gpu_supported(self, method_name, *data):
]
)

elif method_name in ["predict", "predict_proba"]:
elif method_name in ["predict", "predict_proba", "score"]:
X = data[0]

patching_status.and_conditions(
Expand Down Expand Up @@ -780,6 +799,11 @@ def _onedal_predict_proba(self, X, queue=None):
self._check_feature_names(X, reset=False)
return self._onedal_estimator.predict_proba(X, queue=queue)

def _onedal_score(self, X, y, sample_weight=None, queue=None):
    # Accuracy of oneDAL-backend predictions against the ground truth y.
    predictions = self._onedal_predict(X, queue=queue)
    return accuracy_score(y, predictions, sample_weight=sample_weight)


class ForestRegressor(sklearn_ForestRegressor, BaseForest):
_err = "out_of_bag_error_r2|out_of_bag_error_prediction"
Expand Down Expand Up @@ -1098,7 +1122,7 @@ def predict(self, X):
predict.__doc__ = sklearn_ForestRegressor.predict.__doc__


@control_n_jobs(decorated_methods=["fit", "predict", "predict_proba"])
@control_n_jobs(decorated_methods=["fit", "predict", "predict_proba", "score"])
class RandomForestClassifier(ForestClassifier):
__doc__ = sklearn_RandomForestClassifier.__doc__
_onedal_factory = onedal_RandomForestClassifier
Expand Down Expand Up @@ -1509,7 +1533,7 @@ def __init__(
self.min_bin_size = min_bin_size


@control_n_jobs(decorated_methods=["fit", "predict", "predict_proba"])
@control_n_jobs(decorated_methods=["fit", "predict", "predict_proba", "score"])
class ExtraTreesClassifier(ForestClassifier):
__doc__ = sklearn_ExtraTreesClassifier.__doc__
_onedal_factory = onedal_ExtraTreesClassifier
Expand Down
53 changes: 47 additions & 6 deletions sklearnex/linear_model/logistic_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@ def _save_attributes(self):
import numpy as np
from scipy.sparse import issparse
from sklearn.linear_model import LogisticRegression as sklearn_LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.utils.multiclass import type_of_target
from sklearn.utils.validation import check_X_y

from daal4py.sklearn._n_jobs_support import control_n_jobs
Expand All @@ -50,7 +52,13 @@ def _save_attributes(self):
from ..utils.validation import _assert_all_finite

@control_n_jobs(
decorated_methods=["fit", "predict", "predict_proba", "predict_log_proba"]
decorated_methods=[
"fit",
"predict",
"predict_proba",
"predict_log_proba",
"score",
]
)
class LogisticRegression(sklearn_LogisticRegression, BaseLogisticRegression):
__doc__ = sklearn_LogisticRegression.__doc__
Expand Down Expand Up @@ -160,6 +168,27 @@ def predict_log_proba(self, X):
X,
)

@wrap_output_data
def score(self, X, y, sample_weight=None):
    """Dispatch accuracy scoring to the oneDAL or stock sklearn backend."""
    if sklearn_check_version("1.0"):
        self._check_feature_names(X, reset=False)
    backends = {
        "onedal": self.__class__._onedal_score,
        "sklearn": sklearn_LogisticRegression.score,
    }
    return dispatch(self, "score", backends, X, y, sample_weight=sample_weight)

def _onedal_score(self, X, y, sample_weight=None, queue=None):
    # Accuracy of oneDAL-backend predictions against the ground truth y.
    predictions = self._onedal_predict(X, queue=queue)
    return accuracy_score(y, predictions, sample_weight=sample_weight)

def _test_type_and_finiteness(self, X_in):
X = np.asarray(X_in)

Expand Down Expand Up @@ -198,6 +227,10 @@ def _onedal_gpu_fit_supported(self, method_name, *data):
(self.warm_start == False, "Warm start is not supported."),
(self.l1_ratio is None, "l1 ratio is not supported."),
(sample_weight is None, "Sample weight is not supported."),
(
type_of_target(y) == "binary",
"Only binary classification is supported",
),
]
)

Expand All @@ -216,22 +249,29 @@ def _onedal_gpu_fit_supported(self, method_name, *data):
return patching_status

def _onedal_gpu_predict_supported(self, method_name, *data):
assert method_name in ["predict", "predict_proba", "predict_log_proba"]
assert len(data) == 1
assert method_name in [
"predict",
"predict_proba",
"predict_log_proba",
"score",
]

class_name = self.__class__.__name__
patching_status = PatchingConditionsChain(
f"sklearn.linear_model.{class_name}.{method_name}"
)

n_samples = _num_samples(*data)
n_samples = _num_samples(data[0])
model_is_sparse = issparse(self.coef_) or (
self.fit_intercept and issparse(self.intercept_)
)
dal_ready = patching_status.and_conditions(
[
(n_samples > 0, "Number of samples is less than 1."),
(not issparse(*data), "Sparse input is not supported."),
(
not any([issparse(i) for i in data]),
"Sparse input is not supported.",
),
(not model_is_sparse, "Sparse coefficients are not supported."),
(
hasattr(self, "_onedal_estimator"),
Expand All @@ -251,7 +291,7 @@ def _onedal_gpu_predict_supported(self, method_name, *data):
def _onedal_gpu_supported(self, method_name, *data):
if method_name == "fit":
return self._onedal_gpu_fit_supported(method_name, *data)
if method_name in ["predict", "predict_proba", "predict_log_proba"]:
if method_name in ["predict", "predict_proba", "predict_log_proba", "score"]:
return self._onedal_gpu_predict_supported(method_name, *data)
raise RuntimeError(
f"Unknown method {method_name} in {self.__class__.__name__}"
Expand Down Expand Up @@ -334,6 +374,7 @@ def _onedal_predict_log_proba(self, X, queue=None):
predict.__doc__ = sklearn_LogisticRegression.predict.__doc__
predict_proba.__doc__ = sklearn_LogisticRegression.predict_proba.__doc__
predict_log_proba.__doc__ = sklearn_LogisticRegression.predict_log_proba.__doc__
score.__doc__ = sklearn_LogisticRegression.score.__doc__

else:
LogisticRegression = LogisticRegression_daal4py
Expand Down
39 changes: 6 additions & 33 deletions sklearnex/neighbors/_lof.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,10 @@

from daal4py.sklearn._n_jobs_support import control_n_jobs
from daal4py.sklearn._utils import sklearn_check_version

from .._device_offload import dispatch, wrap_output_data
from .common import KNeighborsDispatchingBase
from .knn_unsupervised import NearestNeighbors
from sklearnex._device_offload import dispatch, wrap_output_data
from sklearnex.neighbors.common import KNeighborsDispatchingBase
from sklearnex.neighbors.knn_unsupervised import NearestNeighbors
from sklearnex.utils import get_namespace


@control_n_jobs(decorated_methods=["fit", "_kneighbors"])
Expand Down Expand Up @@ -112,16 +112,13 @@ def fit(self, X, y=None):
)
return result

# Subtle order change to remove check_array and preserve dpnp and
# dpctl conformance. decision_function will return a dpnp or dpctl
# instance via kneighbors and an equivalent check_array exists in
# that call already in sklearn so no loss of functionality occurs
def _predict(self, X=None):
check_is_fitted(self)

if X is not None:
xp, _ = get_namespace(X)
output = self.decision_function(X) < 0
is_inlier = np.ones(output.shape[0], dtype=int)
is_inlier = xp.ones_like(output, dtype=int)
is_inlier[output] = -1
else:
is_inlier = np.ones(self.n_samples_fit_, dtype=int)
Expand Down Expand Up @@ -158,30 +155,6 @@ def fit_predict(self, X, y=None):
"""
return self.fit(X)._predict()

@available_if(sklearn_LocalOutlierFactor._check_novelty_predict)
@wrap_output_data
def predict(self, X=None):
    """Predict the labels (1 inlier, -1 outlier) of X according to LOF.

    **Only available for novelty detection (when novelty is set to True).**
    Generalizes prediction to *new observations* not in the training set.
    Note that ``clf.fit(X)`` followed by ``clf.predict(X)`` with
    ``novelty=True`` may differ from ``clf.fit_predict(X)`` with
    ``novelty=False``.

    Parameters
    ----------
    X : {array-like, sparse matrix} of shape (n_samples, n_features)
        The query sample or samples to compute the Local Outlier Factor
        w.r.t. the training samples.

    Returns
    -------
    is_inlier : ndarray of shape (n_samples,)
        Returns -1 for anomalies/outliers and +1 for inliers.
    """
    return self._predict(X)

def _kneighbors(self, X=None, n_neighbors=None, return_distance=True):
check_is_fitted(self)
if sklearn_check_version("1.0") and X is not None:
Expand Down
Loading

0 comments on commit 28e545d

Please sign in to comment.