Skip to content

Commit

Permalink
Add predict_proba Support to PegasosQSVC and NeuralNetworkClassifier (#…
Browse files Browse the repository at this point in the history
…871)

* Adding a predict_proba function to classifiers. (#57)

* Update README.md

* Predict proba for NNC and PegQSVC

* Rewriting predict proba features and docstring

The previous implementation was very inefficient and lacked the necessary validation checks. The code is now clearer and a docstring has been added.

* Tweak documentation for NNC and PegasosQSVC, silence lint E1101 on torch connector

* Update test with `QNN.predict_proba`

* Update test with `PegasosQSVC.predict_proba`

* Added a release note and solved conflicts with main

---------

Co-authored-by: FrancescaSchiav <[email protected]>
Co-authored-by: oscar-wallis <[email protected]>
Co-authored-by: Edoardo Altamura <[email protected]>
Co-authored-by: smens <[email protected]>

* Reformatted docs

* Fix usage of sklearn

---------

Co-authored-by: FrancescaSchiav <[email protected]>
Co-authored-by: oscar-wallis <[email protected]>
Co-authored-by: Edoardo Altamura <[email protected]>
Co-authored-by: smens <[email protected]>
  • Loading branch information
5 people authored Dec 9, 2024
1 parent cd7a332 commit 94ccb0a
Show file tree
Hide file tree
Showing 5 changed files with 204 additions and 34 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -140,23 +140,76 @@ def _create_objective(self, X: np.ndarray, y: np.ndarray) -> ObjectiveFunction:
return function

def predict(self, X: np.ndarray) -> np.ndarray:
self._check_fitted()
"""
Perform classification on samples in X.
Args:
X (np.ndarray): Input features. For a callable kernel (an instance of
:class:`~qiskit_machine_learning.kernels.BaseKernel`), the shape
should be ``(m_samples, n_features)``. For a pre-computed kernel, the shape should be
``(m_samples, n_samples)``. Here, ``m_*`` denotes the set to be
predicted, and ``n_*`` denotes the size of the training set.
In the case of a pre-computed kernel, the kernel values in ``X`` must be calculated
with respect to the elements of the set to be predicted and the training set.
Returns:
np.ndarray: An array of shape ``(n_samples,)``, representing the predicted class labels for
each sample in ``X``.
Raises:
QiskitMachineLearningError:
- If the :meth:`predict` method is called before the model has been fit.
ValueError:
- If the pre-computed kernel matrix has the wrong shape and/or dimension.
"""
self._check_fitted()
X, _ = self._validate_input(X)

if self._neural_network.output_shape == (1,):
predict = np.sign(self._neural_network.forward(X, self._fit_result.x))
# Binary classification
raw_output = self._neural_network.forward(X, self._fit_result.x)
predict = np.sign(raw_output)
else:
# Multi-class classification
forward = self._neural_network.forward(X, self._fit_result.x)
predict_ = np.argmax(forward, axis=1)

if self._one_hot:
# Convert class indices to one-hot encoded format
predict = np.zeros(forward.shape)
for i, v in enumerate(predict_):
predict[i, v] = 1
else:
predict = predict_

return self._validate_output(predict)

def predict_proba(self, X: np.ndarray) -> np.ndarray:
    """
    Return the output of the trained neural network as predicted class probabilities.

    No normalisation is performed by this method: the values produced by the network's
    forward pass are returned unchanged. They are therefore valid probabilities only if
    the network itself already emits values in the range :math:`[0, 1]`.

    Args:
        X (np.ndarray): Input features of the samples to evaluate. They are passed through
            ``_validate_input`` before being handed to the neural network.

    Returns:
        np.ndarray: The result of the network's forward pass on ``X``, evaluated with the
        fitted weights and returned as is.
    """
    # Fails early when the classifier has not been trained yet.
    self._check_fitted()
    validated, _ = self._validate_input(X)

    # Assumes an activation function is applied within the forward method,
    # so the forward output is handed back without further processing.
    return self._neural_network.forward(validated, self._fit_result.x)

def score(self, X: np.ndarray, y: np.ndarray, sample_weight: np.ndarray | None = None) -> float:
return ClassifierMixin.score(self, X, y, sample_weight)

Expand Down
55 changes: 42 additions & 13 deletions qiskit_machine_learning/algorithms/classifiers/pegasos_qsvc.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,7 @@ def fit(

self.fit_status_ = PegasosQSVC.FITTED

logger.debug("fit completed after %s", str(datetime.now() - t_0)[:-7])
logger.debug("Fit completed after %s", str(datetime.now() - t_0)[:-7])

return self

Expand All @@ -213,33 +213,62 @@ def predict(self, X: np.ndarray) -> np.ndarray:
Perform classification on samples in X.
Args:
X: Features. For a callable kernel (an instance of
:class:`~qiskit_machine_learning.kernels.BaseKernel`) the shape
should be ``(m_samples, n_features)``, for a precomputed kernel the shape should be
``(m_samples, n_samples)``. Where ``m`` denotes the set to be predicted and ``n`` the
size of the training set. In that case, the kernel values in X have to be calculated
with respect to the elements of the set to be predicted and the training set.
X (np.ndarray): Input features. For a callable kernel (an instance of
:class:`~qiskit_machine_learning.kernels.BaseKernel`), the shape
should be ``(m_samples, n_features)``. For a pre-computed kernel, the shape should be
``(m_samples, n_samples)``. Here, ``m_*`` denotes the set to be
predicted, and ``n_*`` denotes the size of the training set. In the case of a
pre-computed kernel, the kernel values in ``X`` must be calculated with respect to
the elements of the set to be predicted and the training set.
Returns:
An array of the shape (n_samples), the predicted class labels for samples in X.
np.ndarray: An array of shape ``(n_samples,)``, representing the predicted class labels for
each sample in ``X``.
Raises:
QiskitMachineLearningError:
- predict is called before the model has been fit.
- If the :meth:`predict` method is called before the model has been fit.
ValueError:
- Pre-computed kernel matrix has the wrong shape and/or dimension.
- If the pre-computed kernel matrix has the wrong shape and/or dimension.
"""

t_0 = datetime.now()
values = self.decision_function(X)
y = np.array([self._label_pos if val > 0 else self._label_neg for val in values])
logger.debug("prediction completed after %s", str(datetime.now() - t_0)[:-7])
logger.debug("Prediction completed after %s", str(datetime.now() - t_0)[:-7])

return y

def predict_proba(self, X: np.ndarray) -> np.ndarray:
    """
    Extract class prediction probabilities. The decision function values are
    not bounded in the range :math:`[0, 1]`. Therefore, these values are
    converted into probabilities using the sigmoid activation
    function, which maps the real-valued outputs to the :math:`[0, 1]` range.

    Args:
        X (np.ndarray): Input features. For a callable kernel (an instance of
            :class:`~qiskit_machine_learning.kernels.BaseKernel`), the shape
            should be ``(m_samples, n_features)``. For a pre-computed kernel, the shape should be
            ``(m_samples, n_samples)``. Here, ``m_*`` denotes the set to be
            predicted, and ``n_*`` denotes the size of the training set. In the case of a
            pre-computed kernel, the kernel values in ``X`` must be calculated with respect to
            the elements of the set to be predicted and the training set.

    Returns:
        np.ndarray: An array with one row per sample in ``X`` and two columns. The second
        column is the sigmoid of the decision function value and the first column is its
        complement, so every row sums to one.
    """
    values = self.decision_function(X)

    # Sigmoid written as exp(-log(1 + exp(-v))). ``np.logaddexp`` evaluates the logarithm
    # without overflowing, whereas the direct form ``1 / (1 + np.exp(-v))`` overflows in
    # ``np.exp`` for large negative decision values.
    positive = np.exp(-np.logaddexp(0.0, -values))

    # Pair each probability with its complement: column 0 is ``1 - p``, column 1 is ``p``.
    return np.column_stack((1 - positive, positive))

def decision_function(self, X: np.ndarray) -> np.ndarray:
"""
Evaluate the decision function for the samples in X.
Evaluate the decision function for the samples in ``X``.
Args:
X: Features. For a callable kernel (an instance of
Expand All @@ -259,7 +288,7 @@ def decision_function(self, X: np.ndarray) -> np.ndarray:
- Pre-computed kernel matrix has the wrong shape and/or dimension.
"""
if self.fit_status_ == PegasosQSVC.UNFITTED:
raise QiskitMachineLearningError("The PegasosQSVC has to be fit first")
raise QiskitMachineLearningError("The PegasosQSVC has to be fit first.")
if np.ndim(X) != 2:
raise ValueError("X has to be a 2D array")
if self._precomputed and self._n_samples != X.shape[1]:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
features:
- |
The :class:`~qiskit_machine_learning.algorithms.PegasosQSVC` class and algorithms derived
from the :class:`~qiskit_machine_learning.algorithms.NeuralNetworkClassifier` class now support the ``predict_proba`` method.
This method can be used in the same way as in other ``scikit-learn``-based algorithms.
84 changes: 74 additions & 10 deletions test/algorithms/classifiers/test_neural_network_classifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,10 +160,58 @@ def parity(x):
return qnn, num_inputs, ansatz.num_parameters

def _generate_data(self, num_inputs: int) -> tuple[np.ndarray, np.ndarray]:
    """
    Build a small synthetic binary-classification data set.

    Six samples are drawn from ``algorithm_globals.random``. A sample is labelled ``1.0``
    when the sum of its feature values is less than or equal to 1, and ``0.0`` otherwise.

    Args:
        num_inputs (int): The number of features for each sample.

    Returns:
        tuple[np.ndarray, np.ndarray]: A ``(features, labels)`` pair, where ``features`` has
        shape ``(6, num_inputs)`` and ``labels`` has shape ``(6,)`` with float entries.
    """
    # The sample count is fixed so that every test works on a data set of the same size.
    sample_count = 6
    features = algorithm_globals.random.random((sample_count, num_inputs))

    # Threshold the per-sample feature sum and store the boolean mask as floats.
    below_threshold = features.sum(axis=1) <= 1
    labels = below_threshold.astype(float)

    return features, labels

def _generate_data_multiclass(self, num_inputs: int) -> tuple[np.ndarray, np.ndarray]:
    """
    Generates synthetic data consisting of randomly generated features and 3 categorical labels.

    Each label is determined based on the sum of the corresponding feature values, assigned
    as follows:
        - Label 0.0 if the sum of features <= 0.5.
        - Label 1.0 if 0.5 < sum of features <= 1.0.
        - Label 2.0 if sum of features > 1.0.

    Args:
        num_inputs (int): The number of features for each sample.

    Returns:
        tuple[np.ndarray, np.ndarray]: A tuple containing two numpy arrays:
            - features: An array of shape ``(6, num_inputs)`` with randomly generated
              feature values.
            - labels: An array of shape ``(6,)`` with categorical labels (0, 1, or 2) for each
              sample.
    """
    # Fixed number of samples for consistency
    num_samples = 6

    features = algorithm_globals.random.random((num_samples, num_inputs))

    # Assign categorical labels based on feature sums. Every sample starts in the top
    # class (2.0) and is then moved down if its sum falls in one of the lower ranges.
    sums = np.sum(features, axis=1)
    labels = np.full_like(sums, 2.0)
    labels[sums <= 0.5] = 0.0
    labels[(sums > 0.5) & (sums <= 1.0)] = 1.0

    return features, labels

Expand Down Expand Up @@ -247,8 +295,13 @@ def test_classifier_with_sampler_qnn_and_cross_entropy(self, opt):
(False, "squared_error"),
)
def test_categorical_data(self, config):
"""Test categorical labels using QNN"""
"""
Tests categorical labels using the QNN classifier with categorical labels.
Args:
config (tuple): Configuration tuple containing whether to use one-hot
encoding and the loss function.
"""
one_hot, loss = config

optimizer = L_BFGS_B(maxiter=5)
Expand All @@ -259,20 +312,29 @@ def test_categorical_data(self, config):

features, labels = self._generate_data(num_inputs)
labels = labels.astype(str)
# convert to categorical

# Convert to categorical labels
labels[labels == "0.0"] = "A"
labels[labels == "1.0"] = "B"

# fit to data
# Fit classifier to the data
classifier.fit(features, labels)

# score
# Evaluate the classifier
score = classifier.score(features, labels)
self.assertGreater(score, 0.5)

# Predict a single sample
predict = classifier.predict(features[0, :])
self.assertIn(predict, ["A", "B"])

# Test predict_proba method
probas = classifier.predict_proba(features)
self.assertEqual(probas.shape, (6, 2))

for proba in probas:
self.assertAlmostEqual(np.sum(proba), 1.0, places=5)

@idata(L1L2_ERRORS + ["cross_entropy"])
def test_sparse_arrays(self, loss):
"""Tests classifier with sparse arrays as features and labels."""
Expand Down Expand Up @@ -375,7 +437,7 @@ def test_binary_classification_with_multiclass_data(self):
"""Test that trying to train a binary classifier with multiclass data raises an error."""

optimizer = L_BFGS_B(maxiter=5)
qnn, num_inputs, num_parameters = self._create_sampler_qnn(output_shape=1)
qnn, _, num_parameters = self._create_sampler_qnn(output_shape=1)
classifier = self._create_classifier(
qnn,
num_parameters,
Expand All @@ -385,11 +447,10 @@ def test_binary_classification_with_multiclass_data(self):

# construct data
num_samples = 3
x = algorithm_globals.random.random((num_samples, num_inputs))
y = np.asarray([0, 1, 2])
features, labels = self._generate_data_multiclass(num_samples)

with self.assertRaises(QiskitMachineLearningError):
classifier.fit(x, y)
classifier.fit(features, labels)

def test_bad_binary_shape(self):
"""Test that trying to train a binary classifier with misshaped data raises an error."""
Expand Down Expand Up @@ -435,6 +496,9 @@ def test_untrained(self):
with self.assertRaises(QiskitMachineLearningError, msg="classifier.predict()"):
classifier.predict(np.asarray([]))

with self.assertRaises(QiskitMachineLearningError, msg="classifier.predict_proba()"):
classifier.predict_proba(np.asarray([]))

with self.assertRaises(QiskitMachineLearningError, msg="classifier.fit_result"):
_ = classifier.fit_result

Expand Down
37 changes: 28 additions & 9 deletions test/algorithms/classifiers/test_pegasos_qsvc.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,15 +70,34 @@ def setUp(self):
self.label_test_4d = label_4d[15:]

def test_qsvc(self):
"""Test PegasosQSVC"""
qkernel = FidelityQuantumKernel(feature_map=self.feature_map)

pegasos_qsvc = PegasosQSVC(quantum_kernel=qkernel, C=1000, num_steps=self.tau)

pegasos_qsvc.fit(self.sample_train, self.label_train)
score = pegasos_qsvc.score(self.sample_test, self.label_test)

self.assertEqual(score, 1.0)
"""
Test the Pegasos QSVC algorithm.
"""
quantum_kernel = FidelityQuantumKernel(feature_map=self.feature_map)
classifier = PegasosQSVC(quantum_kernel=quantum_kernel, C=1000, num_steps=self.tau)
classifier.fit(self.sample_train, self.label_train)

# Evaluate the model on the test data
test_score = classifier.score(self.sample_test, self.label_test)
self.assertEqual(test_score, 1.0)

# Expected predictions for the given test data
predicted_labels = classifier.predict(self.sample_test)
self.assertTrue(np.array_equal(predicted_labels, self.label_test))

# Test predict_proba method (normalization is imposed by definition)
probas = classifier.predict_proba(self.sample_test)
expected_probas = np.array(
[
[0.67722117, 0.32277883],
[0.35775209, 0.64224791],
[0.36540916, 0.63459084],
[0.64419096, 0.35580904],
[0.35864466, 0.64135534],
]
)
self.assertEqual(probas.shape, (self.label_test.shape[0], 2))
np.testing.assert_array_almost_equal(probas, expected_probas, decimal=5)

def test_decision_function(self):
"""Test PegasosQSVC."""
Expand Down

0 comments on commit 94ccb0a

Please sign in to comment.