Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

gaussianmechanism function in sklearn_pipeline.py #418

Open
wants to merge 7 commits into
base: feature/machine-learning-1
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
226 changes: 226 additions & 0 deletions src/pydp/ml/mechanisms/sklearn_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,3 +200,229 @@ def fit(self, X, y=None):
def transform(self, X, y=None):
X = self.sensitivity_calculation(X)
return X




# for gaussian
class GaussianMechanism(BaseEstimator, TransformerMixin):
"""
An SKLearn Pipeline operator for applying differentially private noise
addition using the gaussian mechanism.

"""
"""
static std::unique_ptr<dp::GaussianMechanism> build(double epsilon, double delta,
double l2_sensitivity) {
dp::GaussianMechanism::Builder builder;
builder.SetEpsilon(epsilon);
builder.SetDelta(delta);
builder.SetL2Sensitivity(l2_sensitivity);
return downcast_unique_ptr<dp::GaussianMechanism, dp::NumericalMechanism>(
builder.Build().value());
};

Gaussian depends on these parameters
"""
def __init__(self, epsilon=1.0, delta=1.0, sensitivity=1, accountant=None):
"""
Checks that all parameters of the mechanism have been initialised correctly, and that the mechanism is ready
to be used.

Parameters
----------
epsilon : float or int
The value of epsilon for achieving :math:`\epsilon`-differential privacy with the mechanism. Must have
`epsilon > 0`.

sensitivity : float or int
The sensitivity of the mechanism. Must satisfy `sensitivity` > 0.

accountant : BudgetAccountant, optional
Accountant to keep track of privacy budget.


Attributes
-------
epsilon
Privacy budget to calculate noise.

sensitivty
Sensitivity of the mechanism to calculate noise.

accountant
Accountant to keep track of privacy budget.

Raises
------
TypeError
If epsilon is not a number, or sensitivity is not a number or a callable.

ValueError
If epsilon less than 0, or sensitivty is a number but less than 0.
"""

if not isinstance(epsilon, numbers.Number):
raise TypeError(f"Epsilon must be a number. Got type {type(epsilon)}.")

if epsilon <= 0:
raise ValueError("Epsilon must be at least larger than 0.")

self.epsilon = epsilon

if not isinstance(sensitivity, numbers.Number):
if not callable(sensitivity):
raise TypeError(f"Sensitivity must be a number or callable. Got type {type(sensitivity)}.")


if isinstance(sensitivity, numbers.Number) and sensitivity <= 0:
raise ValueError("Sensitivity must be at least larger than 0.")

self.sensitivity = sensitivity
self.accountant = BudgetAccountant.load_default(accountant)

self.gaussian = None

if not callable(sensitivity):
self.gaussian = Gaussian()

def sensitivity_calculation(self, X):
"""
Perform local differential privacy by adding noise using Laplace mechanismto the dataset if the sensitivity
provided if a callable.


Parameters
----------
X : numpy.array
Datset in the form of a 2-dimensional numpy array.

Returns
------
X : numpy.array
Original parameter X with differentially private noise added.
"""

n_feature = X.shape[-1]
n_data = X.shape[0]


for data_idx in range(n_data):
self.accountant.check(self.epsilon, 0)
for feature_idx in range(n_feature):

# Array with data point data_idx removed for feature_idx
feature = np.concatenate((X[:data_idx,feature_idx],X[data_idx + 1:,feature_idx]))

# Calculate sensitivity
sensitivity_ = self.sensitivity(feature)

# Initialized Gaussian mechanism instance
gaussian = Gaussian().set_epsilon(self.epsilon).set_sensitivity(sensitivity_)

# Add noise to the data point that was removed
noised_value = gaussian.randomise(X[data_idx,feature_idx])

# Replaced data point in the dataset with noised version
X[data_idx,feature_idx] = noised_value

self.accountant.spend(self.epsilon, 0)
return X

def fit(self, X, y=None):
return self

def transform(self, X, y=None):
if self.gaussian is not None:
self.gaussian.set_epsilon(self.epsilon).set_sensitivity(self.sensitivity)
vector_randomise = np.vectorize(self.gaussian.randomise)
noised_array = vector_randomise(X)
return noised_array
else:
X = self.sensitivity_calculation( X)
return X





# geometric has lambda
def GeometricMechanism(BaseEstimator, TransformerMixin):
"""
An SKLearn Pipeline operator for applying differentially private noise
addition using the geometric mechanism.

"""

def __init__(self, lambda_=1.0, sensitivity=1, accountant=None):

"""
Checks that all parameters of the mechanism have been initialised correctly, and that the mechanism is ready
to be used.

Parameters
----------
lambda_ : float or int
The value of lambda for achieving :math:`\lambda`-differential privacy with the mechanism. Must have
`lambda > 0`.

sensitivity : float or int
The sensitivity of the mechanism. Must satisfy `sensitivity` > 0.

accountant : BudgetAccountant, optional
Accountant to keep track of privacy budget.


Attributes
-------
lambda_
Privacy budget to calculate noise.

sensitivty
Sensitivity of the mechanism to calculate noise.

accountant
Accountant to keep track of privacy budget.

Raises
------
TypeError
If lambda_ is not a number, or sensitivity is not a number or a callable.

ValueError
If lambda_ less than 0, or sensitivty is a number but less than 0.
"""

if not isinstance(lambda_, numbers.Number):
raise TypeError(f"Lambda must be a number. Got type {type(lambda_)}.")

if lambda_ <= 0:
raise ValueError("Lambda must be at least larger than 0.")

self.lambda_ = lambda_

if not isinstance(sensitivity, numbers.Number):
if not callable(sensitivity):
raise TypeError(f"Sensitivity must be a number or callable. Got type {type(sensitivity)}.")


if isinstance(sensitivity, numbers.Number) and sensitivity <= 0:
raise ValueError("Sensitivity must be at least larger than 0.")

self.sensitivity = sensitivity
self.accountant = BudgetAccountant.
self.BaseEstimator = BaseEstimator
self.TransformerMixin = TransformerMixin

def fit(self, X, y=None):
return self

def transform(self, X, y=None):
if self.geometric is not None:
self.geometric.set_delta(self.delta).set_sensitivity(self.sensitivity)
vector_randomise = np.vectorize(self.geometric.randomise)
noised_array = vector_randomise(X)
return noised_array
else:
X = self.sensitivity_calculation( X)
return X