Wrapper for numerical mechanism class [Continuation from OpenMined#372] (OpenMined#380)

levzlotnik · web-flow · commit b592fb15bc4e · 2021-08-07T14:17:57.000+05:30
* initial addition of partition selection

* corrected instantiation to 'builder.Build' instead

* build works, TODO: deal with passing Laplace/Gaussian mechanism builders

* post-review changes, moved partition selection python related code to own submodule

* fixed some formatting

* Added documentation

* post formatting

* added tests for partition selection

* moved imports for partition_selection

* clarified TODO dependency

* attempt to resolve linting issues

* * Added exports in algorithms.partition_selection
* Replaced the Create*PartitionStrategy functions with a template
function and instantiations

* revert a to latest stable commit

* restore the correct commit for google-dp submodule

* added python bindings for numerical mechanisms

* added some python files

* fixed prereqs_linux.sh script

* saving changes

* build + tests work

* added docs for numerical mechanisms
diff --git a/docs/pydp.rst b/docs/pydp.rst
@@ -26,6 +26,17 @@ Algorithms
    :inherited-members:
 
 
+Numerical Mechanisms
+####################
+.. currentmodule:: pydp.algorithms.numerical_mechanisms
+.. autoclass:: NumericalMechanism
+   :members: 
+.. autoclass:: LaplaceMechanism
+   :members:
+   :show-inheritance:
+.. autoclass:: GaussianMechanism
+   :members:
+   :show-inheritance:
 
 Distributions
 #############
@@ -52,4 +63,4 @@ Partition Selection
 .. currentmodule:: pydp.algorithms.partition_selection
 .. autoclass:: PartitionSelectionStrategy
    :members: 
-.. autofunction:: create_partition_strategy
+.. autofunction:: create_partition_strategy
diff --git a/prereqs_linux.sh b/prereqs_linux.sh
@@ -60,7 +60,7 @@ git submodule update --init --recursive
 
 
 # checkout out to particular commit
-cd third_party/differential-privacy && \
+cd third_party/differential-privacy && git checkout 78d3fb8f63ea904ea6449a8276b9070254c650ec
 cd -
 # renaming workspace.bazel to workspace
 mv third_party/differential-privacy/cc/WORKSPACE.bazel third_party/differential-privacy/cc/WORKSPACE
diff --git a/src/bindings/BUILD b/src/bindings/BUILD
@@ -7,7 +7,8 @@ pybind_extension(
         "PyDP/base/*.cpp",
         "PyDP/algorithms/*.cpp",
         "PyDP/pydp_lib/*.hpp",
-        "PyDP/proto/*.cpp"
+        "PyDP/proto/*.cpp",
+        "PyDP/mechanisms/*.cpp"
     ]),
 
     visibility = ["//src/python:__pkg__"],
@@ -23,6 +24,7 @@ pybind_extension(
         "@google_dp//algorithms:bounded-sum",
         "@google_dp//algorithms:bounded-standard-deviation",
         "@google_dp//algorithms:partition-selection",
+        "@google_dp//algorithms:numerical-mechanisms",
         "@google_dp//algorithms:count",
         "@google_dp//algorithms:order-statistics",
         "@google_dp//proto:util-lib"
diff --git a/src/bindings/PyDP/bindings.cpp b/src/bindings/PyDP/bindings.cpp
@@ -29,6 +29,9 @@ void init_algorithms_rand(py::module &);
 // proto
 void init_proto(py::module &);
 
+// numerical mechanisms
+void init_mechanisms_mechanism(py::module &);
+
 PYBIND11_MODULE(_pydp, m) {
   m.doc() = "Google Differential Privacy python extension";
 
@@ -52,6 +55,9 @@ PYBIND11_MODULE(_pydp, m) {
   init_algorithms_rand(mutil);
   init_algorithms_util(mutil);
 
+  auto mnumericalmechanisms = m.def_submodule("_mechanisms", "Numerical Mechanisms.");
+  init_mechanisms_mechanism(mnumericalmechanisms);
+
   // Proto
   // TODO: Delete if it is not necessary (we no longer return StatusOr to the user)
   init_proto(m);
diff --git a/src/bindings/PyDP/mechanisms/mechanism.cpp b/src/bindings/PyDP/mechanisms/mechanism.cpp
@@ -1 +1,184 @@
-#include "mechanism.h"
+#include <memory>
+#include <stdexcept>
+#include <type_traits>
+
+#include <pybind11/pybind11.h>
+#include "pybind11/complex.h"
+#include "pybind11/functional.h"
+#include "pybind11/stl.h"
+
+#include "algorithms/distributions.h"
+#include "algorithms/numerical-mechanisms.h"
+
+#include "../pydp_lib/algorithm_builder.hpp"
+#include "../pydp_lib/casting.hpp"
+
+using namespace std;
+
+namespace py = pybind11;
+namespace dp = differential_privacy;
+
+// Exposes dp::ConfidenceInterval to Python as `ConfidenceInterval`.
+class ConfidenceIntervalBinder {
+ public:
+  // Registers the class in module `m` (reported as living in `pydp`) with three
+  // read/write properties: `lower_bound`, `upper_bound` and `confidence_level`.
+  // Each property forwards to the matching getter / `set_*` member.
+  static void DeclareIn(py::module& m) {
+    using Interval = dp::ConfidenceInterval;
+
+    py::class_<Interval> py_interval(m, "ConfidenceInterval");
+    py_interval.attr("__module__") = "pydp";
+
+    py_interval.def_property("lower_bound", &Interval::lower_bound,
+                             &Interval::set_lower_bound);
+    py_interval.def_property("upper_bound", &Interval::upper_bound,
+                             &Interval::set_upper_bound);
+    py_interval.def_property("confidence_level", &Interval::confidence_level,
+                             &Interval::set_confidence_level);
+  }
+};
+
+// Binds the one-argument `AddNoise(T)` overload of dp::NumericalMechanism as
+// the Python method `add_noise(result)`.
+//
+// The cast picks the overload for value type `T`. Returns `pyclass` so that
+// further definitions can be chained onto it.
+template <typename T>
+py::class_<dp::NumericalMechanism>& DefPyAddNoise(
+    py::class_<dp::NumericalMechanism>& pyclass) {
+  pyclass.def("add_noise",
+              static_cast<T (dp::NumericalMechanism::*)(T)>(
+                  &dp::NumericalMechanism::AddNoise),
+              py::arg("result"));
+  return pyclass;
+}
+
+// Binds the two-argument `AddNoise(T, U)` overload of dp::NumericalMechanism
+// as the Python method `add_noise(result, privacy_budget)`.
+//
+// `T` is the value type being noised and `U` the type of the privacy budget.
+// Returns `pyclass` so that further definitions can be chained onto it.
+template <typename T, typename U>
+py::class_<dp::NumericalMechanism>& DefPyAddNoise(
+    py::class_<dp::NumericalMechanism>& pyclass) {
+  pyclass.def("add_noise",
+              static_cast<T (dp::NumericalMechanism::*)(T, U)>(
+                  &dp::NumericalMechanism::AddNoise),
+              py::arg("result"), py::arg("privacy_budget"));
+  return pyclass;
+}
+
+// Converts a std::unique_ptr<U> into a std::unique_ptr<T>, where T derives
+// from U, using a checked dynamic_cast.
+//
+// Returns an empty pointer when `u_ptr` is empty or does not point to a T. In
+// that case ownership stays with `u_ptr`, so the object is destroyed when this
+// function returns instead of being leaked.
+template <typename T, typename U>
+std::unique_ptr<T> downcast_unique_ptr(std::unique_ptr<U> u_ptr) {
+  static_assert(std::is_base_of<U, T>::value, "Illegal downcast.");
+  T* ptr = dynamic_cast<T*>(u_ptr.get());
+  if (ptr != nullptr) {
+    // Give up ownership only once the cast is known to have succeeded.
+    static_cast<void>(u_ptr.release());
+  }
+  return std::unique_ptr<T>(ptr);
+}
+
+// Registers the dp::NumericalMechanism base class with Python.
+class NumericalMechanismBinder {
+ public:
+  // Declares `NumericalMechanism` in module `m`. No constructor is bound here;
+  // the class carries the methods shared by the concrete mechanisms that name
+  // it as their pybind11 base.
+  static void DeclareIn(py::module& m) {
+    py::class_<dp::NumericalMechanism> numerical_mech(m, "NumericalMechanism",
+                                                      R"pbdoc(
+        Base class for all (Ɛ, 𝛿)-differentially private additive noise numerical mechanisms.
+      )pbdoc");
+    numerical_mech.attr("__module__") = "pydp";
+    // `add_noise` overloads: the budgeted forms are registered first, then the
+    // single-argument forms; within each group the order is int, int64_t,
+    // double.
+    DefPyAddNoise<int, double>(numerical_mech);
+    DefPyAddNoise<int64_t, double>(numerical_mech);
+    DefPyAddNoise<double, double>(numerical_mech);
+    DefPyAddNoise<int>(numerical_mech);
+    DefPyAddNoise<int64_t>(numerical_mech);
+    DefPyAddNoise<double>(numerical_mech);
+    numerical_mech
+        .def("noised_value_above_threshold",
+             &dp::NumericalMechanism::NoisedValueAboveThreshold,
+             R"pbdoc(
+               Quickly determines if `result` with added noise is above certain `threshold`.
+             )pbdoc")
+        .def("memory_used", &dp::NumericalMechanism::MemoryUsed)
+        .def(
+            "noise_confidence_interval",
+            [](dp::NumericalMechanism& self, double cl, double pb,
+               double nr) -> dp::ConfidenceInterval {
+              auto result = self.NoiseConfidenceInterval(cl, pb, nr);
+              if (!result.ok()) {
+                // Report rejected arguments as an exception (pybind11 maps
+                // std::runtime_error to RuntimeError) rather than accessing
+                // the value of a failed result.
+                throw std::runtime_error(result.status().ToString());
+              }
+              return result.ValueOrDie();
+            },
+            py::arg("confidence_level"), py::arg("privacy_budget"),
+            py::arg("noised_result"),
+            R"pbdoc(
+              Returns the confidence interval of the specified confidence level of the
+              noise that AddNoise() would add with the specified privacy budget.
+              If the returned value is <x,y>, then the noise added has a confidence_level
+              chance of being in the domain [x,y]
+            )pbdoc")
+        .def_property_readonly("epsilon", &dp::NumericalMechanism::GetEpsilon,
+                               "The Ɛ of the numerical mechanism");
+  }
+};
+
+// Python binding for dp::LaplaceMechanism.
+class LaplaceMechanismBinder {
+ public:
+  // Builds a Laplace mechanism from `epsilon` and the query's L1 sensitivity.
+  //
+  // Throws std::runtime_error (which pybind11 maps to RuntimeError) when the
+  // builder reports an error, instead of accessing the value of a failed
+  // result.
+  static std::unique_ptr<dp::LaplaceMechanism> build(double epsilon,
+                                                     double l1_sensitivity) {
+    dp::LaplaceMechanism::Builder builder;
+    builder.SetEpsilon(epsilon);
+    // NOTE(review): both setters receive the same value; SetSensitivity looks
+    // like an older spelling of SetL1Sensitivity -- confirm before dropping it.
+    builder.SetSensitivity(l1_sensitivity);
+    builder.SetL1Sensitivity(l1_sensitivity);
+    auto mechanism = builder.Build();
+    if (!mechanism.ok()) {
+      throw std::runtime_error(mechanism.status().ToString());
+    }
+    return downcast_unique_ptr<dp::LaplaceMechanism, dp::NumericalMechanism>(
+        std::move(mechanism.value()));
+  }
+
+  // Returns the result of `Clone()` on a default-constructed Laplace builder,
+  // typed as the generic builder interface.
+  static std::unique_ptr<dp::NumericalMechanismBuilder> clone() {
+    dp::LaplaceMechanism::Builder cloner;
+    // Returned directly: wrapping a temporary in std::move adds nothing.
+    return cloner.Clone();
+  }
+
+  // Declares `LaplaceMechanism` in module `m` as a subclass of
+  // `NumericalMechanism`, constructible as
+  // `LaplaceMechanism(epsilon, sensitivity=1.0)`.
+  static void DeclareIn(py::module& m) {
+    py::class_<dp::LaplaceMechanism, dp::NumericalMechanism> lap_mech(
+        m, "LaplaceMechanism");
+    lap_mech.attr("__module__") = "pydp";
+    lap_mech
+        .def(py::init([](double epsilon, double sensitivity) {
+               return build(epsilon, sensitivity);
+             }),
+             py::arg("epsilon"), py::arg("sensitivity") = 1.0)
+        .def("get_uniform_double", &dp::LaplaceMechanism::GetUniformDouble)
+        // .def("deserialize", &dp::LaplaceMechanism::Deserialize)
+        // .def("serialize", &dp::LaplaceMechanism::Serialize)
+        .def_property_readonly("sensitivity", &dp::LaplaceMechanism::GetSensitivity,
+                               "The L1 sensitivity of the query.")
+        .def_property_readonly("diversity", &dp::LaplaceMechanism::GetDiversity,
+                               "The diversity of the Laplace mechanism.");
+  }
+};
+
+// Python binding for dp::GaussianMechanism.
+class GaussianMechanismBinder {
+ public:
+  // Builds a Gaussian mechanism from `epsilon`, `delta` and the query's L2
+  // sensitivity.
+  //
+  // Throws std::runtime_error (which pybind11 maps to RuntimeError) when the
+  // builder reports an error, instead of accessing the value of a failed
+  // result.
+  static std::unique_ptr<dp::GaussianMechanism> build(double epsilon, double delta,
+                                                      double l2_sensitivity) {
+    dp::GaussianMechanism::Builder builder;
+    builder.SetEpsilon(epsilon);
+    builder.SetDelta(delta);
+    builder.SetL2Sensitivity(l2_sensitivity);
+    auto mechanism = builder.Build();
+    if (!mechanism.ok()) {
+      throw std::runtime_error(mechanism.status().ToString());
+    }
+    return downcast_unique_ptr<dp::GaussianMechanism, dp::NumericalMechanism>(
+        std::move(mechanism.value()));
+  }
+
+  // Returns the result of `Clone()` on a default-constructed Gaussian builder,
+  // typed as the generic builder interface.
+  static std::unique_ptr<dp::NumericalMechanismBuilder> clone() {
+    dp::GaussianMechanism::Builder cloner;
+    // Returned directly: wrapping a temporary in std::move adds nothing.
+    return cloner.Clone();
+  }
+
+  // Declares `GaussianMechanism` in module `m` as a subclass of
+  // `NumericalMechanism`, constructible as
+  // `GaussianMechanism(epsilon, delta, l2_sensitivity)`. The arguments are
+  // named so they can also be passed by keyword, as with the Laplace binding.
+  static void DeclareIn(py::module& m) {
+    py::class_<dp::GaussianMechanism, dp::NumericalMechanism> gaus_mech(
+        m, "GaussianMechanism");
+    gaus_mech.attr("__module__") = "pydp";
+    gaus_mech
+        .def(py::init([](double epsilon, double delta, double l2_sensitivity) {
+               return build(epsilon, delta, l2_sensitivity);
+             }),
+             py::arg("epsilon"), py::arg("delta"), py::arg("l2_sensitivity"))
+        // .def("deserialize", &dp::GaussianMechanism::Deserialize)
+        // .def("serialize", &dp::GaussianMechanism::Serialize)
+        .def_property_readonly("delta", &dp::GaussianMechanism::GetDelta,
+                               "The 𝛿 of the Gaussian mechanism.")
+        .def_property_readonly(
+            "std",
+            // Recomputed on every access from the mechanism's own parameters.
+            [](const dp::GaussianMechanism& self) {
+              return dp::GaussianMechanism::CalculateStddev(
+                  self.GetEpsilon(), self.GetDelta(), self.GetL2Sensitivity());
+            },
+            R"pbdoc( 
+              The standard deviation parameter of the 
+              Gaussian mechanism underlying distribution. 
+            )pbdoc")
+        .def_property_readonly("l2_sensitivity",
+                               &dp::GaussianMechanism::GetL2Sensitivity,
+                               "The L2 sensitivity of the query.");
+  }
+};
+
+// Registers every numerical-mechanism binding of this file in module `m`.
+void init_mechanisms_mechanism(py::module& m) {
+  using DeclareFn = void (*)(py::module&);
+
+  // Order matters: NumericalMechanism is declared before the Laplace and
+  // Gaussian mechanisms, which name it as their pybind11 base class.
+  const DeclareFn declare_in_order[] = {
+      &ConfidenceIntervalBinder::DeclareIn,
+      &NumericalMechanismBinder::DeclareIn,
+      &LaplaceMechanismBinder::DeclareIn,
+      &GaussianMechanismBinder::DeclareIn,
+  };
+  for (DeclareFn declare : declare_in_order) {
+    declare(m);
+  }
+}
diff --git a/src/pydp/algorithms/__init__.py b/src/pydp/algorithms/__init__.py
@@ -1,5 +1,6 @@
 # pydp relative
 from . import laplacian
 from . import partition_selection
+from . import numerical_mechanisms
 
-__all__ = ["laplacian", "partition_selection"]
+__all__ = ["laplacian", "partition_selection", "numerical_mechanisms"]
diff --git a/src/pydp/algorithms/numerical_mechanisms.py b/src/pydp/algorithms/numerical_mechanisms.py
@@ -0,0 +1,6 @@
+from .._pydp._mechanisms import (
+    NumericalMechanism,  # type: ignore
+    GaussianMechanism,  # type: ignore
+    LaplaceMechanism,  # type: ignore
+    ConfidenceInterval,  # type: ignore
+)
diff --git a/tests/algorithms/test_numerical_mechanisms.py b/tests/algorithms/test_numerical_mechanisms.py
@@ -0,0 +1,97 @@
+import numpy as np
+import pytest
+import pydp.algorithms.numerical_mechanisms as num_mech
+from scipy.special import erfinv
+
+
+REL_ERR_TOL = 1e-5
+
+
+def assert_almost_eq(val_true, val_pred):
+    return np.abs((val_true - val_pred) / val_true) < REL_ERR_TOL
+
+
+def test_basic():
+    num_mech_methods = {
+        "add_noise",
+        "noised_value_above_threshold",
+        "memory_used",
+        "noise_confidence_interval",
+        "epsilon",
+    }
+    assert num_mech_methods.issubset(set(dir(num_mech.NumericalMechanism)))
+    epsilon, delta, sensitivity = 1, 1e-7, 5.0
+    with pytest.raises(TypeError):
+        # This is a abstract class, it cannot be instantiated!
+        obj = num_mech.NumericalMechanism(epsilon, delta)
+    obj = num_mech.LaplaceMechanism(epsilon, sensitivity)
+    assert num_mech_methods.issubset(set(dir(obj)))
+    assert {
+        # "deserialize",
+        # "serialize",
+        "get_uniform_double",
+        "memory_used",
+        "sensitivity",
+        "diversity",
+    }.issubset(set(dir(obj)))
+    obj = num_mech.GaussianMechanism(epsilon, delta, sensitivity)
+    assert num_mech_methods.issubset(set(dir(obj)))
+    assert {
+        # "deserialize",
+        # "serialize",
+        "memory_used",
+        "l2_sensitivity",
+        "std",
+        "delta",
+    }.issubset(set(dir(obj)))
+
+
+def test_laplace_mechanism():
+    epsilon, sensitivity = 1, 3.0
+    laplace = num_mech.LaplaceMechanism(epsilon, sensitivity)
+    value = 0
+    value = laplace.add_noise(value)
+    assert type(value) is int
+    value = laplace.add_noise(value, 0.1)
+    assert type(value) is int
+    value = 0.0
+    value = laplace.add_noise(value)
+    assert type(value) is float
+    value = laplace.add_noise(value, 0.1)
+    assert type(value) is float
+    conf_level = 0.5
+    priv_budg = 0.1
+    interval = laplace.noise_confidence_interval(0.5, 0.1, value)
+    assert type(interval) is num_mech.ConfidenceInterval
+    bound = laplace.diversity * np.log(1 - conf_level) / priv_budg
+    lower_bound, upper_bound = value - bound, value + bound
+    assert_almost_eq(lower_bound, interval.lower_bound)
+    assert_almost_eq(upper_bound, interval.upper_bound)
+    assert conf_level == interval.confidence_level
+
+
+def test_gaussian_mechanism():
+    epsilon, delta, l2_sensitivity = 1, 1e-5, 3.0
+    gaussian = num_mech.GaussianMechanism(epsilon, delta, l2_sensitivity)
+    value = 0
+    value = gaussian.add_noise(value)
+    assert type(value) is int
+    value = gaussian.add_noise(value, 0.1)
+    assert type(value) is int
+    value = 0.0
+    value = gaussian.add_noise(value)
+    assert type(value) is float
+    value = gaussian.add_noise(value, 0.1)
+    assert type(value) is float
+    conf_level = 0.5
+    priv_budg = 0.1
+    interval = gaussian.noise_confidence_interval(0.5, 0.1, value)
+    local_gaussian = num_mech.GaussianMechanism(
+        priv_budg * epsilon, priv_budg * delta, l2_sensitivity
+    )
+    assert type(interval) is num_mech.ConfidenceInterval
+    bound = erfinv(-conf_level) * local_gaussian.std * (2 ** 0.5)
+    lower_bound, upper_bound = value - bound, value + bound
+    assert_almost_eq(lower_bound, interval.lower_bound)
+    assert_almost_eq(upper_bound, interval.upper_bound)
+    assert conf_level == interval.confidence_level