Skip to content

Commit f151c00

Browse files
committed
Add diffprivlib
1 parent a4c0469 commit f151c00

File tree

2 files changed

+63
-0
lines changed

2 files changed

+63
-0
lines changed

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ So far there are source code examples meant to be walked through in the REPL
2323
- [Bokeh](https://docs.bokeh.org/en/latest/index.html)
2424
- [OpenCV](https://opencv.org/)
2525
- [psutil](https://psutil.readthedocs.io/en/latest/)
26+
- [diffprivlib](https://github.com/IBM/differential-privacy-library)
2627

2728
In general, you will need a python3 env and pip install the various packages
2829
before running

src/gigasquid/diffprivlib.clj

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
(ns gigasquid.diffprivlib
  (:require [libpython-clj.require :refer [require-python]]
            [libpython-clj.python :as py :refer [py. py.. py.-]]
            [gigasquid.plot :as plot]))

;;; Differential-privacy walkthrough, adapted from
;;; https://github.com/IBM/differential-privacy-library

;;; Install: pip install diffprivlib

(require-python '[sklearn.datasets :as datasets])
(require-python '[sklearn.model_selection :as model-selection])
(require-python '[sklearn.metrics :as metrics])
(require-python '[matplotlib.pyplot :as pyplot])
(require-python '[numpy :as np])
(require-python '[diffprivlib.models :as models])
(require-python '[builtins :as python])
;;; Load the iris dataset and hold 20% back as a test split.
(def dataset (datasets/load_iris))

(def iris-data
  ;; train_test_split returns [X-train X-test y-train y-test];
  ;; pair those values with keywords to build the split map.
  (let [split (model-selection/train_test_split (py.- dataset data)
                                                (py.- dataset target)
                                                :test_size 0.2)]
    (zipmap [:X-train :X-test :y-train :y-test] split)))
;; Train a differentially private Gaussian naive Bayes classifier.
;; It runs just like an sklearn classifier.  The privacy level is the
;; :epsilon argument at construction (default 1.0); fitting without an
;; explicit :bounds argument emits a privacy warning.
(def clf (models/GaussianNB))

;; Fit on the training split.
(py. clf fit (:X-train iris-data) (:y-train iris-data))
;; Classify unseen examples — the trained model is differentially
;; private and preserves the privacy of the 'individuals' in the
;; training set (flowers are entitled to their privacy too!).
(->> (:X-test iris-data)
     (py. clf predict))

;;=> [1 0 1 1 1 2 1 0 2 2 2 2 1 0 0 2 1 0 1 0 0 1 0 1 2 2 0 2 1 1]
;; Evaluate model accuracy across a sweep of epsilon values and plot
;; the result with matplotlib.

;; 50 epsilons, log-spaced from 10^-2 to 10^2.
(def epsilons (np/logspace -2 2 50))

;; Per-feature (min, max) bounds for the four iris measurements,
;; supplied explicitly so GaussianNB does not warn about missing bounds.
(def bounds
  (python/list (mapv python/tuple
                     [[4.3 7.9] [2.0 4.4] [1.1 6.9] [0.1 2.5]])))
;; Test-set accuracy of a freshly trained private classifier at each
;; epsilon (the sample output below shows accuracy rising as epsilon
;; grows, i.e. as the privacy guarantee weakens).
(def accuracy
  (mapv (fn [eps]
          (let [model (models/GaussianNB :bounds bounds :epsilon eps)]
            (py. model fit (:X-train iris-data) (:y-train iris-data))
            (metrics/accuracy_score (:y-test iris-data)
                                    (py. model predict (:X-test iris-data)))))
        epsilons))

accuracy
;;=> [0.3333333333333333 0.36666666666666664 0.36666666666666664 0.36666666666666664 0.36666666666666664 0.2 0.3333333333333333 0.3 0.3333333333333333 0.3333333333333333 0.3 0.3 0.6 0.5666666666666667 0.2 0.7 0.6 0.1 0.6666666666666666 0.9 0.6666666666666666 0.6666666666666666 1.0 0.6 0.8 0.7666666666666667 0.8666666666666667 0.8333333333333334 0.9333333333333333 0.8666666666666667 0.9 1.0 0.9333333333333333 0.9333333333333333 0.9 0.9333333333333333 0.8333333333333334 1.0 0.8 0.8 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0]
;; Plot accuracy against epsilon on a logarithmic x-axis.
(plot/with-show-one
  (pyplot/semilogx epsilons accuracy)
  (pyplot/title "Differentially private Naive Bayes accuracy")
  (pyplot/xlabel "epsilon")
  (pyplot/ylabel "Accuracy"))

0 commit comments

Comments
 (0)