
Commit c9dedc5

added privacy example
1 parent eb2624c commit c9dedc5


65 files changed: +234 -899 lines changed

10_Privacy_Model.ipynb (+234)

@@ -0,0 +1,234 @@
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Copyright 2018, The TensorFlow Authors.\n",
    "#\n",
    "# Licensed under the Apache License, Version 2.0 (the \"License\");\n",
    "# you may not use this file except in compliance with the License.\n",
    "# You may obtain a copy of the License at\n",
    "#\n",
    "# http://www.apache.org/licenses/LICENSE-2.0\n",
    "#\n",
    "# Unless required by applicable law or agreed to in writing, software\n",
    "# distributed under the License is distributed on an \"AS IS\" BASIS,\n",
    "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n",
    "# See the License for the specific language governing permissions and\n",
    "# limitations under the License.\n",
    "\n",
    "\"\"\"Training a CNN on MNIST with differentially private SGD optimizer.\"\"\"\n",
    "\n",
    "from __future__ import absolute_import\n",
    "from __future__ import division\n",
    "from __future__ import print_function\n",
    "\n",
    "import numpy as np\n",
    "import tensorflow as tf\n",
    "\n",
    "from privacy.analysis.rdp_accountant import compute_rdp\n",
    "from privacy.analysis.rdp_accountant import get_privacy_spent\n",
    "from privacy.optimizers import dp_optimizer\n",
    "\n",
    "tf.flags.DEFINE_boolean('dpsgd', True, 'If True, train with DP-SGD. If False, '\n",
    "                        'train with vanilla SGD.')\n",
    "tf.flags.DEFINE_float('learning_rate', 0.08, 'Learning rate for training')\n",
    "tf.flags.DEFINE_float('noise_multiplier', 1.12,\n",
    "                      'Ratio of the standard deviation to the clipping norm')\n",
    "tf.flags.DEFINE_float('l2_norm_clip', 1.0, 'Clipping norm')\n",
    "tf.flags.DEFINE_integer('batch_size', 32, 'Batch size')\n",
    "tf.flags.DEFINE_integer('epochs', 1, 'Number of epochs')\n",
    "tf.flags.DEFINE_integer('microbatches', 32,\n",
    "                        'Number of microbatches (must evenly divide batch_size)')\n",
    "tf.flags.DEFINE_string('model_dir', None, 'Model directory')\n",
    "tf.flags.DEFINE_string('export_dir', './pipeline_tfserving/0', 'Export dir')\n",
    "\n",
    "FLAGS = tf.flags.FLAGS"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def cnn_model_fn(features, labels, mode):\n",
    "  \"\"\"Model function for a CNN.\"\"\"\n",
    "\n",
    "  # Define CNN architecture using tf.keras.layers.\n",
    "  input_layer = tf.reshape(features['x'], [-1, 28, 28, 1])\n",
    "  y = tf.keras.layers.Conv2D(16, 8,\n",
    "                             strides=2,\n",
    "                             padding='same',\n",
    "                             kernel_initializer='he_normal').apply(input_layer)\n",
    "  y = tf.keras.layers.MaxPool2D(2, 1).apply(y)\n",
    "  y = tf.keras.layers.Conv2D(32, 4,\n",
    "                             strides=2,\n",
    "                             padding='valid',\n",
    "                             kernel_initializer='he_normal').apply(y)\n",
    "  y = tf.keras.layers.MaxPool2D(2, 1).apply(y)\n",
    "  y = tf.keras.layers.Flatten().apply(y)\n",
    "  y = tf.keras.layers.Dense(32, kernel_initializer='he_normal').apply(y)\n",
    "  logits = tf.keras.layers.Dense(10, kernel_initializer='he_normal').apply(y)\n",
    "\n",
    "  # Calculate loss as a vector (to support microbatches in DP-SGD).\n",
    "  vector_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(\n",
    "      labels=labels, logits=logits)\n",
    "  # Define mean of loss across minibatch (for reporting through tf.Estimator).\n",
    "  scalar_loss = tf.reduce_mean(vector_loss)\n",
    "\n",
    "  # Configure the training op (for TRAIN mode).\n",
    "  if mode == tf.estimator.ModeKeys.TRAIN:\n",
    "\n",
    "    if FLAGS.dpsgd:\n",
    "      # Use DP version of GradientDescentOptimizer. For illustration purposes,\n",
    "      # we do that here by calling make_optimizer_class() explicitly, though DP\n",
    "      # versions of standard optimizers are available in dp_optimizer.\n",
    "      dp_optimizer_class = dp_optimizer.make_optimizer_class(\n",
    "          tf.train.GradientDescentOptimizer)\n",
    "      optimizer = dp_optimizer_class(\n",
    "          learning_rate=FLAGS.learning_rate,\n",
    "          noise_multiplier=FLAGS.noise_multiplier,\n",
    "          l2_norm_clip=FLAGS.l2_norm_clip,\n",
    "          num_microbatches=FLAGS.microbatches)\n",
    "      opt_loss = vector_loss\n",
    "    else:\n",
    "      optimizer = tf.train.GradientDescentOptimizer(\n",
    "          learning_rate=FLAGS.learning_rate)\n",
    "      opt_loss = scalar_loss\n",
    "    global_step = tf.train.get_global_step()\n",
    "    train_op = optimizer.minimize(loss=opt_loss, global_step=global_step)\n",
    "    # In the following, we pass the mean of the loss (scalar_loss) rather than\n",
    "    # the vector_loss because tf.estimator requires a scalar loss. This is only\n",
    "    # used for evaluation and debugging by tf.estimator. The actual loss being\n",
    "    # minimized is opt_loss defined above and passed to optimizer.minimize().\n",
    "    return tf.estimator.EstimatorSpec(mode=mode,\n",
    "                                      loss=scalar_loss,\n",
    "                                      train_op=train_op)\n",
    "\n",
    "  # Add evaluation metrics (for EVAL mode).\n",
    "  elif mode == tf.estimator.ModeKeys.EVAL:\n",
    "    eval_metric_ops = {\n",
    "        'accuracy':\n",
    "            tf.metrics.accuracy(\n",
    "                labels=labels,\n",
    "                predictions=tf.argmax(input=logits, axis=1))\n",
    "    }\n",
    "    return tf.estimator.EstimatorSpec(mode=mode,\n",
    "                                      loss=scalar_loss,\n",
    "                                      eval_metric_ops=eval_metric_ops)\n",
    "\n",
    "\n",
    "def load_mnist():\n",
    "  \"\"\"Loads MNIST and preprocesses to combine training and validation data.\"\"\"\n",
    "  train, test = tf.keras.datasets.mnist.load_data()\n",
    "  train_data, train_labels = train\n",
    "  test_data, test_labels = test\n",
    "\n",
    "  train_data = np.array(train_data, dtype=np.float32) / 255\n",
    "  test_data = np.array(test_data, dtype=np.float32) / 255\n",
    "\n",
    "  train_labels = np.array(train_labels, dtype=np.int32)\n",
    "  test_labels = np.array(test_labels, dtype=np.int32)\n",
    "\n",
    "  assert train_data.min() == 0.\n",
    "  assert train_data.max() == 1.\n",
    "  assert test_data.min() == 0.\n",
    "  assert test_data.max() == 1.\n",
    "  assert len(train_labels.shape) == 1\n",
    "  assert len(test_labels.shape) == 1\n",
    "\n",
    "  return train_data, train_labels, test_data, test_labels\n",
    "\n",
    "\n",
    "tf.logging.set_verbosity(tf.logging.INFO)\n",
    "if FLAGS.batch_size % FLAGS.microbatches != 0:\n",
    "  raise ValueError('Number of microbatches should evenly divide batch_size')\n",
    "\n",
    "# Load training and test data.\n",
    "train_data, train_labels, test_data, test_labels = load_mnist()\n",
    "\n",
    "# Instantiate the tf.Estimator.\n",
    "mnist_classifier = tf.estimator.Estimator(model_fn=cnn_model_fn,\n",
    "                                          model_dir=FLAGS.model_dir)\n",
    "\n",
    "# Create tf.Estimator input functions for the training and test data.\n",
    "train_input_fn = tf.estimator.inputs.numpy_input_fn(\n",
    "    x={'x': train_data},\n",
    "    y=train_labels,\n",
    "    batch_size=FLAGS.batch_size,\n",
    "    num_epochs=FLAGS.epochs,\n",
    "    shuffle=True)\n",
    "eval_input_fn = tf.estimator.inputs.numpy_input_fn(\n",
    "    x={'x': test_data},\n",
    "    y=test_labels,\n",
    "    num_epochs=1,\n",
    "    shuffle=False)\n",
    "\n",
    "# Define a function that computes the privacy budget expended so far.\n",
    "def compute_epsilon(steps):\n",
    "  \"\"\"Computes epsilon value for given hyperparameters.\"\"\"\n",
    "  if FLAGS.noise_multiplier == 0.0:\n",
    "    return float('inf')\n",
    "  orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64))\n",
    "  sampling_probability = FLAGS.batch_size / 60000\n",
    "  rdp = compute_rdp(q=sampling_probability,\n",
    "                    noise_multiplier=FLAGS.noise_multiplier,\n",
    "                    steps=steps,\n",
    "                    orders=orders)\n",
    "  # Delta is set to 1e-5 because MNIST has 60000 training points.\n",
    "  return get_privacy_spent(orders, rdp, target_delta=1e-5)[0]\n",
    "\n",
    "# Training loop. Note that steps_per_epoch covers only 600 examples here,\n",
    "# not the full 60000-example MNIST training set, so each 'epoch' is short.\n",
    "steps_per_epoch = 600 // FLAGS.batch_size\n",
    "for epoch in range(1, FLAGS.epochs + 1):\n",
    "  # Train the model for one epoch.\n",
    "  mnist_classifier.train(input_fn=train_input_fn, steps=steps_per_epoch)\n",
    "\n",
    "  # Evaluate the model and print results.\n",
    "  eval_results = mnist_classifier.evaluate(input_fn=eval_input_fn)\n",
    "  test_accuracy = eval_results['accuracy']\n",
    "  print('Test accuracy after %d epochs is: %.3f' % (epoch, test_accuracy))\n",
    "\n",
    "  # Compute the privacy budget expended so far.\n",
    "  if FLAGS.dpsgd:\n",
    "    eps = compute_epsilon(epoch * steps_per_epoch)\n",
    "    print('For delta=1e-5, the current epsilon is: %.2f' % eps)\n",
    "  else:\n",
    "    print('Trained with vanilla non-private SGD optimizer')\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
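
For reference, the epsilon values the notebook prints can be reproduced without running any training. The short sketch below is not part of the commit; it assumes the same privacy package is importable and simply calls the two accountant functions the notebook already imports, compute_rdp and get_privacy_spent, with the notebook's default hyperparameters, to show how the privacy budget grows with the number of DP-SGD steps.

# Standalone privacy-accounting sketch (no training involved; not part of
# the commit). Values mirror the notebook's defaults: batch_size=32,
# noise_multiplier=1.12, 60000 MNIST training points, delta=1e-5.
from privacy.analysis.rdp_accountant import compute_rdp
from privacy.analysis.rdp_accountant import get_privacy_spent

orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64))
sampling_probability = 32 / 60000.  # batch_size / training-set size

for steps in (100, 1000, 10000):
    # Accumulate Renyi DP over `steps` applications of the sampled Gaussian
    # mechanism, then convert to an (epsilon, delta) guarantee.
    rdp = compute_rdp(q=sampling_probability,
                      noise_multiplier=1.12,
                      steps=steps,
                      orders=orders)
    eps = get_privacy_spent(orders, rdp, target_delta=1e-5)[0]
    print('steps=%5d  epsilon=%.2f' % (steps, eps))

Epsilon grows monotonically with the number of steps, which is why the training loop above reports it after every epoch; the orders list only controls which Renyi orders the accountant searches when converting the RDP curve into an (epsilon, delta) statement.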

mnist_privacy/README.md (-10): This file was deleted.

(Two binary files are not shown.)

mnist_privacy/pipeline_conda_environment.yaml (-8): This file was deleted.

mnist_privacy/pipeline_condarc (-20): This file was deleted.

mnist_privacy/pipeline_index.html (-1): This file was deleted.

mnist_privacy/pipeline_invoke_python.py (-76): This file was deleted.
