PipelineAI
diff --git a/‎10_Privacy_Model.ipynb
+234 b/‎10_Privacy_Model.ipynb
+234
diff --git a/‎mnist_privacy/README.md
-10 b/‎mnist_privacy/README.md
-10
diff --git a/‎mnist_privacy/input/training/train-images-idx3-ubyte
-44.9 MB b/‎mnist_privacy/input/training/train-images-idx3-ubyte
-44.9 MB
diff --git a/‎mnist_privacy/input/validation/train-labels-idx1-ubyte
-58.6 KB b/‎mnist_privacy/input/validation/train-labels-idx1-ubyte
-58.6 KB
diff --git a/‎mnist_privacy/pipeline_conda_environment.yaml
-8 b/‎mnist_privacy/pipeline_conda_environment.yaml
-8
diff --git a/‎mnist_privacy/pipeline_condarc
-20 b/‎mnist_privacy/pipeline_condarc
-20
diff --git a/‎mnist_privacy/pipeline_index.html
-1 b/‎mnist_privacy/pipeline_index.html
-1
diff --git a/‎mnist_privacy/pipeline_invoke_python.py
-76 b/‎mnist_privacy/pipeline_invoke_python.py
-76
@@ -0,0 +1,234 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Copyright 2018, The TensorFlow Authors.\n",
+    "#\n",
+    "# Licensed under the Apache License, Version 2.0 (the \"License\");\n",
+    "# you may not use this file except in compliance with the License.\n",
+    "# You may obtain a copy of the License at\n",
+    "#\n",
+    "#      http://www.apache.org/licenses/LICENSE-2.0\n",
+    "#\n",
+    "# Unless required by applicable law or agreed to in writing, software\n",
+    "# distributed under the License is distributed on an \"AS IS\" BASIS,\n",
+    "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n",
+    "# See the License for the specific language governing permissions and\n",
+    "# limitations under the License.\n",
+    "\n",
+    "\"\"\"Training a CNN on MNIST with differentially private SGD optimizer.\"\"\"\n",
+    "\n",
+    "from __future__ import absolute_import\n",
+    "from __future__ import division\n",
+    "from __future__ import print_function\n",
+    "\n",
+    "import numpy as np\n",
+    "import tensorflow as tf\n",
+    "\n",
+    "from privacy.analysis.rdp_accountant import compute_rdp\n",
+    "from privacy.analysis.rdp_accountant import get_privacy_spent\n",
+    "from privacy.optimizers import dp_optimizer\n",
+    "\n",
+    "# Hyperparameters and paths, declared as TensorFlow flags and read through\n",
+    "# FLAGS by the code in the following cells.\n",
+    "tf.flags.DEFINE_boolean('dpsgd', True,\n",
+    "                        'If True, train with DP-SGD. If False, '\n",
+    "                        'train with vanilla SGD.')\n",
+    "tf.flags.DEFINE_float('learning_rate', 0.08, 'Learning rate for training')\n",
+    "tf.flags.DEFINE_float('noise_multiplier', 1.12,\n",
+    "                      'Ratio of the standard deviation to the clipping norm')\n",
+    "tf.flags.DEFINE_float('l2_norm_clip', 1.0, 'Clipping norm')\n",
+    "tf.flags.DEFINE_integer('batch_size', 32, 'Batch size')\n",
+    "tf.flags.DEFINE_integer('epochs', 1, 'Number of epochs')\n",
+    "tf.flags.DEFINE_integer('microbatches', 32,\n",
+    "                        'Number of microbatches '\n",
+    "                        '(must evenly divide batch_size)')\n",
+    "tf.flags.DEFINE_string('model_dir', None, 'Model directory')\n",
+    "tf.flags.DEFINE_string('export_dir', './pipeline_tfserving/0', 'Export dir')\n",
+    "\n",
+    "FLAGS = tf.flags.FLAGS"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def cnn_model_fn(features, labels, mode):\n",
+    "  \"\"\"Model function for a CNN, in the form used by tf.estimator.Estimator.\n",
+    "\n",
+    "  Args:\n",
+    "    features: dict of input tensors; features['x'] is reshaped to\n",
+    "      [-1, 28, 28, 1] before the first convolution.\n",
+    "    labels: integer class labels for the batch. Not used in PREDICT mode.\n",
+    "    mode: a tf.estimator.ModeKeys value (TRAIN, EVAL or PREDICT).\n",
+    "\n",
+    "  Returns:\n",
+    "    A tf.estimator.EstimatorSpec built for the requested mode.\n",
+    "\n",
+    "  Raises:\n",
+    "    ValueError: if mode is not TRAIN, EVAL or PREDICT.\n",
+    "  \"\"\"\n",
+    "\n",
+    "  # Define CNN architecture using tf.keras.layers.\n",
+    "  # NOTE(review): no activation is passed to the Conv2D/Dense layers, so max\n",
+    "  # pooling appears to be the only non-linearity -- confirm this is intended.\n",
+    "  input_layer = tf.reshape(features['x'], [-1, 28, 28, 1])\n",
+    "  y = tf.keras.layers.Conv2D(16, 8,\n",
+    "                             strides=2,\n",
+    "                             padding='same',\n",
+    "                             kernel_initializer='he_normal').apply(input_layer)\n",
+    "  y = tf.keras.layers.MaxPool2D(2, 1).apply(y)\n",
+    "  y = tf.keras.layers.Conv2D(32, 4,\n",
+    "                             strides=2,\n",
+    "                             padding='valid',\n",
+    "                             kernel_initializer='he_normal').apply(y)\n",
+    "  y = tf.keras.layers.MaxPool2D(2, 1).apply(y)\n",
+    "  y = tf.keras.layers.Flatten().apply(y)\n",
+    "  y = tf.keras.layers.Dense(32, kernel_initializer='he_normal').apply(y)\n",
+    "  logits = tf.keras.layers.Dense(10, kernel_initializer='he_normal').apply(y)\n",
+    "\n",
+    "  # Prediction has no labels, so return before any loss op is built.\n",
+    "  if mode == tf.estimator.ModeKeys.PREDICT:\n",
+    "    predictions = {\n",
+    "        'classes': tf.argmax(input=logits, axis=1),\n",
+    "        'probabilities': tf.nn.softmax(logits),\n",
+    "    }\n",
+    "    return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)\n",
+    "\n",
+    "  # Calculate loss as a vector (to support microbatches in DP-SGD).\n",
+    "  vector_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(\n",
+    "      labels=labels, logits=logits)\n",
+    "  # Define mean of loss across minibatch (for reporting through tf.Estimator).\n",
+    "  scalar_loss = tf.reduce_mean(vector_loss)\n",
+    "\n",
+    "  # Configure the training op (for TRAIN mode).\n",
+    "  if mode == tf.estimator.ModeKeys.TRAIN:\n",
+    "\n",
+    "    if FLAGS.dpsgd:\n",
+    "      # Use DP version of GradientDescentOptimizer. For illustration purposes,\n",
+    "      # we do that here by calling make_optimizer_class() explicitly, though DP\n",
+    "      # versions of standard optimizers are available in dp_optimizer.\n",
+    "      dp_optimizer_class = dp_optimizer.make_optimizer_class(\n",
+    "          tf.train.GradientDescentOptimizer)\n",
+    "      optimizer = dp_optimizer_class(\n",
+    "          learning_rate=FLAGS.learning_rate,\n",
+    "          noise_multiplier=FLAGS.noise_multiplier,\n",
+    "          l2_norm_clip=FLAGS.l2_norm_clip,\n",
+    "          num_microbatches=FLAGS.microbatches)\n",
+    "      # The DP optimizer is handed the per-example loss vector.\n",
+    "      opt_loss = vector_loss\n",
+    "    else:\n",
+    "      optimizer = tf.train.GradientDescentOptimizer(\n",
+    "          learning_rate=FLAGS.learning_rate)\n",
+    "      opt_loss = scalar_loss\n",
+    "    global_step = tf.train.get_global_step()\n",
+    "    train_op = optimizer.minimize(loss=opt_loss, global_step=global_step)\n",
+    "    # In the following, we pass the mean of the loss (scalar_loss) rather than\n",
+    "    # the vector_loss because tf.estimator requires a scalar loss. This is only\n",
+    "    # used for evaluation and debugging by tf.estimator. The actual loss being\n",
+    "    # minimized is opt_loss defined above and passed to optimizer.minimize().\n",
+    "    return tf.estimator.EstimatorSpec(mode=mode,\n",
+    "                                      loss=scalar_loss,\n",
+    "                                      train_op=train_op)\n",
+    "\n",
+    "  # Add evaluation metrics (for EVAL mode).\n",
+    "  if mode == tf.estimator.ModeKeys.EVAL:\n",
+    "    eval_metric_ops = {\n",
+    "        'accuracy':\n",
+    "            tf.metrics.accuracy(\n",
+    "                labels=labels,\n",
+    "                predictions=tf.argmax(input=logits, axis=1))\n",
+    "    }\n",
+    "    return tf.estimator.EstimatorSpec(mode=mode,\n",
+    "                                      loss=scalar_loss,\n",
+    "                                      eval_metric_ops=eval_metric_ops)\n",
+    "\n",
+    "  # Fail loudly instead of silently returning None for an unknown mode.\n",
+    "  raise ValueError('Unsupported mode: %s' % mode)\n",
+    "\n",
+    "\n",
+    "def load_mnist():\n",
+    "  \"\"\"Loads MNIST through tf.keras and converts it to NumPy arrays.\n",
+    "\n",
+    "  Images are cast to float32 and divided by 255; labels are cast to int32.\n",
+    "\n",
+    "  Returns:\n",
+    "    A tuple (train_data, train_labels, test_data, test_labels).\n",
+    "  \"\"\"\n",
+    "  (raw_train_x, raw_train_y), (raw_test_x, raw_test_y) = (\n",
+    "      tf.keras.datasets.mnist.load_data())\n",
+    "\n",
+    "  # Rescale pixel values by 1/255 as float32.\n",
+    "  train_data = np.array(raw_train_x, dtype=np.float32) / 255\n",
+    "  test_data = np.array(raw_test_x, dtype=np.float32) / 255\n",
+    "\n",
+    "  train_labels = np.array(raw_train_y, dtype=np.int32)\n",
+    "  test_labels = np.array(raw_test_y, dtype=np.int32)\n",
+    "\n",
+    "  # Sanity checks: pixels span exactly [0, 1] and labels are 1-D.\n",
+    "  for images in (train_data, test_data):\n",
+    "    assert images.min() == 0.\n",
+    "    assert images.max() == 1.\n",
+    "  for label_array in (train_labels, test_labels):\n",
+    "    assert len(label_array.shape) == 1\n",
+    "\n",
+    "  return train_data, train_labels, test_data, test_labels\n",
+    "\n",
+    "\n",
+    "\n",
+    "# Set TensorFlow logging verbosity to INFO.\n",
+    "tf.logging.set_verbosity(tf.logging.INFO)\n",
+    "\n",
+    "# The microbatch count has to divide the batch size evenly; fail fast if not.\n",
+    "if FLAGS.batch_size % FLAGS.microbatches:\n",
+    "  raise ValueError('Number of microbatches should divide evenly batch_size')\n",
+    "\n",
+    "# Fetch the preprocessed train/test arrays.\n",
+    "train_data, train_labels, test_data, test_labels = load_mnist()\n",
+    "\n",
+    "# Build the Estimator around the CNN model function.\n",
+    "mnist_classifier = tf.estimator.Estimator(\n",
+    "    model_fn=cnn_model_fn, model_dir=FLAGS.model_dir)\n",
+    "\n",
+    "# Input functions: shuffled batches for training, one ordered pass for eval.\n",
+    "train_input_fn = tf.estimator.inputs.numpy_input_fn(\n",
+    "    x={'x': train_data}, y=train_labels, batch_size=FLAGS.batch_size,\n",
+    "    num_epochs=FLAGS.epochs, shuffle=True)\n",
+    "eval_input_fn = tf.estimator.inputs.numpy_input_fn(\n",
+    "    x={'x': test_data}, y=test_labels, num_epochs=1, shuffle=False)\n",
+    "\n",
+    "# Define a function that computes privacy budget expended so far.\n",
+    "def compute_epsilon(steps):\n",
+    "  \"\"\"Returns the epsilon spent after `steps` training steps at delta=1e-5.\n",
+    "\n",
+    "  Reads FLAGS.noise_multiplier and FLAGS.batch_size, then delegates to\n",
+    "  compute_rdp and get_privacy_spent. Returns infinity when the noise\n",
+    "  multiplier is zero.\n",
+    "  \"\"\"\n",
+    "  noise = FLAGS.noise_multiplier\n",
+    "  if noise == 0.0:\n",
+    "    return float('inf')\n",
+    "\n",
+    "  # Orders evaluated: 1.1, 1.2, ..., 10.9, then the integers 12 through 63.\n",
+    "  fractional_orders = [1 + k / 10. for k in range(1, 100)]\n",
+    "  integer_orders = list(range(12, 64))\n",
+    "  orders = fractional_orders + integer_orders\n",
+    "\n",
+    "  # Sampling probability: batch size over the 60000 MNIST training points.\n",
+    "  q = FLAGS.batch_size / 60000\n",
+    "  rdp = compute_rdp(q=q, noise_multiplier=noise, steps=steps, orders=orders)\n",
+    "\n",
+    "  # Delta is set to 1e-5 because MNIST has 60000 training points.\n",
+    "  # Epsilon is the first element of get_privacy_spent's result.\n",
+    "  epsilon = get_privacy_spent(orders, rdp, target_delta=1e-5)[0]\n",
+    "  return epsilon\n",
+    "\n",
+    "# Training loop.\n",
+    "# One epoch is a full pass over the 60000 MNIST training images, the same\n",
+    "# count compute_epsilon uses for its sampling probability. The previous value\n",
+    "# of 600 trained only a fraction of an epoch per iteration while the messages\n",
+    "# below still reported whole epochs.\n",
+    "steps_per_epoch = 60000 // FLAGS.batch_size\n",
+    "for epoch in range(1, FLAGS.epochs + 1):\n",
+    "  # Train the model for one epoch.\n",
+    "  mnist_classifier.train(input_fn=train_input_fn, steps=steps_per_epoch)\n",
+    "\n",
+    "  # Evaluate the model and print results\n",
+    "  eval_results = mnist_classifier.evaluate(input_fn=eval_input_fn)\n",
+    "  test_accuracy = eval_results['accuracy']\n",
+    "  print('Test accuracy after %d epochs is: %.3f' % (epoch, test_accuracy))\n",
+    "\n",
+    "  # Compute the privacy budget expended so far; the accountant is given the\n",
+    "  # cumulative number of steps taken across all epochs to date.\n",
+    "  if FLAGS.dpsgd:\n",
+    "    eps = compute_epsilon(epoch * steps_per_epoch)\n",
+    "    print('For delta=1e-5, the current epsilon is: %.2f' % eps)\n",
+    "  else:\n",
+    "    print('Trained with vanilla non-private SGD optimizer')\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}