Add softmax and sigmoid activations to Keras and PyTorch networks

pobonomo · pobonomo · commit dbab21810afd · 2024-12-05T14:17:36.000+01:00
Tests and documentation are still missing.
diff --git a/notebooks/adversarial/adversarial_keras.ipynb b/notebooks/adversarial/adversarial_keras.ipynb
@@ -89,9 +89,9 @@
     "nn = tf.keras.models.Sequential(\n",
     "    [\n",
     "        tf.keras.layers.InputLayer((28 * 28,)),\n",
-    "        tf.keras.layers.Dense(50, activation=\"relu\"),\n",
-    "        tf.keras.layers.Dense(50, activation=\"relu\"),\n",
-    "        tf.keras.layers.Dense(10),\n",
+    "        tf.keras.layers.Dense(50, activation=\"sigmoid\"),\n",
+    "        tf.keras.layers.Dense(50, activation=\"sigmoid\"),\n",
+    "        tf.keras.layers.Dense(10, activation=\"softmax\"),\n",
     "    ]\n",
     ")"
    ]
@@ -257,7 +257,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.11.8"
+   "version": "3.11.10"
   },
   "license": {
    "full_text": "# Copyright © 2023 Gurobi Optimization, LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# =============================================================================="
diff --git a/notebooks/adversarial/adversarial_pytorch.ipynb b/notebooks/adversarial/adversarial_pytorch.ipynb
@@ -42,8 +42,17 @@
     "import torchvision\n",
     "from skorch import NeuralNetClassifier\n",
     "\n",
-    "import gurobipy as gp\n",
-    "\n",
+    "import gurobipy as gp"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%load_ext autoreload\n",
+    "%autoreload 2\n",
     "from gurobi_ml import add_predictor_constr"
    ]
   },
@@ -100,9 +109,9 @@
     "nn_model = torch.nn.Sequential(\n",
     "    torch.nn.Linear(28 * 28, 50),\n",
     "    torch.nn.ReLU(),\n",
-    "    torch.nn.Linear(50, 50),\n",
-    "    torch.nn.ReLU(),\n",
-    "    torch.nn.Linear(50, 10),\n",
+    "    torch.nn.Linear(50, 20),\n",
+    "    torch.nn.Sigmoid(),\n",
+    "    torch.nn.Linear(20, 10),\n",
     "    torch.nn.Softmax(1),\n",
     ")"
    ]
@@ -139,7 +148,9 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "nn_regression = torch.nn.Sequential(*nn_model[:-1])"
+    "imageno = 10000\n",
+    "image = mnist_train.data[imageno, :]\n",
+    "plt.imshow(image, cmap=\"gray\")"
    ]
   },
   {
@@ -148,9 +159,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "imageno = 10000\n",
-    "image = mnist_train.data[imageno, :]\n",
-    "plt.imshow(image, cmap=\"gray\")"
+    "ex_prob = nn_model.forward(x_train[imageno:imageno+1, :])[0]"
    ]
   },
   {
@@ -159,7 +168,6 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "ex_prob = nn_regression.forward(x_train[imageno, :])\n",
     "sorted_labels = torch.argsort(ex_prob)\n",
     "right_label = sorted_labels[-1]\n",
     "wrong_label = sorted_labels[-2]"
@@ -188,7 +196,7 @@
     "m.addConstr(abs_diff >= -x + image)\n",
     "m.addConstr(abs_diff.sum() <= delta)\n",
     "\n",
-    "pred_constr = add_predictor_constr(m, nn_regression, x, y)\n",
+    "pred_constr = add_predictor_constr(m, nn_model, x, y)\n",
     "\n",
     "pred_constr.print_stats()"
    ]
@@ -199,11 +207,19 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "m.Params.BestBdStop = 0.0\n",
-    "m.Params.BestObjStop = 0.0\n",
+    "m.Params.Obbt = 3\n",
     "m.optimize()"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pred_constr.get_error()"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
diff --git a/src/gurobi_ml/keras/keras.py b/src/gurobi_ml/keras/keras.py
@@ -77,7 +77,7 @@ def __init__(self, gp_model, predictor, input_vars, output_vars=None, **kwargs):
             if isinstance(step, keras.layers.Dense):
                 config = step.get_config()
                 activation = config["activation"]
-                if activation not in ("relu", "linear"):
+                if activation not in ("relu", "softmax", "sigmoid", "linear"):
                     raise NoModel(predictor, f"Unsupported activation {activation}")
             elif isinstance(step, keras.layers.ReLU):
                 if step.negative_slope != 0.0:
@@ -120,6 +120,8 @@ def _mip_model(self, **kwargs):
                 activation = config["activation"]
                 if activation == "linear":
                     activation = "identity"
+                if activation == "sigmoid":
+                    activation = "logistic"
                 weights, bias = step.get_weights()
                 layer = self._add_dense_layer(
                     _input,
diff --git a/src/gurobi_ml/torch/sequential.py b/src/gurobi_ml/torch/sequential.py
@@ -76,9 +76,15 @@ class SequentialConstr(BaseNNConstr):
     |ClassShort|.
     """
 
+    activations = {
+        nn.ReLU: "relu",
+        nn.Softmax: "softmax",
+        nn.Sigmoid: "logistic",
+    }
+
     def __init__(self, gp_model, predictor, input_vars, output_vars=None, **kwargs):
         for step in predictor:
-            if isinstance(step, nn.ReLU):
+            if isinstance(step, tuple(self.activations.keys())):
                 pass
             elif isinstance(step, nn.Linear):
                 pass
@@ -95,12 +101,7 @@ def _mip_model(self, **kwargs):
         for i, step in enumerate(network):
             if i == num_layers - 1:
                 output = self._output
-            if isinstance(step, nn.ReLU):
-                layer = self._add_activation_layer(
-                    _input, self.act_dict["relu"](), output, name=f"relu_{i}", **kwargs
-                )
-                _input = layer.output
-            elif isinstance(step, nn.Linear):
+            if isinstance(step, nn.Linear):
                 layer_weight = None
                 layer_bias = None
                 for name, param in step.named_parameters():
@@ -122,7 +123,17 @@ def _mip_model(self, **kwargs):
                     **kwargs,
                 )
                 _input = layer.output
-        if self._output is None:
+            else:
+                activation = self.activations[type(step)]
+                layer = self._add_activation_layer(
+                    _input,
+                    self.act_dict[activation](),
+                    output,
+                    name=f"{activation}_{i}",
+                    **kwargs,
+                )
+                _input = layer.output
+        if self.output is None:
             self._output = layer.output
 
     def get_error(self, eps=None):