Add softmax and sigmoid activations to Keras and PyTorch networks

pobonomo · pobonomo · commit 327d178bcd5c · 2024-12-07T15:07:29.000+01:00
Missing tests and documentation
diff --git a/Generate_keras_test_network.ipynb b/Generate_keras_test_network.ipynb
@@ -0,0 +1,116 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c8d57e80-9075-4d63-bc36-f9aaad08ea2f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import tensorflow as tf"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5d98d000-661e-4495-bef0-49c5eb180aff",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "nn = tf.keras.models.Sequential(\n",
+    "    [\n",
+    "     tf.keras.layers.InputLayer((8,)),\n",
+    "     tf.keras.layers.Dense(30, activation='relu'),\n",
+    "     tf.keras.layers.Dense(1),\n",
+    "    ]\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ba3cf3ee-bd25-4180-95c0-2ff42d858a34",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "nn.compile(loss='mean_squared_error', optimizer='adam')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "247bd200-8026-4f08-8739-9aabb3c37e99",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "(X_train, y_train), (X_test, y_test) = tf.keras.datasets.california_housing.load_data(\n",
+    "    version=\"large\"\n",
+    ")\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a29325dd-1ab1-4cce-81c0-2528e892adb6",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "normalize = tf.keras.layers.Normalization(axis=-1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "cbecbd91-e100-4568-9424-efd9e3b6d5fc",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "normalize.adapt(X_train)\n",
+    "X_train = normalize(X_train)\n",
+    "X_test = normalize(X_test)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5656d2da-ee2d-4a8f-aef3-65876c20193b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "nn.fit(X_train, y_train, epochs=100, validation_data=(X_test, y_test))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "128b1ba9-55e9-4d78-9b31-d2a0da9bb165",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.10"
+  },
+  "license": {
+   "full_text": "# Copyright © 2023 Gurobi Optimization, LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# =============================================================================="
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/notebooks/adversarial/adversarial_keras.ipynb b/notebooks/adversarial/adversarial_keras.ipynb
@@ -89,9 +89,9 @@
     "nn = tf.keras.models.Sequential(\n",
     "    [\n",
     "        tf.keras.layers.InputLayer((28 * 28,)),\n",
-    "        tf.keras.layers.Dense(50, activation=\"relu\"),\n",
-    "        tf.keras.layers.Dense(50, activation=\"relu\"),\n",
-    "        tf.keras.layers.Dense(10),\n",
+    "        tf.keras.layers.Dense(20, activation=\"sigmoid\"),\n",
+    "        tf.keras.layers.Dense(20, activation=\"sigmoid\"),\n",
+    "        tf.keras.layers.Dense(10, activation=\"softmax\"),\n",
     "    ]\n",
     ")"
    ]
@@ -118,7 +118,7 @@
     "nn.fit(\n",
     "    x_train,\n",
     "    y_train,\n",
-    "    epochs=6,\n",
+    "    epochs=4,\n",
     "    validation_data=(x_test, y_test),\n",
     ")"
    ]
@@ -257,7 +257,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.11.8"
+   "version": "3.11.10"
   },
   "license": {
    "full_text": "# Copyright © 2023 Gurobi Optimization, LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# =============================================================================="
diff --git a/notebooks/adversarial/adversarial_pytorch.ipynb b/notebooks/adversarial/adversarial_pytorch.ipynb
@@ -42,8 +42,17 @@
     "import torchvision\n",
     "from skorch import NeuralNetClassifier\n",
     "\n",
-    "import gurobipy as gp\n",
-    "\n",
+    "import gurobipy as gp"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%load_ext autoreload\n",
+    "%autoreload 2\n",
     "from gurobi_ml import add_predictor_constr"
    ]
   },
@@ -100,9 +109,9 @@
     "nn_model = torch.nn.Sequential(\n",
     "    torch.nn.Linear(28 * 28, 50),\n",
     "    torch.nn.ReLU(),\n",
-    "    torch.nn.Linear(50, 50),\n",
-    "    torch.nn.ReLU(),\n",
-    "    torch.nn.Linear(50, 10),\n",
+    "    torch.nn.Linear(50, 20),\n",
+    "    torch.nn.Sigmoid(),\n",
+    "    torch.nn.Linear(20, 10),\n",
     "    torch.nn.Softmax(1),\n",
     ")"
    ]
@@ -139,7 +148,9 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "nn_regression = torch.nn.Sequential(*nn_model[:-1])"
+    "imageno = 10000\n",
+    "image = mnist_train.data[imageno, :]\n",
+    "plt.imshow(image, cmap=\"gray\")"
    ]
   },
   {
@@ -148,9 +159,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "imageno = 10000\n",
-    "image = mnist_train.data[imageno, :]\n",
-    "plt.imshow(image, cmap=\"gray\")"
+    "ex_prob = nn_model.forward(x_train[imageno:imageno+1, :])[0]"
    ]
   },
   {
@@ -159,7 +168,6 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "ex_prob = nn_regression.forward(x_train[imageno, :])\n",
     "sorted_labels = torch.argsort(ex_prob)\n",
     "right_label = sorted_labels[-1]\n",
     "wrong_label = sorted_labels[-2]"
@@ -188,7 +196,7 @@
     "m.addConstr(abs_diff >= -x + image)\n",
     "m.addConstr(abs_diff.sum() <= delta)\n",
     "\n",
-    "pred_constr = add_predictor_constr(m, nn_regression, x, y)\n",
+    "pred_constr = add_predictor_constr(m, nn_model, x, y)\n",
     "\n",
     "pred_constr.print_stats()"
    ]
@@ -199,11 +207,19 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "m.Params.BestBdStop = 0.0\n",
-    "m.Params.BestObjStop = 0.0\n",
+    "m.Params.Obbt = 3\n",
     "m.optimize()"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pred_constr.get_error()"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
diff --git a/src/gurobi_ml/keras/keras.py b/src/gurobi_ml/keras/keras.py
@@ -77,7 +77,7 @@ def __init__(self, gp_model, predictor, input_vars, output_vars=None, **kwargs):
             if isinstance(step, keras.layers.Dense):
                 config = step.get_config()
                 activation = config["activation"]
-                if activation not in ("relu", "linear"):
+                if activation not in ("relu", "softmax", "sigmoid", "linear"):
                     raise NoModel(predictor, f"Unsupported activation {activation}")
             elif isinstance(step, keras.layers.ReLU):
                 if step.negative_slope != 0.0:
@@ -120,6 +120,8 @@ def _mip_model(self, **kwargs):
                 activation = config["activation"]
                 if activation == "linear":
                     activation = "identity"
+                if activation == "sigmoid":
+                    activation = "logistic"
                 weights, bias = step.get_weights()
                 layer = self._add_dense_layer(
                     _input,
diff --git a/src/gurobi_ml/torch/sequential.py b/src/gurobi_ml/torch/sequential.py
@@ -76,9 +76,15 @@ class SequentialConstr(BaseNNConstr):
     |ClassShort|.
     """
 
+    activations = {
+        nn.ReLU: "relu",
+        nn.Softmax: "softmax",
+        nn.Sigmoid: "logistic",
+    }
+
     def __init__(self, gp_model, predictor, input_vars, output_vars=None, **kwargs):
         for step in predictor:
-            if isinstance(step, nn.ReLU):
+            if isinstance(step, tuple(self.activations.keys())):
                 pass
             elif isinstance(step, nn.Linear):
                 pass
@@ -95,12 +101,7 @@ def _mip_model(self, **kwargs):
         for i, step in enumerate(network):
             if i == num_layers - 1:
                 output = self._output
-            if isinstance(step, nn.ReLU):
-                layer = self._add_activation_layer(
-                    _input, self.act_dict["relu"](), output, name=f"relu_{i}", **kwargs
-                )
-                _input = layer.output
-            elif isinstance(step, nn.Linear):
+            if isinstance(step, nn.Linear):
                 layer_weight = None
                 layer_bias = None
                 for name, param in step.named_parameters():
@@ -122,7 +123,17 @@ def _mip_model(self, **kwargs):
                     **kwargs,
                 )
                 _input = layer.output
-        if self._output is None:
+            else:
+                activation = self.activations[type(step)]
+                layer = self._add_activation_layer(
+                    _input,
+                    self.act_dict[activation](),
+                    output,
+                    name=f"{activation}_{i}",
+                    **kwargs,
+                )
+                _input = layer.output
+        if self.output is None:
             self._output = layer.output
 
     def get_error(self, eps=None):
diff --git a/tests/test_keras/keras_cases.py b/tests/test_keras/keras_cases.py