deploy: 966f5d1

JesperDramsch · Feb 11, 2024 · a5909ab · a5909ab
1 parent 62c69e1
commit a5909ab
Show file tree

Hide file tree

Showing 5 changed files with 103 additions and 79 deletions.
diff --git a/_sources/notebooks/5-interpretability.ipynb b/_sources/notebooks/5-interpretability.ipynb
@@ -547,7 +547,15 @@
   {
    "cell_type": "markdown",
    "metadata": {},
-   "source": []
+   "source": [
+    "We can see that the permutation importance gives a lower weight to `Culmen Depth` in the Random Forest and a slightly higher importance to `Sex`.\n",
+    "\n",
+    "Overall, it's similar in that `Culmen Length` is still the most important feature and `Flipper Length` is the second most important, while the relative importances change somewhat.\n",
+    "\n",
+    "These differences can be much more pronounced in more complex models.\n",
+    "\n",
+    "The really neat feature, however, is that we can apply permutation importance to the SVM model, which does not have built-in feature importances!"
+   ]
   },
   {
    "cell_type": "code",
@@ -583,14 +591,23 @@
     "plt.show()"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Here we can see that `Culmen Length` is still the most important feature and `Sex` is mostly unimportant, but the relative importances of `Culmen Depth` and `Flipper Length` change."
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
-    "fi_rf_test = pd.Series(permutation_importance(rf, X_test, y_test, n_repeats=10, random_state=42), index=features)\n",
-    "fi_svm_test = pd.Series(permutation_importance(model, X_test, y_test, n_repeats=10, random_state=42), index=features)\n",
+    "result = permutation_importance(rf, X_test, y_test, n_repeats=10, random_state=42)\n",
+    "fi_rf_test = pd.Series(result.importances_mean, index=features)  # result is a Bunch; keep the per-feature mean over the repeats\n",
+    "result = permutation_importance(model, X_test, y_test, n_repeats=10, random_state=42)\n",
+    "fi_svm_test = pd.Series(result.importances_mean, index=features)  # passing the whole Bunch would align its keys to `features` and yield NaN\n",
     "\n",
     "pd.DataFrame({\"RF Train\": fi_rf_train, \"SVM Train\": fi_svm_train, \"RF Test\": fi_rf_test, \"SVM Test\": fi_svm_test}).plot.bar()"
    ]

diff --git a/notebooks/0-basic-data-prep-and-model.html b/notebooks/0-basic-data-prep-and-model.html
@@ -984,38 +984,38 @@ <h2>Machine Learning<a class="headerlink" href="#machine-learning" title="Permal
   </thead>
   <tbody>
     <tr>
-      <th>34</th>
-      <td>36.4</td>
-      <td>17.0</td>
-      <td>195.0</td>
+      <th>230</th>
+      <td>45.2</td>
+      <td>13.8</td>
+      <td>215.0</td>
       <td>FEMALE</td>
     </tr>
     <tr>
-      <th>243</th>
-      <td>52.2</td>
-      <td>17.1</td>
-      <td>228.0</td>
-      <td>MALE</td>
+      <th>276</th>
+      <td>46.5</td>
+      <td>17.9</td>
+      <td>192.0</td>
+      <td>FEMALE</td>
     </tr>
     <tr>
-      <th>312</th>
-      <td>47.6</td>
-      <td>18.3</td>
-      <td>195.0</td>
+      <th>15</th>
+      <td>36.6</td>
+      <td>17.8</td>
+      <td>185.0</td>
       <td>FEMALE</td>
     </tr>
     <tr>
-      <th>285</th>
-      <td>51.3</td>
-      <td>19.9</td>
-      <td>198.0</td>
-      <td>MALE</td>
+      <th>66</th>
+      <td>35.5</td>
+      <td>16.2</td>
+      <td>195.0</td>
+      <td>FEMALE</td>
     </tr>
     <tr>
-      <th>46</th>
-      <td>41.1</td>
-      <td>19.0</td>
-      <td>182.0</td>
+      <th>61</th>
+      <td>41.3</td>
+      <td>21.1</td>
+      <td>195.0</td>
       <td>MALE</td>
     </tr>
     <tr>
@@ -1026,38 +1026,38 @@ <h2>Machine Learning<a class="headerlink" href="#machine-learning" title="Permal
       <td>...</td>
     </tr>
     <tr>
-      <th>44</th>
-      <td>37.0</td>
-      <td>16.9</td>
-      <td>185.0</td>
+      <th>157</th>
+      <td>46.5</td>
+      <td>13.5</td>
+      <td>210.0</td>
       <td>FEMALE</td>
     </tr>
     <tr>
-      <th>236</th>
-      <td>44.9</td>
-      <td>13.8</td>
+      <th>234</th>
+      <td>47.4</td>
+      <td>14.6</td>
       <td>212.0</td>
       <td>FEMALE</td>
     </tr>
     <tr>
-      <th>83</th>
-      <td>35.1</td>
-      <td>19.4</td>
-      <td>193.0</td>
-      <td>MALE</td>
+      <th>32</th>
+      <td>39.5</td>
+      <td>17.8</td>
+      <td>188.0</td>
+      <td>FEMALE</td>
     </tr>
     <tr>
-      <th>31</th>
-      <td>37.2</td>
-      <td>18.1</td>
-      <td>178.0</td>
+      <th>35</th>
+      <td>39.2</td>
+      <td>21.1</td>
+      <td>196.0</td>
       <td>MALE</td>
     </tr>
     <tr>
-      <th>121</th>
-      <td>37.7</td>
-      <td>19.8</td>
-      <td>198.0</td>
+      <th>153</th>
+      <td>50.0</td>
+      <td>16.3</td>
+      <td>230.0</td>
       <td>MALE</td>
     </tr>
   </tbody>
@@ -1095,48 +1095,48 @@ <h2>Machine Learning<a class="headerlink" href="#machine-learning" title="Permal
   </thead>
   <tbody>
     <tr>
-      <th>34</th>
-      <td>Adelie Penguin (Pygoscelis adeliae)</td>
-    </tr>
-    <tr>
-      <th>243</th>
+      <th>230</th>
       <td>Gentoo penguin (Pygoscelis papua)</td>
     </tr>
     <tr>
-      <th>312</th>
+      <th>276</th>
       <td>Chinstrap penguin (Pygoscelis antarctica)</td>
     </tr>
     <tr>
-      <th>285</th>
-      <td>Chinstrap penguin (Pygoscelis antarctica)</td>
+      <th>15</th>
+      <td>Adelie Penguin (Pygoscelis adeliae)</td>
+    </tr>
+    <tr>
+      <th>66</th>
+      <td>Adelie Penguin (Pygoscelis adeliae)</td>
     </tr>
     <tr>
-      <th>46</th>
+      <th>61</th>
       <td>Adelie Penguin (Pygoscelis adeliae)</td>
     </tr>
     <tr>
       <th>...</th>
       <td>...</td>
     </tr>
     <tr>
-      <th>44</th>
-      <td>Adelie Penguin (Pygoscelis adeliae)</td>
+      <th>157</th>
+      <td>Gentoo penguin (Pygoscelis papua)</td>
     </tr>
     <tr>
-      <th>236</th>
+      <th>234</th>
       <td>Gentoo penguin (Pygoscelis papua)</td>
     </tr>
     <tr>
-      <th>83</th>
+      <th>32</th>
       <td>Adelie Penguin (Pygoscelis adeliae)</td>
     </tr>
     <tr>
-      <th>31</th>
+      <th>35</th>
       <td>Adelie Penguin (Pygoscelis adeliae)</td>
     </tr>
     <tr>
-      <th>121</th>
-      <td>Adelie Penguin (Pygoscelis adeliae)</td>
+      <th>153</th>
+      <td>Gentoo penguin (Pygoscelis papua)</td>
     </tr>
   </tbody>
 </table>
@@ -1264,7 +1264,7 @@ <h3>Model Training<a class="headerlink" href="#model-training" title="Permalink
 </div>
 </div>
 <div class="cell_output docutils container">
-<div class="output text_plain highlight-myst-ansi notranslate"><div class="highlight"><pre><span></span>0.9871244635193133
+<div class="output text_plain highlight-myst-ansi notranslate"><div class="highlight"><pre><span></span>0.9914163090128756
 </pre></div>
 </div>
 </div>

diff --git a/notebooks/1-model-evaluation.html b/notebooks/1-model-evaluation.html
@@ -1146,8 +1146,8 @@ <h2><span class="section-number">1.3.5. </span>Choosing the appropriate Evaluati
 </div>
 </div>
 <div class="cell_output docutils container">
-<div class="output text_plain highlight-myst-ansi notranslate"><div class="highlight"><pre><span></span>{&#39;fit_time&#39;: array([0.00599885, 0.00516748, 0.00515461, 0.00513673, 0.00516844]),
- &#39;score_time&#39;: array([0.00435352, 0.00395489, 0.00400567, 0.00393963, 0.00396037]),
+<div class="output text_plain highlight-myst-ansi notranslate"><div class="highlight"><pre><span></span>{&#39;fit_time&#39;: array([0.00631857, 0.00540304, 0.0053215 , 0.00532579, 0.00522256]),
+ &#39;score_time&#39;: array([0.00442171, 0.00407219, 0.00416684, 0.0040834 , 0.00409722]),
  &#39;test_MCC&#39;: array([0.37796447, 0.27863911, 0.40824829, 0.02424643, 0.08625819]),
  &#39;test_ACC&#39;: array([0.73333333, 0.7       , 0.76666667, 0.66666667, 0.62068966])}
 </pre></div>

diff --git a/notebooks/5-interpretability.html b/notebooks/5-interpretability.html
diff --git a/searchindex.js b/searchindex.js