Skip to content
This repository was archived by the owner on Nov 23, 2023. It is now read-only.

Trying something to merge two repos #26

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion api.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
# Load our model into memory.
# Please update this path to reflect your own trained model.
static_model = load_model(
path_to_model='assets/trained-models/load_shortfall_simple_lm_regression.pkl')
path_to_model='assets/trained-models/model_final.pkl')

print ('-'*40)
print ('Model successfully loaded')
Expand Down
Binary file added assets/trained-models/model_final.pkl
Binary file not shown.
36 changes: 5 additions & 31 deletions model.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,40 +28,14 @@
import json

def _preprocess_data(data):
"""Private helper function to preprocess data for model prediction.

NB: If you have utilised feature engineering/selection in order to create
your final model you will need to define the code here.


Parameters
----------
data : str
The data payload received within POST requests sent to our API.

Returns
-------
Pandas DataFrame : <class 'pandas.core.frame.DataFrame'>
The preprocessed data, ready to be used our model for prediction.
"""

# Convert the json string to a python dictionary object
feature_vector_dict = json.loads(data)
# Load the dictionary as a Pandas DataFrame.
feature_vector_df = pd.DataFrame.from_dict([feature_vector_dict])

# ---------------------------------------------------------------
# NOTE: You will need to swap the lines below for your own data
# preprocessing methods.
#
# The code below is for demonstration purposes only. You will not
# receive marks for submitting this code in an unchanged state.
# ---------------------------------------------------------------

# ----------- Replace this code with your own preprocessing steps --------
predict_vector = feature_vector_df[['Madrid_wind_speed','Bilbao_rain_1h','Valencia_wind_speed']]
# ------------------------------------------------------------------------

feature_vector_df = pd.DataFrame.from_dict([feature_vector_dict])

predict_vector = feature_vector_df[['Seville_wind_speed', 'Barcelona_wind_speed', 'Bilbao_rain_1h']]
return predict_vector


def load_model(path_to_model:str):
"""Adapter function to load our pretrained model into memory.
Expand Down
150 changes: 150 additions & 0 deletions utils/LinearModel (1).ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "570048ef-2f6b-407f-bf2c-e14bd1b078fc",
"metadata": {},
"outputs": [],
"source": [
"import numpy as np # A package used for dataframes that store a matrix of data\n",
"import pandas as pd # A package in order to import tables\n",
"import matplotlib.pyplot as plt # A visualization package to make fancy plots\n",
"from matplotlib import rc\n",
"import math\n",
"import seaborn as sns # Also a visualisation package\n",
"\n",
"# for data preparation and for building modells\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.tree import DecisionTreeRegressor\n",
"from sklearn.linear_model import Ridge\n",
"from sklearn.linear_model import Lasso\n",
"from sklearn.ensemble import RandomForestRegressor\n",
"from sklearn import metrics\n",
"from sklearn.linear_model import LinearRegression\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "71981916-6315-4ca7-a0e8-821e11882da6",
"metadata": {},
"outputs": [],
"source": [
"#training data set:\n",
"train = pd.read_csv(\"df_train.csv\")\n",
"#testing data set\n",
"test= pd.read_csv(\"df_test.csv\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "84b172f9",
"metadata": {},
"outputs": [],
"source": [
"train_y = train['load_shortfall_3h']"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8fb39070",
"metadata": {},
"outputs": [],
"source": [
"train_x = train[['Seville_wind_speed', 'Barcelona_wind_speed', 'Bilbao_rain_1h']]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0231accc",
"metadata": {},
"outputs": [],
"source": [
"lm_regression = LinearRegression()\n",
"lm_regression.fit(train_x,train_y)"
]
},
{
"cell_type": "markdown",
"id": "62726214",
"metadata": {},
"source": [
"### Saving model with Pickle"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8564c2b7",
"metadata": {},
"outputs": [],
"source": [
"import pickle \n",
"\n",
"model_save_path = 'model_final.pkl'\n",
"\n",
"with open(model_save_path, 'wb') as file:\n",
" pickle.dump(lm_regression, file)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4f8aec54",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"interpreter": {
"hash": "aee8b7b246df8f9039afb4144a1f6fd8d2ca17a180786b69acc140d282b71a49"
},
"kernelspec": {
"display_name": "Python 3.8.10 64-bit",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.10"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
2 changes: 1 addition & 1 deletion utils/request.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
# replace the URL below with its public IP:

# url = 'http://{public-ip-address-of-remote-machine}:5000/api_v0.1'
url = 'http://127.0.0.1:5000/api_v0.1'
url = 'http://34.244.93.239:5000/api_v0.1'

# Perform the POST request.
print(f"Sending POST request to web server API at: {url}")
Expand Down
19 changes: 19 additions & 0 deletions utils/studentModel.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
"""Train the load-shortfall linear model and pickle it for the API.

Reads the training CSV, fits a LinearRegression on the same three
features that model.py's _preprocess_data selects, and saves the fitted
model to the path api.py loads from.
"""
import pandas as pd
import pickle
from sklearn.linear_model import LinearRegression

# Fetch training data and preprocess for modeling.
train = pd.read_csv('./data/df_train.csv')

# Target and the same three features used in model.py's _preprocess_data.
y_train = train['load_shortfall_3h']
X_train = train[['Seville_wind_speed', 'Barcelona_wind_speed', 'Bilbao_rain_1h']]

# Fit model.
# NOTE: `normalize=True` was removed — the parameter was deprecated in
# scikit-learn 1.0 and removed in 1.2 (it crashed on current sklearn),
# and the deployed notebook-trained model was fitted without it.
lm_regression = LinearRegression()
print("Training Model...")
lm_regression.fit(X_train, y_train)

# Pickle model for use within our API.
# The filename must match what api.py passes to load_model
# ('assets/trained-models/model_final.pkl'); the previous
# 'final_model.pkl' name would never have been picked up by the API.
save_path = '../assets/trained-models/model_final.pkl'
print(f"Training completed. Saving model to: {save_path}")
with open(save_path, 'wb') as model_file:
    pickle.dump(lm_regression, model_file)