diff --git a/api.py b/api.py
index f822fbe2..f5380bd2 100644
--- a/api.py
+++ b/api.py
@@ -30,7 +30,7 @@
 # Load our model into memory.
 # Please update this path to reflect your own trained model.
 static_model = load_model(
-    path_to_model='assets/trained-models/load_shortfall_simple_lm_regression.pkl')
+    path_to_model='assets/trained-models/model_final.pkl')
 
 print ('-'*40)
 print ('Model successfully loaded')
diff --git a/assets/trained-models/model_final.pkl b/assets/trained-models/model_final.pkl
new file mode 100644
index 00000000..a6525f36
Binary files /dev/null and b/assets/trained-models/model_final.pkl differ
diff --git a/model.py b/model.py
index 42f73063..059d056e 100644
--- a/model.py
+++ b/model.py
@@ -28,40 +28,14 @@ import json
 
 def _preprocess_data(data):
-    """Private helper function to preprocess data for model prediction.
-
-    NB: If you have utilised feature engineering/selection in order to create
-    your final model you will need to define the code here.
-
-
-    Parameters
-    ----------
-    data : str
-        The data payload received within POST requests sent to our API.
-
-    Returns
-    -------
-    Pandas DataFrame :
-        The preprocessed data, ready to be used our model for prediction.
-    """
+    # Convert the json string to a python dictionary object
     feature_vector_dict = json.loads(data)
-    # Load the dictionary as a Pandas DataFrame.
-    feature_vector_df = pd.DataFrame.from_dict([feature_vector_dict])
-
-    # ---------------------------------------------------------------
-    # NOTE: You will need to swap the lines below for your own data
-    # preprocessing methods.
-    #
-    # The code below is for demonstration purposes only. You will not
-    # receive marks for submitting this code in an unchanged state.
-    # ---------------------------------------------------------------
-
-    # ----------- Replace this code with your own preprocessing steps --------
-    predict_vector = feature_vector_df[['Madrid_wind_speed','Bilbao_rain_1h','Valencia_wind_speed']]
-    # ------------------------------------------------------------------------
-
+    feature_vector_df = pd.DataFrame.from_dict([feature_vector_dict])
+
+    predict_vector = feature_vector_df[['Seville_wind_speed', 'Barcelona_wind_speed', 'Bilbao_rain_1h']]
     return predict_vector
 
+
 def load_model(path_to_model:str):
     """Adapter function to load our pretrained model into memory.
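
The reworked _preprocess_data above now just parses the JSON payload, wraps it in a single-row DataFrame, and selects the three weather features the retrained model was fit on. Below is a minimal sketch of that round trip on a hand-made payload; the numeric values and the extra Madrid_wind_speed key are purely illustrative, not taken from the project data.

import json

import pandas as pd

# Hypothetical payload with made-up values.
sample_payload = json.dumps({
    'Seville_wind_speed': 4.0,
    'Barcelona_wind_speed': 6.3,
    'Bilbao_rain_1h': 0.1,
    'Madrid_wind_speed': 5.2   # unused keys are simply dropped by the column selection
})

# The same three steps performed by the new _preprocess_data in model.py.
feature_vector_dict = json.loads(sample_payload)
feature_vector_df = pd.DataFrame.from_dict([feature_vector_dict])
predict_vector = feature_vector_df[['Seville_wind_speed', 'Barcelona_wind_speed', 'Bilbao_rain_1h']]

print(predict_vector)  # a single-row DataFrame holding exactly the model's three inputs
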
diff --git a/utils/LinearModel (1).ipynb b/utils/LinearModel (1).ipynb
new file mode 100644
index 00000000..f38891f5
--- /dev/null
+++ b/utils/LinearModel (1).ipynb
@@ -0,0 +1,150 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "570048ef-2f6b-407f-bf2c-e14bd1b078fc",
+   "metadata": {},
+   "outputs": [
+    {
+     "ename": "ModuleNotFoundError",
+     "evalue": "No module named 'numpy'",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
+      "\u001b[1;32m/Users/mbalenhle/Desktop/explore-js4/utils/models/LinearModel (1).ipynb Cell 1'\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[39mimport\u001b[39;00m \u001b[39mnumpy\u001b[39;00m \u001b[39mas\u001b[39;00m \u001b[39mnp\u001b[39;00m \u001b[39m# A package used for dataframes that store a matrix of data\u001b[39;00m\n\u001b[1;32m 2\u001b[0m \u001b[39mimport\u001b[39;00m \u001b[39mpandas\u001b[39;00m \u001b[39mas\u001b[39;00m \u001b[39mpd\u001b[39;00m \u001b[39m# A package in order to import tables\u001b[39;00m\n\u001b[1;32m 3\u001b[0m \u001b[39mimport\u001b[39;00m \u001b[39mmatplotlib\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mpyplot\u001b[39;00m \u001b[39mas\u001b[39;00m \u001b[39mplt\u001b[39;00m \u001b[39m# A visualization package to make fancy plots\u001b[39;00m\n",
+      "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'numpy'"
+     ]
+    }
+   ],
+   "source": [
+    "import numpy as np # A package used for dataframes that store a matrix of data\n",
+    "import pandas as pd # A package in order to import tables\n",
+    "import matplotlib.pyplot as plt # A visualization package to make fancy plots\n",
+    "from matplotlib import rc\n",
+    "import math\n",
+    "import seaborn as sns # Also a visualisation package\n",
+    "\n",
+    "# for data preparation and for building modells\n",
+    "from sklearn.model_selection import train_test_split\n",
+    "from sklearn.tree import DecisionTreeRegressor\n",
+    "from sklearn.linear_model import Ridge\n",
+    "from sklearn.linear_model import Lasso\n",
+    "from sklearn.ensemble import RandomForestRegressor\n",
+    "from sklearn import metrics\n",
+    "from sklearn.linear_model import LinearRegression\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "71981916-6315-4ca7-a0e8-821e11882da6",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#training data set:\n",
+    "train = pd.read_csv(\"df_train.csv\")\n",
+    "#testing data set\n",
+    "test= pd.read_csv(\"df_test.csv\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "84b172f9",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "train_y = train['load_shortfall_3h']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8fb39070",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "train_x = train[['Seville_wind_speed', 'Barcelona_wind_speed', 'Bilbao_rain_1h']]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0231accc",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "LinearRegression()"
+      ]
+     },
+     "execution_count": 133,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "lm_regression = LinearRegression()\n",
+    "lm_regression.fit(train_x,train_y)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "62726214",
+   "metadata": {},
+   "source": [
+    "### Saving model with Pickle"
+   ]
+  },
+  {
+   "cell_type": "code",
"execution_count": null, + "id": "8564c2b7", + "metadata": {}, + "outputs": [], + "source": [ + "import pickle \n", + "\n", + "model_save_path = 'model_final.pkl'\n", + "\n", + "with open(model_save_path, 'wb') as file:\n", + " pickle.dump(lm_regression, file)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4f8aec54", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "interpreter": { + "hash": "aee8b7b246df8f9039afb4144a1f6fd8d2ca17a180786b69acc140d282b71a49" + }, + "kernelspec": { + "display_name": "Python 3.8.10 64-bit", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/utils/request.py b/utils/request.py index 99ab68da..be7ef106 100644 --- a/utils/request.py +++ b/utils/request.py @@ -37,7 +37,7 @@ # replace the URL below with its public IP: # url = 'http://{public-ip-address-of-remote-machine}:5000/api_v0.1' -url = 'http://127.0.0.1:5000/api_v0.1' +url = 'http://34.244.93.239:5000/api_v0.1' # Perform the POST request. print(f"Sending POST request to web server API at: {url}") diff --git a/utils/studentModel.py b/utils/studentModel.py new file mode 100644 index 00000000..56abae21 --- /dev/null +++ b/utils/studentModel.py @@ -0,0 +1,19 @@ +import pandas as pd +import pickle +from sklearn.linear_model import LinearRegression + +# Fetch training data and preprocess for modeling +train = pd.read_csv('./data/df_train.csv') + +y_train = train[['load_shortfall_3h']] +X_train = train[['Seville_wind_speed', 'Barcelona_wind_speed', 'Bilbao_rain_1h']] + +# Fit model +lm_regression = LinearRegression(normalize=True) +print ("Training Model...") +lm_regression.fit(X_train, y_train) + +# Pickle model for use within our API +save_path = '../assets/trained-models/final_model.pkl' +print (f"Training completed. Saving model to: {save_path}") +pickle.dump(lm_regression, open(save_path,'wb'))