Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# machinelearningregression
Boilerplate code, scripts, modules, and data for the Regression Analysis workshop.

Before starting, check that your working environment is ready by running `runme.py`:

```
$ python runme.py
```

If the following message appears:

```
Good to go, all packages installed ok, ready to code.
```

You're ready for the workshop!
64 changes: 9 additions & 55 deletions functions.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#cca regression functions
# Advanced functions
from sklearn.pipeline import make_pipeline
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
Expand All @@ -23,23 +23,20 @@ def model_plot_3d(ax, model, x1, x2):
positions = np.vstack([X1.ravel(), X2.ravel()]).T
y = model.predict(positions)
Y = y.reshape(X1.shape)
ax.plot_surface(X1, X2, Y, alpha = 0.2 , cmap = 'jet')

def save_figure(name, i):
    """Write the current matplotlib figure to ../figures/ and then clear it.

    The output path is '../figures/' followed by `name` and `str(i)`.
    """
    target = '../figures/' + name + str(i)
    plt.savefig(target, bbox_inches='tight')
    # Clear the figure so the next plot starts from an empty canvas.
    current_figure = plt.gcf()
    current_figure.clear()
ax.plot_surface(X1, X2, Y, alpha = 0.2 , cmap = 'jet',\
linewidth=0.5, rstride=1, cstride=1, shade=True)

def PolynomialRegression(degree = 1):
return make_pipeline(PolynomialFeatures(degree = degree), LinearRegression())
return make_pipeline(PolynomialFeatures(degree = degree,\
include_bias = False), LinearRegression())

def PolynomialRidge(degree = 1, alpha = 1):
return make_pipeline(PolynomialFeatures(degree = degree), StandardScaler(), Ridge(alpha = alpha))
return make_pipeline(PolynomialFeatures(degree = degree,\
include_bias = False), StandardScaler(), Ridge(alpha = alpha))

def PolynomialLasso(degree = 1, alpha = 1):
return make_pipeline(PolynomialFeatures(degree = degree), StandardScaler(), Lasso(alpha = alpha))



return make_pipeline(PolynomialFeatures(degree = degree,\
include_bias = False), StandardScaler(), Lasso(alpha = alpha))

def polynomial_residual(degree, X, y):
polynomial_regression= PolynomialRegression(degree = degree)
Expand All @@ -48,54 +45,11 @@ def polynomial_residual(degree, X, y):
mae = mean_absolute_error(y, y_pred)
return mae


def organize_data(to_forecast, window, horizon):
    """
    Build a sliding-window dataset from a time series.

    Input:
    to_forecast, univariate time series organized as numpy array
    window, number of items to use in the forecast window
    (1 <= window <= length of the series)
    horizon, horizon of the forecast (number of steps between the last
    item of a window and its target); must be >= 0
    Output:
    X, a matrix where each row contains a forecast window
    y, the target values for each row of X
    Raises:
    ValueError, if window or horizon is out of range

    X is a strided view on `to_forecast` (no data is copied), so it must
    be treated as read-only; y is an independent copy.
    """
    n_samples = to_forecast.shape[-1]
    if window < 1 or window > n_samples:
        raise ValueError("window must be between 1 and the series length")
    if horizon < 0:
        # A negative horizon would silently wrap around via negative indexing.
        raise ValueError("horizon must be non-negative")

    # Each row starts one item later than the previous one, hence the
    # repeated last stride.
    shape = to_forecast.shape[:-1] + (n_samples - window + 1, window)
    strides = to_forecast.strides + (to_forecast.strides[-1],)
    X = np.lib.stride_tricks.as_strided(to_forecast,
                                        shape=shape,
                                        strides=strides)

    # The target of row i is the last item of row i + horizon.
    y = X[horizon:, -1].copy()

    # Slice with an explicit stop: X[:-horizon] would be empty for
    # horizon == 0 and leave X and y with different lengths.
    n_rows = max(len(X) - horizon, 0)
    return X[:n_rows], y


class NonLinearRegression(object):
    """Fit a user-supplied parametric function by minimising squared error.

    `fun` is called as ``fun(x, p)``, where `x` is the flattened input and
    `p` is a 1-D array of parameters. The number of parameters is not
    declared by the caller; it is inferred by probing `fun` (see
    `_find_n_params`).
    """

    def __init__(self, fun):
        self.fun = fun
        self._find_n_params()

    def fit(self, X, y):
        """Estimate `self.params` by minimising the mean squared residual.

        The search starts from a random normal vector, so results may differ
        between runs. Returns `self` so calls can be chained.
        """
        # NOTE(review): basinhopping is expected to be
        # scipy.optimize.basinhopping, imported at module level.
        start = np.random.randn(self._n_params)
        result = basinhopping(lambda p: self._get_residual(X, y, p),
                              start, niter=10000, niter_success=50)
        self.params = result['x']
        return self

    def predict(self, X):
        """Evaluate `fun` on the flattened `X` with the fitted parameters."""
        return self.fun(X.flatten(), self.params)

    def get_params(self, deep=True):
        """Return the constructor arguments as a dict."""
        return {"fun": self.fun}

    def set_params(self, **parameters):
        """Set attributes from keyword arguments and return `self`."""
        for parameter, value in parameters.items():
            # Instances have no `setattr` method; the builtin must be used.
            setattr(self, parameter, value)
        if "fun" in parameters:
            # A different function may take a different number of parameters.
            self._find_n_params()
        return self

    def _get_residual(self, X, y, p):
        """Mean squared error of `fun` with parameters `p` against `y`."""
        return np.mean((self.fun(X.flatten(), p) - y) ** 2)

    def _find_n_params(self):
        """Infer how many parameters `fun` needs and store it in `_n_params`.

        `fun` is called with random parameter vectors of length 99 down to 1.
        The first length that raises is taken to be one too short; if no
        call raises, a single parameter is assumed.
        """
        n_params = 1
        for candidate in range(99, 0, -1):
            try:
                self.fun(1, np.random.rand(candidate))
            except Exception:
                # Narrower than a bare except: lets KeyboardInterrupt and
                # SystemExit propagate.
                n_params = candidate + 1
                break
        self._n_params = n_params
Binary file removed functions.pyc
Binary file not shown.
14 changes: 4 additions & 10 deletions module3.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,10 @@
from functions import save_figure
from os.path import splitext
figname = splitext(__file__)[0]+'_'
ifig = 0
# Module 3: Linear regression

# New imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn import metrics

bikes_df = pd.read_csv('./data/bikes_subsampled.csv')

# We select the variables temperature and bikes_count

temperature = bikes_df['temperature'].values
bikes_count = bikes_df['count'].values
# Code after this
27 changes: 9 additions & 18 deletions module4.py
Original file line number Diff line number Diff line change
@@ -1,27 +1,18 @@
from functions import save_figure
from os.path import splitext
figname = splitext(__file__)[0]+'_'
ifig = 0

################################################################################
################################### MODULE 4 ###################################
##################### Multiple variables linear regression #####################
################################################################################
# Module 4: Multiple and polynomial regression

# Previous imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from mpl_toolkits.mplot3d.axes3d import Axes3D
from sklearn.preprocessing import PolynomialFeatures

from functions import PolynomialRegression, model_plot_3d

# New imports
from functions import PolynomialRegression
from mpl_toolkits.mplot3d.axes3d import Axes3D
from functions import model_plot_3d
from functions import polynomial_residual

# Load dataset
bikes_df = pd.read_csv('./data/bikes_subsampled.csv')

# Learning activity 1: Fit a model of 2 variables and plot the model

features = ['temperature','humidity']
X = bikes_df[features].values
y = bikes_df['count'].values
# Code after this
18 changes: 7 additions & 11 deletions module5.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,19 @@
from functions import save_figure
from os.path import splitext
figname = splitext(__file__)[0]+'_'
ifig = 0

################################################################################
################################### MODULE 5 ###################################
############################# Model evaluation #################################
################################################################################
# To be separated in a unique file #
# Module 5: Evaluating model performance

# Previous imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from functions import PolynomialRegression
from sklearn.preprocessing import PolynomialFeatures

# New imports
from sklearn.cross_validation import train_test_split, cross_val_score
from sklearn.metrics import mean_absolute_error

# Load dataset
bikes_df = pd.read_csv('./data/bikes_subsampled.csv')
temperature = bikes_df[['temperature']].values
bikes_count = bikes_df['count'].values

# Code after this
30 changes: 11 additions & 19 deletions module6.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,18 @@
from functions import save_figure
from os.path import splitext
figname = splitext(__file__)[0]+'_'
ifig = 0
# Module 6: Avoid overfitting with regularisation


################################################################################
################################### MODULE 6 ###################################
############################# Regularisation ###################################
################################################################################


# Learning activity 2: Ridge and Lasso regularisations
# Previous imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.linear_model import Ridge, Lasso
from sklearn.linear_model import LinearRegression
from functions import PolynomialRegression
from sklearn.cross_validation import cross_val_score

from functions import PolynomialRidge, PolynomialLasso, PolynomialRegression
# New imports
from functions import PolynomialRidge, PolynomialLasso
from sklearn.grid_search import GridSearchCV

# Load dataset
bikes_df = pd.read_csv('./data/bikes.csv')
features = ['temperature','humidity','windspeed']
X = bikes_df[features].values
y = bikes_df['count'].values

# Code after this
33 changes: 10 additions & 23 deletions module7.py
Original file line number Diff line number Diff line change
@@ -1,30 +1,17 @@
from functions import save_figure
from os.path import splitext
figname = splitext(__file__)[0]+'_'
ifig = 0
# Module 7: Predict the future with autoregression

################################################################################
################################### MODULE 7 ###################################
############################# Advanced fitting methods #########################
################################################################################

#Learning activity 1: use any sklearn model

from sklearn.tree import DecisionTreeRegressor
from sklearn.cross_validation import cross_val_score
from sklearn.linear_model import LinearRegression
from sklearn import metrics
from sklearn.ensemble import RandomForestRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR
import numpy as np
# Previous imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error

from functions import NonLinearRegression, organize_data

# New imports
from functions import organize_data

#Learning activity 2: Custom nonlinear regression
# Load dataset
bikes_df = pd.read_csv('./data/bikes.csv')
temperature = bikes_df[['temperature']].values
bikes = bikes_df['count'].values

# Code after this
14 changes: 5 additions & 9 deletions runme.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,7 @@
import pandas
import matplotlib.pyplot as plt
from sklearn import preprocessing
from sklearn.cross_validation import train_test_split
from sklearn import metrics
import functions
from sklearn import linear_model
from sklearn import cross_validation
from sklearn.grid_search import GridSearchCV
import module3
import module4
import module5
import module6
import module7

# Parenthesised form prints the same text on Python 2 and is valid on Python 3.
print("Good to go, all packages installed ok, ready to code.")