Binary file added .DS_Store
Binary file not shown.
364 changes: 358 additions & 6 deletions README.md

Large diffs are not rendered by default.

Binary file added elasticnet/.DS_Store
Binary file not shown.
10,001 changes: 10,001 additions & 0 deletions elasticnet/Data.csv

Large diffs are not rendered by default.

1,197 changes: 1,197 additions & 0 deletions elasticnet/Notebook.ipynb

Large diffs are not rendered by default.

134 changes: 134 additions & 0 deletions elasticnet/__init__.py
@@ -0,0 +1,134 @@
import pandas as pd
import numpy as np
import csv
import os
from models import LassoModel, RidgeModel, ElasticNetModel, RegularizedRegression
from models.decision_tree import DecisionTreeRegressor


class KNNModel:
def __init__(self, k=3):
self.k = k

def fit(self, X, y):
self.X_train = X
self.y_train = y

def predict(self, X):
predictions = []
for x in X:
distances = np.sqrt(np.sum((self.X_train - x) ** 2, axis=1)) # Euclidean distance
k_indices = distances.argsort()[:self.k] # Get indices of k closest points
k_nearest_labels = self.y_train[k_indices]
            # Regression: average the targets of the k nearest neighbors
            # (a classifier would take a majority vote instead)
            prediction = np.mean(k_nearest_labels)
predictions.append(prediction)
return np.array(predictions)
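
# A minimal usage sketch for KNNModel (illustrative only; the toy arrays
# below are hypothetical, not taken from the datasets used in run_models):
#
#     X_demo = np.array([[0.0], [1.0], [2.0], [3.0]])
#     y_demo = np.array([0.0, 1.0, 2.0, 3.0])
#     knn = KNNModel(k=2)
#     knn.fit(X_demo, y_demo)
#     knn.predict(np.array([[1.6]]))  # -> array([1.5]), mean of the 2 nearest targets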

def run_models():
    for i in range(3):
        if i == 0:
            print("\n ")
            print("----------------------------DIABETES----------------------------")
            print("\n ")
            data = pd.read_csv('https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv', header=None)
            X = data.iloc[:, :-1].values
            y = data.iloc[:, -1].values
        elif i == 1:
            print("\n ")
            print("----------------------------WHITE WINE----------------------------")
            print("\n ")
            data = pd.read_csv('winequality-white.csv', sep=';')
            X = data.drop(columns='quality').values
            y = data['quality'].values
        elif i == 2:
            print("\n ")
            print("----------------------------GENERATED DATA----------------------------")
            print("\n ")
            data = pd.read_csv('Data.csv')
            X = data.drop(columns='y').values
            y = data['y'].values

        # Drop rows with missing values from X and y directly; calling
        # data.dropna() after X and y have been extracted would not affect them
        mask = ~(np.isnan(X).any(axis=1) | np.isnan(y))
        X, y = X[mask], y[mask]

        X_mean = np.mean(X, axis=0)
        X_std = np.std(X, axis=0)
        X_std[X_std == 0] = 1.0  # avoid division by zero for constant columns
        X_scaled = (X - X_mean) / X_std
        X_b = np.c_[np.ones((X_scaled.shape[0], 1)), X_scaled]  # Add bias term

models = [
("Linear Regression", RegularizedRegression(), {}),
("Ridge Regression", RidgeModel(lambda_l2=0.01, alpha=0.001, num_iterations=1000), {}),
("Lasso Regression", LassoModel(lambda_l1=0.01, alpha=0.001, num_iterations=1000), {}),
("Elastic Net Regression", ElasticNetModel(lambda_l1=0.01, lambda_l2=0.01, alpha=0.001, num_iterations=1000), {}),
("KNN Regression", KNNModel(k=5), {})
]

for model_name, model, _ in models:
model.fit(X_b, y)
y_pred = model.predict(X_b)
            mse = np.mean((y - y_pred) ** 2)
r2 = 1 - np.sum((y - y_pred) ** 2) / np.sum((y - np.mean(y)) ** 2)

# Print and store the results for each model
print(f"{model_name} MSE: {mse:.4f}, R²: {r2:.4f}")


def calculate_r2(y_true, y_pred):
ss_total = np.sum((y_true - np.mean(y_true)) ** 2)
ss_residual = np.sum((y_true - y_pred) ** 2)
r2 = 1 - (ss_residual / ss_total)
return r2
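
# Worked micro-example for calculate_r2 (hypothetical numbers, for intuition):
#
#     y_true = np.array([3.0, 5.0, 7.0]); y_pred = np.array([2.9, 5.1, 7.2])
#     ss_total    = (-2)**2 + 0**2 + 2**2           = 8.0
#     ss_residual = 0.1**2 + (-0.1)**2 + (-0.2)**2  = 0.06
#     calculate_r2(y_true, y_pred)                  # 1 - 0.06/8 = 0.9925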

def test_predict():
print("\n ")
print("----------------------------Test Data----------------------------")
print("\n ")

models = {
"Lasso": LassoModel(),
"Ridge": RidgeModel(),
"KNN": KNNModel(),
"ElasticNet": ElasticNetModel()
}

data = []

current_dir = os.getcwd()

file_path = os.path.join(current_dir, "tests", "test.csv")
try:
with open(file_path, "r") as file:
reader = csv.DictReader(file)
for row in reader:
data.append(row)
except FileNotFoundError:
print(f"File not found: {file_path}")
return

X = np.array([[float(v) for k, v in datum.items() if k.startswith('x')] for datum in data])
y = np.array([float(datum['y']) for datum in data])

for name, model in models.items():
print("\n ")
print(f"Testing {name}")
print("\n ")
model.fit(X, y)
preds = model.predict(X)
print(f"Predictions for {name}: {preds}")

# Calculating Mean Squared Error (MSE)
mse = np.mean((preds - y) ** 2)
print(f"Mean Squared Error for {name}: {mse}")

# Calculate R² score
r2 = calculate_r2(y, preds)
print(f"R² for {name}: {r2:.4f}")


assert mse < 22.0, f"High MSE for {name}: MSE = {mse}"


if __name__ == "__main__":
run_models() # Run model training
test_predict() # Run prediction test
32 changes: 23 additions & 9 deletions elasticnet/models/ElasticNet.py
@@ -1,17 +1,31 @@
import numpy as np
from models.linear_regression import RegularizedRegression

class ElasticNetModel(RegularizedRegression):
    def __init__(self, lambda_l1=0.01, lambda_l2=0.01, alpha=0.001, num_iterations=1000):
        super().__init__(regularization='elastic_net', lambda_l1=lambda_l1, lambda_l2=lambda_l2, alpha=alpha, num_iterations=num_iterations)

    def elastic_net_loss(self, parameters, features, labels):
        mse_loss = self.linear_loss(parameters, features, labels)
        l1_loss = self.lambda_l1 * np.sum(np.abs(parameters))
        l2_loss = self.lambda_l2 * np.linalg.norm(parameters) ** 2
        return mse_loss + l1_loss + l2_loss

    def elastic_net_gradient(self, parameters, features, labels):
        grad = self.linear_gradient(parameters, features, labels)
        l1_grad = self.lambda_l1 * np.sign(parameters)
        l2_grad = 2 * self.lambda_l2 * parameters
        return grad + l1_grad + l2_grad

    def fit(self, X, y):
        # Run gradient descent from the base class, then wrap the learned
        # parameters so callers can also predict via the results object
        # (ElasticNetModelResults() with no arguments would raise a TypeError)
        super().fit(X, y)
        return ElasticNetModelResults(self.parameters)

    def predict(self, X):
        return np.dot(X, self.parameters)


class ElasticNetModelResults:
    def __init__(self, parameters):
        self.parameters = parameters

    def predict(self, x):
        return np.dot(x, self.parameters)
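
# The objective minimized by ElasticNetModel (via the gradient-descent loop
# inherited from RegularizedRegression) is
#
#     L(w) = (1 / 2N) * ||Xw - y||^2 + lambda_l1 * ||w||_1 + lambda_l2 * ||w||_2^2
#
# A minimal usage sketch on made-up data (illustrative only):
#
#     X = np.random.randn(100, 3)
#     y = X @ np.array([1.0, -2.0, 0.5]) + 0.1 * np.random.randn(100)
#     model = ElasticNetModel(lambda_l1=0.01, lambda_l2=0.01, alpha=0.01, num_iterations=2000)
#     results = model.fit(X, y)
#     y_hat = results.predict(X)   # equivalently: model.predict(X)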
7 changes: 7 additions & 0 deletions elasticnet/models/__init__.py
@@ -0,0 +1,7 @@
# models/__init__.py
from .linear_regression import RegularizedRegression
from .lasso import LassoModel
from .ridge import RidgeModel
from .ElasticNet import ElasticNetModel


81 changes: 81 additions & 0 deletions elasticnet/models/decision_tree.py
@@ -0,0 +1,81 @@
import numpy as np

class DecisionTreeRegressor:
def __init__(self, min_samples_split=2, max_depth=10):
self.min_samples_split = min_samples_split
self.max_depth = max_depth
self.tree = None

def fit(self, X, y):
self.tree = self._build_tree(X, y)

def _build_tree(self, X, y, depth=0):
num_samples, num_features = X.shape
        if num_samples >= self.min_samples_split and depth < self.max_depth:
            best_split = self._get_best_split(X, y, num_features)
            # _get_best_split returns an empty dict when no valid split exists,
            # so guard before indexing to avoid a KeyError
            if best_split and best_split['variance_reduction'] > 0:
left_subtree = self._build_tree(best_split['X_left'], best_split['y_left'], depth + 1)
right_subtree = self._build_tree(best_split['X_right'], best_split['y_right'], depth + 1)
return {
'feature_index': best_split['feature_index'],
'threshold': best_split['threshold'],
'left': left_subtree,
'right': right_subtree
}
return np.mean(y)

def _get_best_split(self, X, y, num_features):
best_split = {}
max_variance_reduction = -float('inf')

for feature_index in range(num_features):
feature_values = X[:, feature_index]
possible_thresholds = np.unique(feature_values)
for threshold in possible_thresholds:
X_left, y_left, X_right, y_right = self._split(X, y, feature_index, threshold)

if len(y_left) > 0 and len(y_right) > 0:
variance_reduction = self._calculate_variance_reduction(y, y_left, y_right)

if variance_reduction > max_variance_reduction:
max_variance_reduction = variance_reduction
best_split = {
'feature_index': feature_index,
'threshold': threshold,
'X_left': X_left,
'y_left': y_left,
'X_right': X_right,
'y_right': y_right,
'variance_reduction': variance_reduction
}

return best_split

def _split(self, X, y, feature_index, threshold):
X_left = X[X[:, feature_index] <= threshold]
y_left = y[X[:, feature_index] <= threshold]
X_right = X[X[:, feature_index] > threshold]
y_right = y[X[:, feature_index] > threshold]
return X_left, y_left, X_right, y_right

def _calculate_variance_reduction(self, y, y_left, y_right):
weight_left = len(y_left) / len(y)
weight_right = len(y_right) / len(y)
reduction = np.var(y) - (weight_left * np.var(y_left) + weight_right * np.var(y_right))
return reduction
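
    # For reference, the quantity computed above is
    #
    #     Var(y) - (n_L / n) * Var(y_L) - (n_R / n) * Var(y_R)
    #
    # i.e. the drop in size-weighted within-node variance; maximizing it is
    # the regression analogue of maximizing information gain.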

def predict(self, X):
return np.array([self._predict_single_input(x, self.tree) for x in X])

def _predict_single_input(self, x, tree):
if isinstance(tree, dict):
feature_index = tree['feature_index']
threshold = tree['threshold']

if x[feature_index] <= threshold:
return self._predict_single_input(x, tree['left'])
else:
return self._predict_single_input(x, tree['right'])
else:
return tree
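
# A minimal usage sketch on toy data (illustrative only; DecisionTreeRegressor
# is imported in elasticnet/__init__.py but not wired into run_models):
#
#     X = np.array([[1.0], [2.0], [3.0], [10.0], [11.0], [12.0]])
#     y = np.array([1.0, 1.1, 0.9, 5.0, 5.1, 4.9])
#     tree = DecisionTreeRegressor(max_depth=2)
#     tree.fit(X, y)
#     tree.predict(np.array([[2.5], [11.5]]))  # approx. [1.0, 5.0]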

16 changes: 16 additions & 0 deletions elasticnet/models/lasso.py
@@ -0,0 +1,16 @@
import numpy as np
from models.linear_regression import RegularizedRegression

class LassoModel(RegularizedRegression):
def __init__(self, lambda_l1=0.01, alpha=0.001, num_iterations=1000):
super().__init__(regularization='lasso', lambda_l1=lambda_l1, alpha=alpha, num_iterations=num_iterations)

def lasso_loss(self, parameters, features, labels):
return self.linear_loss(parameters, features, labels) + self.lambda_l1 * np.sum(np.abs(parameters))

def lasso_gradient(self, parameters, features, labels):
grad = self.linear_gradient(parameters, features, labels)
grad += self.lambda_l1 * np.sign(parameters)
        return grad

    def predict(self, X):
        return np.dot(X, self.parameters)
60 changes: 60 additions & 0 deletions elasticnet/models/linear_regression.py
@@ -0,0 +1,60 @@
import numpy as np
import matplotlib.pyplot as plt  # type: ignore  # used only by the commented-out loss plot in fit()

class RegularizedRegression:
def __init__(self, regularization='none', lambda_l1=0.0, lambda_l2=0.0, alpha=0.001, num_iterations=1000):
self.regularization = regularization
self.lambda_l1 = lambda_l1
self.lambda_l2 = lambda_l2
self.alpha = alpha
self.num_iterations = num_iterations
self.parameters = None

def linear_loss(self, parameters, features, labels):
N = len(labels)
predictions = np.dot(features, parameters)
loss = np.sum((predictions - labels) ** 2) / (2 * N)
return loss

def linear_gradient(self, parameters, features, labels):
N = len(labels)
predictions = np.dot(features, parameters)
grad = (1 / N) * np.dot(features.T, (predictions - labels))
return grad
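
    # In matrix form the two quantities above are
    #     loss(w) = (1 / 2N) * ||Xw - y||^2
    #     grad(w) = (1 / N) * X^T (Xw - y)
    # which fit() below drives to a minimum by plain gradient descent.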

    def fit(self, X, y):
        initial_parameters = np.zeros(X.shape[1])

if self.regularization == 'lasso':
loss_function = self.lasso_loss
gradient_function = self.lasso_gradient
elif self.regularization == 'ridge':
loss_function = self.ridge_loss
gradient_function = self.ridge_gradient
elif self.regularization == 'elastic_net':
loss_function = self.elastic_net_loss
gradient_function = self.elastic_net_gradient
else:
loss_function = self.linear_loss
gradient_function = self.linear_gradient

self.parameters = initial_parameters.copy()
iteration_list, loss_list = [], []

for i in range(self.num_iterations):
grad = gradient_function(self.parameters, X, y)
self.parameters -= self.alpha * grad
loss = loss_function(self.parameters, X, y)
iteration_list.append(i)
loss_list.append(loss)


# plt.plot(iteration_list, loss_list, 'ob', linestyle='solid', color='red')
# plt.xlabel("Iterations")
# plt.ylabel("Loss")
# plt.title(f"Loss Function ({self.regularization.capitalize()})")
# plt.show()

def predict(self, X):
return np.dot(X, self.parameters)
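
# A minimal usage sketch of the base class on synthetic data (illustrative
# only; the coefficients below are made up):
#
#     X = np.c_[np.ones(50), np.random.randn(50, 2)]   # bias column + features
#     y = X @ np.array([0.5, 1.0, -1.0])
#     model = RegularizedRegression(alpha=0.01, num_iterations=5000)
#     model.fit(X, y)
#     model.parameters   # should approach [0.5, 1.0, -1.0]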
16 changes: 16 additions & 0 deletions elasticnet/models/ridge.py
@@ -0,0 +1,16 @@
import numpy as np
from models.linear_regression import RegularizedRegression

class RidgeModel(RegularizedRegression):
def __init__(self, lambda_l2=0.01, alpha=0.001, num_iterations=1000):
super().__init__(regularization='ridge', lambda_l2=lambda_l2, alpha=alpha, num_iterations=num_iterations)

def ridge_loss(self, parameters, features, labels):
return self.linear_loss(parameters, features, labels) + self.lambda_l2 * np.linalg.norm(parameters) ** 2

def ridge_gradient(self, parameters, features, labels):
grad = self.linear_gradient(parameters, features, labels)
grad += 2 * self.lambda_l2 * parameters
        return grad

    def predict(self, X):
        return np.dot(X, self.parameters)
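
# Note: with this loss scaling, ridge regression also has a closed-form
# solution that can be used to sanity-check the gradient-descent fit
# (a sketch, assuming X already contains any bias column):
#
#     lam = model.lambda_l2
#     w = np.linalg.solve(X.T @ X + 2 * len(y) * lam * np.eye(X.shape[1]), X.T @ y)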