import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import streamlit as st
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler


# Define the neural network class
class NeuralNetwork:
    def __init__(self, input_layer_size, hidden_layer_size, output_layer_size, X):
        # X is accepted for interface compatibility but is not used here;
        # the weights are initialized from the layer sizes alone
        self.input_layer_size = input_layer_size
        self.hidden_layer_size = hidden_layer_size
        self.output_layer_size = output_layer_size
        # Initialize the weights with random values
        self.W1 = np.random.randn(input_layer_size, hidden_layer_size)
        self.W2 = np.random.randn(hidden_layer_size, output_layer_size)
        # Initialize the biases with zeros
        self.b1 = np.zeros((1, hidden_layer_size))
        self.b2 = np.zeros((1, output_layer_size))

    def sigmoid(self, x):
        x = np.array(x, dtype=float)
        return 1 / (1 + np.exp(-x))

    def forward_propagation(self, X):
        # Calculate the hidden layer activations
        self.Z1 = np.dot(X, self.W1) + self.b1
        self.A1 = self.sigmoid(self.Z1)
        # Calculate the output layer activations
        self.Z2 = np.dot(self.A1, self.W2) + self.b2
        self.A2 = self.sigmoid(self.Z2)
        return self.A2

    def backward_propagation(self, X, Y, output, learning_rate):
        # Reshape Y to match the shape of the output
        Y = Y.values.reshape(-1, 1)
        # Error at the output layer
        dZ2 = output - Y
        dW2 = np.dot(self.A1.T, dZ2)
        db2 = np.sum(dZ2, axis=0, keepdims=True)
        # Error propagated back to the hidden layer (sigmoid derivative is A1 * (1 - A1))
        dZ1 = np.dot(dZ2, self.W2.T) * (self.A1 * (1 - self.A1))
        dW1 = np.dot(X.T, dZ1)
        db1 = np.sum(dZ1, axis=0, keepdims=True)
        # Ensure float dtype before the in-place updates
        self.W1 = self.W1.astype("float64")
        dW1 = dW1.astype("float64")
        # Update the weights and biases
        self.W1 -= learning_rate * dW1
        self.b1 -= learning_rate * db1
        self.W2 -= learning_rate * dW2
        self.b2 -= learning_rate * db2

    def loss(self, y_pred, y_true):
        # Mean squared error between the raw predictions and the binarized targets
        y_true_binary = (y_true.values.reshape(-1, 1) >= 0.5).astype(int)
        return np.mean((y_pred - y_true_binary) ** 2)

    def accuracy(self, y_pred, y_true):
        y_true = y_true.values.reshape(-1, 1)
        y_pred_binary = (y_pred >= 0.5).astype(int)
        y_true_binary = (y_true >= 0.5).astype(int)
        return (y_pred_binary == y_true_binary).mean() * 100

    def rmse(self, y_pred, y_true):
        # Root mean squared error (np.sqrt avoids the deprecated squared=False argument)
        return np.sqrt(mean_squared_error(y_true, y_pred))

    def train(self, X, Y, epoch=10, alpha=0.01):
        acc, losses, rmse_values = [], [], []
        for _ in range(epoch):
            out = self.forward_propagation(X)
            self.backward_propagation(X, Y, out, alpha)
            acc.append(self.accuracy(out, Y))
            losses.append(self.loss(out, Y))
            rmse_values.append(self.rmse(out, Y))
        return acc, losses, rmse_values

    def predict(self, X):
        # Forward propagation to get the output
        output = self.forward_propagation(X)
        # Apply a 0.5 threshold to classify the output
        return (output >= 0.5).astype(int)
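

# A minimal standalone sketch of how the NeuralNetwork class above could be
# exercised on synthetic data (the names X_demo / y_demo are illustrative and
# not part of the app itself):
#
#     X_demo = pd.DataFrame(np.random.rand(200, 5))
#     y_demo = pd.Series((X_demo.sum(axis=1) > 2.5).astype(int))
#     demo_nn = NeuralNetwork(input_layer_size=5, hidden_layer_size=10,
#                             output_layer_size=1, X=X_demo)
#     demo_acc, demo_loss, demo_rmse = demo_nn.train(X_demo, y_demo, epoch=50, alpha=0.01)
#     demo_predictions = demo_nn.predict(X_demo)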

# Streamlit app
st.title("Groundwater Quality Prediction")

uploaded_file = st.file_uploader("Upload Ground Water.csv", type="csv")

if uploaded_file is not None:
    data = pd.read_csv(uploaded_file)

    # Data preprocessing: fill missing numeric values with the column median
    numeric_columns = data.select_dtypes(include="number").columns
    data[numeric_columns] = data[numeric_columns].fillna(data[numeric_columns].median())

    # Clip numeric columns to the 1st and 99th percentiles to limit outliers
    numeric_data = data.select_dtypes(include=[np.number])
    data[numeric_data.columns] = numeric_data.clip(
        lower=numeric_data.quantile(0.01), upper=numeric_data.quantile(0.99), axis=1
    )

    # Encode categorical variables
    data = pd.get_dummies(data)

    # Scale numerical variables
    scaler = StandardScaler()
    float_columns = data.select_dtypes(include=["float64"]).columns
    data[float_columns] = scaler.fit_transform(data[float_columns])

    # Drop two dataset-specific columns, then use all but the last three columns
    # as features and the last column as the target
    data = data.drop(data.columns[[1, 2]], axis=1)
    X = data.iloc[:, :-3]
    y = data.iloc[:, -1]
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

    # Define the ANN architecture
    input_layer_size = X_train.shape[1]
    hidden_layer_size = 10
    output_layer_size = 1

    # Initialize and train the neural network
    nn = NeuralNetwork(input_layer_size, hidden_layer_size, output_layer_size, X_train)
    acc, losses, rmse_values = nn.train(X_train, y_train, epoch=10, alpha=0.01)

    # Make predictions
    predictions = nn.predict(X_test)

    # Display results
    st.write("Predictions:", predictions)
    accuracy = nn.accuracy(predictions, y_test)
    st.write("Test Accuracy:", accuracy)

    # Print a message describing groundwater quality for each test sample
    for i, prediction in enumerate(predictions):
        if prediction[0] == 1:
            st.write(f"Sample {i + 1}: Groundwater is harmful.")
        else:
            st.write(f"Sample {i + 1}: Groundwater is not harmful.")

    # Plot accuracy over training epochs
    st.subheader("Accuracy over Time")
    fig, ax = plt.subplots()
    ax.plot(acc)
    ax.set_xlabel("Epoch")
    ax.set_ylabel("Accuracy")
    st.pyplot(fig)

    # Plot loss over training epochs
    st.subheader("Loss over Time")
    fig, ax = plt.subplots()
    ax.plot(losses)
    ax.set_xlabel("Epoch")
    ax.set_ylabel("Loss")
    st.pyplot(fig)

    # Plot RMSE over training epochs
    st.subheader("RMSE over Time")
    fig, ax = plt.subplots()
    ax.plot(rmse_values)
    ax.set_xlabel("Epoch")
    ax.set_ylabel("RMSE")
    st.pyplot(fig)
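
# To run the app locally (assumed environment; exact package versions are not pinned here):
#   pip install streamlit pandas numpy scikit-learn matplotlib
#   streamlit run app.py
# Then upload the Ground Water.csv dataset through the file uploader in the browser.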