Skip to content

Commit 3aebfaa

Browse files
committed
introduced memory leak idk
1 parent 7aab0b2 commit 3aebfaa

14 files changed

+655
-454
lines changed

.gitignore

+3-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
*.csv
22
.vscode
33
__pycache__
4-
main
4+
main
5+
test_numjo
6+
matmul

Network.mojo

+116-117
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
from random import rand, randn
22
from math import sqrt
33
from memory.unsafe import DTypePointer
4-
from types import Matrix
4+
import numjo as nj
5+
from numjo import Matrix
56
from time import now
67
from math import tanh
78
from math import exp
@@ -11,32 +12,45 @@ struct Network:
1112
var _hnodes_l1: Int
1213
var _hnodes_l2: Int
1314
var _onodes: Int
14-
var lr: Float64
15+
var lr: Float32
1516
var _wih: Matrix
1617
var _whh: Matrix
1718
var _who: Matrix
1819
var _bih_l1: Matrix
1920
var _bih_l2: Matrix
2021
var _bho: Matrix
21-
var _outputs: Matrix
2222

23-
fn __init__(inout self, input_nodes: Int, hidden_nodes_l1: Int, hidden_nodes_l2: Int, output_nodes: Int, learning_rate: Float64, outputs: Matrix):
23+
fn __init__(inout self, input_nodes: Int, hidden_nodes_l1: Int, hidden_nodes_l2: Int, output_nodes: Int, learning_rate: Float32):
2424
self._inodes = input_nodes
2525
self._hnodes_l1 = hidden_nodes_l1
2626
self._hnodes_l2 = hidden_nodes_l2
2727
self._onodes = output_nodes
2828

2929
self.lr = learning_rate
30+
31+
self._wih = Matrix(Float32(0.7), self._inodes, self._hnodes_l1)
32+
# self._wih = Matrix.randn(self._inodes, self._hnodes_l1)
33+
# randn(self._wih.data, self._wih.rows * self._wih.cols)
34+
35+
self._whh = Matrix(Float32(0.4), self._hnodes_l1, self._hnodes_l2)
36+
# self._whh = Matrix.randn(self._hnodes_l1, self._hnodes_l2)
37+
# randn(self._whh.data, self._whh.rows * self._whh.cols)
38+
39+
self._who = Matrix(Float32(0.15), self._hnodes_l2, self._onodes)
40+
# self._who = Matrix.randn(self._hnodes_l2, self._onodes)
41+
# randn(self._who.data, self._who.rows * self._who.cols)
3042

31-
self._wih = Matrix.randn(self._inodes, self._hnodes_l1) * Matrix(Float64(sqrt(2/self._inodes)), self._inodes, self._hnodes_l1)
32-
self._whh = Matrix.randn(self._hnodes_l1, self._hnodes_l2) * Matrix(Float64(sqrt(2/self._hnodes_l1)), self._hnodes_l1, self._hnodes_l2)
33-
self._who = Matrix.randn(self._hnodes_l2, self._onodes) * Matrix(Float64(sqrt(2/self._hnodes_l2)), self._hnodes_l2, self._onodes)
34-
35-
self._bih_l1 = Matrix.rand(1, self._hnodes_l1)
36-
self._bih_l2 = Matrix.rand(1, self._hnodes_l2)
37-
self._bho = Matrix.rand(1, self._onodes)
43+
self._bih_l1 = Matrix(Float32(0.2), 1, self._hnodes_l1)
44+
# self._bih_l1 = Matrix.rand(1, self._hnodes_l1)
45+
# rand(self._bih_l1.data, self._bih_l1.rows * self._bih_l1.cols)
3846

39-
self._outputs = outputs
47+
self._bih_l2 = Matrix(Float32(0.35), 1, self._hnodes_l2)  # layer-2 bias: 1 x hnodes_l2, constant fill 0.35
48+
# self._bih_l2 = Matrix.randn(1, self._hnodes_l2)
49+
# rand(self._bih_l2.data, self._bih_l2.rows * self._bih_l2.cols)
50+
51+
self._bho = Matrix(Float32(0.45), 1, self._onodes)
52+
# self._bho = Matrix.randn(1, self._onodes)
53+
# rand(self._bho.data, self._bho.rows * self._bho.cols)
4054

4155
print('Initialized a neural network\n'
4256
'Input Nodes: ' + String(self._inodes) + '\n'
@@ -45,170 +59,155 @@ struct Network:
4559
'Output Nodes: ' + String(self._onodes))
4660

4761
@staticmethod
48-
fn lrelu(A: Matrix) -> Matrix:
49-
var B: Matrix = Matrix(A.height, A.width)
50-
for i in range(B.height):
51-
for j in range(B.width):
62+
fn relu(A: Matrix) -> Matrix:
63+
var B: Matrix = Matrix(A.rows, A.cols, True)
64+
for i in range(B.rows):
65+
for j in range(B.cols):
5266
if A[i, j] > 0.01:
5367
B[i, j] = A[i, j]
5468
else:
55-
B[i, j] = A[i, j] * 0.01
56-
69+
B[i, j] = 0.0
5770
return B
5871

5972
@staticmethod
60-
fn dlrelu(A: Matrix) -> Matrix:
61-
var B: Matrix = Matrix(A.height, A.width)
62-
for i in range(B.height):
63-
for j in range(B.width):
73+
fn drelu(A: Matrix) -> Matrix:
74+
var B: Matrix = Matrix(A.rows, A.cols, True)
75+
for i in range(B.rows):
76+
for j in range(B.cols):
6477
if A[i, j] > 0.01:
6578
B[i, j] = 1.0
6679
else:
67-
B[i, j] = 0.01
80+
B[i, j] = 0.0
6881
return B
6982

7083
@staticmethod
7184
fn tanh(A: Matrix) -> Matrix:
7285
# could use a lot of optimization
73-
var B: Matrix = Matrix(A.height, A.width)
86+
var B: Matrix = Matrix(A.rows, A.cols, True)
7487

75-
for i in range(A.height):
76-
for j in range(A.width):
88+
for i in range(A.rows):
89+
for j in range(A.cols):
7790
B[i, j] = tanh(A[i, j])
7891
return B
7992

8093
@staticmethod
8194
fn dtanh(A: Matrix) -> Matrix:
8295
# could use a lot of optimization
83-
var B: Matrix = Matrix(A.height, A.width)
96+
var B: Matrix = Matrix(A.rows, A.cols, True)
8497

85-
for i in range(A.height):
86-
for j in range(A.width):
98+
for i in range(A.rows):
99+
for j in range(A.cols):
87100
B[i, j] = 1.0 - tanh(A[i, j]) ** 2
88101
return B
89-
90-
fn mse(inout self, A: Matrix) -> Float64:
91-
var sum: Float64 = 0.0
92-
for i in range(A.width):
93-
for j in range(A.height):
94-
sum += A[j, i]
95-
return (sum**2)/A.height
96-
97102

98103
@staticmethod
99104
fn softmax_1d(A: Matrix) -> Matrix:
100105
# could use a lot of optimization
101-
var B: Matrix = Matrix(A.height, A.width)
102-
var row_exp_sum: Float64 = 0.0
106+
var B: Matrix = Matrix(A.rows, A.cols, True)
103107

104-
for i in range(A.height):
105-
for j in range(A.width):
106-
B[i, j] += exp(A[i, j])
108+
var row_exp_sum_mat: Matrix = Matrix(A.rows, 1, True)
109+
for i in range(A.rows):
110+
for j in range(A.cols):
111+
B[i, j] += exp(A[i, j])
107112

108-
for i in range(A.height):
109-
for j in range(A.width):
110-
row_exp_sum += B[i, j]
113+
for i in range(A.rows):
114+
for j in range(A.cols):
115+
row_exp_sum_mat[i, 0] += B[i, j]
111116

112-
for i in range(A.height):
113-
for j in range(A.width):
114-
B[i, j] /= row_exp_sum
117+
for i in range(A.rows):
118+
for j in range(A.cols):
119+
B[i, j] /= row_exp_sum_mat[i, 0]
115120
return B
116-
117-
fn query(inout self, inputs: Matrix, targets: Matrix, peval: Bool = False) -> Float64:
118-
let output: Float64 = self.train(inputs, targets, train = False, peval=peval)
121+
122+
@staticmethod
123+
fn dmse(output_error: Matrix) -> Matrix:
124+
let deriv_coef: Float32 = 2.0 / output_error.cols
125+
let deriv = output_error * Matrix(Float32(deriv_coef), output_error.rows, output_error.cols)
126+
return deriv
127+
128+
fn query(inout self, inputs: Matrix, targets: Matrix, peval: Bool = False) -> Matrix:
129+
let output: Matrix = self.train(inputs, targets, train = False, peval=peval)
119130
return output
120131

121-
fn train(inout self, inputs: Matrix, targets: Matrix, train: Bool = True, peval: Bool = False) -> Float64:
132+
fn train(inout self, inputs: Matrix, targets: Matrix, train: Bool = True, peval: Bool = False) -> Matrix:
122133
# init some matrices
123-
var inputs_h1: Matrix = Matrix(inputs.width, self._wih.width)
124-
var inputs_h2: Matrix = Matrix(inputs_h1.height, self._whh.width)
125-
var output_error: Matrix = Matrix(1, self._onodes)
134+
var inputs_h1: Matrix = Matrix(inputs.rows, self._wih.cols)
135+
var inputs_h2: Matrix = Matrix(inputs_h1.rows, self._whh.cols)
136+
var output_error: Matrix = Matrix(inputs_h2.rows, self._onodes)
126137
var output_error_gradient: Matrix = Matrix(1, self._onodes)
127-
var hidden_errors_2: Matrix = Matrix(output_error_gradient.height, self._who.height)
128-
var hidden_errors_1: Matrix = Matrix(hidden_errors_2.height, self._whh.height)
138+
var hidden_errors_2: Matrix = Matrix(output_error_gradient.rows, self._who.rows)
139+
var hidden_errors_1: Matrix = Matrix(hidden_errors_2.rows, self._whh.rows)
140+
var outputs: Matrix = Matrix(1, self._onodes)
129141

130142
let time_now = now()
131-
132143
# calc output hidden layer1
133-
Matrix.matmul_vectorized(inputs_h1, inputs.transpose(), self._wih)
144+
inputs_h1.zero()
145+
nj.matmul_vectorized(inputs_h1, inputs, self._wih)
134146
inputs_h1 = inputs_h1 + self._bih_l1
135-
inputs_h1 = self.lrelu(inputs_h1)
147+
inputs_h1 = self.relu(inputs_h1)
136148

137149
# calc output hidden layer 2
138-
Matrix.matmul_vectorized(inputs_h2, inputs_h1, self._whh)
150+
inputs_h2.zero()
151+
nj.matmul_vectorized(inputs_h2, inputs_h1, self._whh)
139152
inputs_h2 = inputs_h2 + self._bih_l2
140153
inputs_h2 = self.tanh(inputs_h2)
141154

142155
# calc output output layer
143-
Matrix.matmul_vectorized(self._outputs, inputs_h2, self._who)
144-
self._outputs = self._outputs + self._bho
145-
self._outputs = self.softmax_1d(self._outputs)
156+
outputs.zero()
157+
nj.matmul_vectorized(outputs, inputs_h2, self._who)
158+
outputs = outputs + self._bho
159+
outputs = self.softmax_1d(outputs)
146160

147-
output_error = targets.transpose() - self._outputs
148-
output_error_gradient = output_error
161+
output_error = (targets - outputs)**2
162+
var loss: Matrix = Matrix(1, 1)
163+
loss.store[1](0, 0, nj.mean(output_error)**2)
164+
output_error = Matrix(Float32(loss[0, 0]), output_error.rows, output_error.cols)
165+
output_error_gradient = self.dmse(output_error)
149166

150-
Matrix.matmul_vectorized(hidden_errors_2, output_error_gradient, self._who.transpose())
151-
Matrix.matmul_vectorized(hidden_errors_1, (hidden_errors_2 * self.dtanh(inputs_h2)), self._whh.transpose())
167+
nj.matmul_vectorized(hidden_errors_2, output_error_gradient, self._who.transpose())
168+
nj.matmul_vectorized(hidden_errors_1, (hidden_errors_2 * self.dtanh(inputs_h2)), self._whh.transpose())
152169

153-
# could return everything as matrices
170+
var end_time_mat: Matrix = Matrix(1, 1)
171+
154172
if train:
155173
self._update(inputs, inputs_h1, inputs_h2, hidden_errors_1, hidden_errors_2, output_error_gradient)
156-
let end_time = Float64(now() - time_now)
174+
let end_time = Float32(now() - time_now)
175+
end_time_mat.store[1](0, 0, end_time)
157176
if peval:
158-
return end_time
177+
return end_time_mat
159178
else:
160-
return self.mse(output_error)
161-
else:
162-
let end_time = Float64(now() - time_now)
163-
if peval:
164-
return end_time
179+
return loss
180+
181+
let end_time = Float32(now() - time_now)
182+
end_time_mat.store[1](0, 0, end_time)
183+
184+
if peval:
185+
return end_time_mat
186+
187+
return outputs
165188

166-
return self.mse(output_error)
167-
168189
fn _update(inout self, inputs: Matrix, inputs_h1: Matrix, inputs_h2: Matrix, hidden_errors_1: Matrix, hidden_errors_2: Matrix, output_error_gradient: Matrix):
169190
let ho2_drelu: Matrix = hidden_errors_2 * self.dtanh(inputs_h2)
170-
let ho1_drelu: Matrix = hidden_errors_1 * self.dlrelu(inputs_h1)
191+
let ho1_drelu: Matrix = hidden_errors_1 * self.drelu(inputs_h1)
171192

172-
var ih2_o: Matrix = Matrix(inputs_h2.width, output_error_gradient.width)
173-
var ih1_ho2: Matrix = Matrix(inputs_h1.width, ho2_drelu.width)
174-
var i_ho1: Matrix = Matrix(inputs.height, ho1_drelu.width)
193+
var ih2_o: Matrix = Matrix(inputs_h2.cols, output_error_gradient.cols)
194+
var ih1_ho2: Matrix = Matrix(inputs_h1.cols, ho2_drelu.cols)
195+
var i_ho1: Matrix = Matrix(inputs.cols, ho1_drelu.cols)
175196

176-
Matrix.matmul_vectorized(ih2_o, inputs_h2.transpose(), output_error_gradient)
177-
Matrix.matmul_vectorized(ih1_ho2, inputs_h1.transpose(), ho2_drelu)
178-
Matrix.matmul_vectorized(i_ho1, inputs, ho1_drelu)
197+
ih2_o.zero()
198+
nj.matmul_vectorized(ih2_o, inputs_h2.transpose(), output_error_gradient)
199+
ih1_ho2.zero()
200+
nj.matmul_vectorized(ih1_ho2, inputs_h1.transpose(), ho2_drelu)
201+
i_ho1.zero()
202+
nj.matmul_vectorized(i_ho1, inputs.transpose(), ho1_drelu)
179203

180204
# updating weights and biases
181-
Matrix.update(self._who, ih2_o, self.lr)
182-
Matrix.update(self._whh, ih1_ho2, self.lr)
183-
Matrix.update(self._wih, i_ho1, self.lr)
205+
nj.update(self._who, ih2_o, self.lr)
206+
nj.update(self._whh, ih1_ho2, self.lr)
207+
nj.update(self._wih, i_ho1, self.lr)
184208

185209
# sum of the A matrices would be better
186-
Matrix.update(self._bho, output_error_gradient, self.lr)
187-
Matrix.update(self._bih_l1, ho2_drelu, self.lr)
188-
Matrix.update(self._bih_l2, ho1_drelu, self.lr)
189-
190-
'''
191-
var who_: Matrix = Matrix(self._who.height, self._who.width)
192-
var whh_: Matrix = Matrix(self._whh.height, self._whh.width)
193-
var wih_: Matrix = Matrix(self._wih.height, self._wih.width)
194-
195-
var bho_: Matrix = Matrix(self._bho.height, output_error_gradient.width)
196-
var bih_l1_: Matrix = Matrix(self._bih_l1.height, ho2_drelu.width)
197-
var bih_l2_: Matrix = Matrix(self._bih_l2.height, ho1_drelu.height)
198-
199-
# for reference
200-
Matrix.update(who_, ih2_o, self.lr)
201-
Matrix.update(whh_, ih1_ho2, self.lr)
202-
Matrix.update(wih_, i_ho1, self.lr)
203-
204-
Matrix.update(bho_, output_error_gradient, self.lr)
205-
Matrix.update(bih_l1_, ho2_drelu, self.lr)
206-
Matrix.update(bih_l2_, ho1_drelu, self.lr)
207-
208-
if self._who != who_:
209-
print("Weights updated")
210-
else:
211-
print("Weights not updated")
212-
'''
213-
214-
210+
nj.update(self._bho, output_error_gradient, self.lr)
211+
nj.update(self._bih_l1, ho1_drelu, self.lr)  # layer-1 bias takes the layer-1 gradient (hnodes_l1 columns)
212+
nj.update(self._bih_l2, ho2_drelu, self.lr)  # layer-2 bias takes the layer-2 gradient (hnodes_l2 columns)
213+

0 commit comments

Comments
 (0)