1
1
from random import rand, randn
2
2
from math import sqrt
3
3
from memory.unsafe import DTypePointer
4
- from types import Matrix
4
+ import numjo as nj
5
+ from numjo import Matrix
5
6
from time import now
6
7
from math import tanh
7
8
from math import exp
@@ -11,32 +12,45 @@ struct Network:
11
12
var _hnodes_l1 : Int
12
13
var _hnodes_l2 : Int
13
14
var _onodes : Int
14
- var lr : Float64
15
+ var lr : Float32
15
16
var _wih : Matrix
16
17
var _whh : Matrix
17
18
var _who : Matrix
18
19
var _bih_l1 : Matrix
19
20
var _bih_l2 : Matrix
20
21
var _bho : Matrix
21
- var _outputs : Matrix
22
22
23
- fn __init__ (inout self , input_nodes : Int, hidden_nodes_l1 : Int, hidden_nodes_l2 : Int, output_nodes : Int, learning_rate : Float64, outputs : Matrix ):
23
+ fn __init__ (inout self , input_nodes : Int, hidden_nodes_l1 : Int, hidden_nodes_l2 : Int, output_nodes : Int, learning_rate : Float32 ):
24
24
self ._inodes = input_nodes
25
25
self ._hnodes_l1 = hidden_nodes_l1
26
26
self ._hnodes_l2 = hidden_nodes_l2
27
27
self ._onodes = output_nodes
28
28
29
29
self .lr = learning_rate
30
+
31
+ self ._wih = Matrix(Float32(0.7 ), self ._inodes, self ._hnodes_l1)
32
+ # self._wih = Matrix.randn(self._inodes, self._hnodes_l1)
33
+ # randn(self._wih.data, self._wih.rows * self._wih.cols)
34
+
35
+ self ._whh = Matrix(Float32(0.4 ), self ._hnodes_l1, self ._hnodes_l2)
36
+ # self._whh = Matrix.randn(self._hnodes_l1, self._hnodes_l2)
37
+ # randn(self._whh.data, self._whh.rows * self._whh.cols)
38
+
39
+ self ._who = Matrix(Float32(0.15 ), self ._hnodes_l2, self ._onodes)
40
+ # self._who = Matrix.randn(self._hnodes_l2, self._onodes)
41
+ # randn(self._who.data, self._who.rows * self._who.cols)
30
42
31
- self ._wih = Matrix.randn(self ._inodes, self ._hnodes_l1) * Matrix(Float64(sqrt(2 / self ._inodes)), self ._inodes, self ._hnodes_l1)
32
- self ._whh = Matrix.randn(self ._hnodes_l1, self ._hnodes_l2) * Matrix(Float64(sqrt(2 / self ._hnodes_l1)), self ._hnodes_l1, self ._hnodes_l2)
33
- self ._who = Matrix.randn(self ._hnodes_l2, self ._onodes) * Matrix(Float64(sqrt(2 / self ._hnodes_l2)), self ._hnodes_l2, self ._onodes)
34
-
35
- self ._bih_l1 = Matrix.rand(1 , self ._hnodes_l1)
36
- self ._bih_l2 = Matrix.rand(1 , self ._hnodes_l2)
37
- self ._bho = Matrix.rand(1 , self ._onodes)
43
+ self ._bih_l1 = Matrix(Float32(0.2 ), 1 , self ._hnodes_l1)
44
+ # self._bih_l1 = Matrix.rand(1, self._hnodes_l1)
45
+ # rand(self._bih_l1.data, self._bih_l1.rows * self._bih_l1.cols)
38
46
39
- self ._outputs = outputs
47
+ self ._bih_l2 = Matrix(Float32(0 ,35 ), 1 , self ._hnodes_l2)
48
+ # self._bih_l2 = Matrix.randn(1, self._hnodes_l2)
49
+ # rand(self._bih_l2.data, self._bih_l2.rows * self._bih_l1.cols)
50
+
51
+ self ._bho = Matrix(Float32(0.45 ), 1 , self ._onodes)
52
+ # self._bho = Matrix.randn(1, self._onodes)
53
+ # rand(self._bho.data, self._bho.rows * self._bho.cols)
40
54
41
55
print (' Initialized a neural network\n '
42
56
' Input Nodes: ' + String(self ._inodes) + ' \n '
@@ -45,170 +59,155 @@ struct Network:
45
59
' Output Nodes: ' + String(self ._onodes))
46
60
47
61
@staticmethod
fn relu(A: Matrix) -> Matrix:
    """Apply a thresholded ReLU element-wise and return a new matrix.

    Entries strictly greater than 0.01 are copied through unchanged; every
    other entry is written as 0.0. The input matrix is not modified.
    """
    # NOTE(review): the cut-off is 0.01 rather than 0 -- confirm this is intended.
    # NOTE(review): the third constructor argument presumably requests a
    # zero-initialised buffer -- verify against numjo's Matrix.
    var activated: Matrix = Matrix(A.rows, A.cols, True)
    for row in range(activated.rows):
        for col in range(activated.cols):
            let value = A[row, col]
            if value > 0.01:
                activated[row, col] = value
            else:
                activated[row, col] = 0.0
    return activated
58
71
59
72
@staticmethod
fn drelu(A: Matrix) -> Matrix:
    """Return the element-wise derivative mask matching `relu`.

    Entries strictly greater than 0.01 map to 1.0; every other entry maps
    to 0.0. The input matrix is not modified.
    """
    # NOTE(review): the cut-off is 0.01 rather than 0 -- confirm this is intended.
    var mask: Matrix = Matrix(A.rows, A.cols, True)
    for row in range(mask.rows):
        for col in range(mask.cols):
            let value = A[row, col]
            if value > 0.01:
                mask[row, col] = 1.0
            else:
                mask[row, col] = 0.0
    return mask
69
82
70
83
@staticmethod
fn tanh(A: Matrix) -> Matrix:
    """Return a new matrix holding tanh of every entry of A."""
    # TODO: scalar double loop; a vectorised version would be much faster.
    var result: Matrix = Matrix(A.rows, A.cols, True)

    for row in range(A.rows):
        for col in range(A.cols):
            let value = A[row, col]
            result[row, col] = tanh(value)
    return result
79
92
80
93
@staticmethod
fn dtanh(A: Matrix) -> Matrix:
    """Return a new matrix holding 1 - tanh(a)^2 for every entry a of A."""
    # TODO: scalar double loop; a vectorised version would be much faster.
    var result: Matrix = Matrix(A.rows, A.cols, True)

    for row in range(A.rows):
        for col in range(A.cols):
            let squashed = tanh(A[row, col])
            result[row, col] = 1.0 - squashed ** 2
    return result
89
-
90
- fn mse (inout self , A : Matrix) -> Float64:
91
- var sum : Float64 = 0.0
92
- for i in range (A.width):
93
- for j in range (A.height):
94
- sum += A[j, i]
95
- return (sum ** 2 )/ A.height
96
-
97
102
98
103
@staticmethod
fn softmax_1d(A: Matrix) -> Matrix:
    """Return the row-wise softmax of A as a new matrix.

    Each row of the result is exp(a - max(row)) / sum(exp(a - max(row))).
    Subtracting the row maximum leaves the mathematical result unchanged
    but keeps exp() from overflowing when a row contains large values.
    A matrix with zero columns is returned as-is (nothing to normalise).
    """
    var B: Matrix = Matrix(A.rows, A.cols, True)
    if A.cols == 0:
        return B

    for i in range(A.rows):
        # Largest entry of the row, used as the stabilising shift.
        var row_max = A[i, 0]
        for j in range(1, A.cols):
            if A[i, j] > row_max:
                row_max = A[i, j]

        # Write the shifted exponentials with plain assignment so the result
        # does not depend on B having been zero-initialised.
        var row_exp_sum = exp(A[i, 0] - row_max)
        B[i, 0] = row_exp_sum
        for j in range(1, A.cols):
            let shifted_exp = exp(A[i, j] - row_max)
            B[i, j] = shifted_exp
            row_exp_sum += shifted_exp

        # row_exp_sum >= 1 because the maximum entry contributes exp(0).
        for j in range(A.cols):
            B[i, j] /= row_exp_sum
    return B
116
-
117
- fn query (inout self , inputs : Matrix, targets : Matrix, peval : Bool = False ) -> Float64:
118
- let output : Float64 = self .train(inputs, targets, train = False , peval = peval)
121
+
122
@staticmethod
fn dmse(output_error: Matrix) -> Matrix:
    """Scale every entry of output_error by 2 / (number of columns).

    Returns a new matrix of the same shape; output_error is not modified.
    """
    let scale: Float32 = 2.0 / output_error.cols
    # Element-wise product with a constant-filled matrix of the same shape.
    return output_error * Matrix(Float32(scale), output_error.rows, output_error.cols)
127
+
128
fn query(inout self, inputs: Matrix, targets: Matrix, peval: Bool = False) -> Matrix:
    """Run a forward pass without updating weights.

    Delegates to `train` with train = False and returns whatever it returns.
    """
    return self.train(inputs, targets, train = False, peval = peval)
120
131
121
fn train(inout self, inputs: Matrix, targets: Matrix, train: Bool = True, peval: Bool = False) -> Matrix:
    """Run one forward/backward pass and optionally update the weights.

    Args:
        inputs: input rows, multiplied directly with the input->hidden1 weights.
        targets: expected outputs, same shape as the network output.
        train: when True, weights and biases are updated via `_update`.
        peval: when True, return the elapsed time instead of loss/outputs.

    Returns:
        A 1x1 matrix with the elapsed time (units of `now()`) if peval is set;
        otherwise a 1x1 matrix with the mean squared error when train is set,
        or the network outputs when it is not.
    """
    # Work buffers. The output buffer follows the batch rows of the last
    # hidden layer instead of assuming a single row.
    var inputs_h1: Matrix = Matrix(inputs.rows, self._wih.cols)
    var inputs_h2: Matrix = Matrix(inputs_h1.rows, self._whh.cols)
    var outputs: Matrix = Matrix(inputs_h2.rows, self._onodes)
    var hidden_errors_2: Matrix = Matrix(outputs.rows, self._who.rows)
    var hidden_errors_1: Matrix = Matrix(hidden_errors_2.rows, self._whh.rows)
    var loss: Matrix = Matrix(1, 1)
    var end_time_mat: Matrix = Matrix(1, 1)

    let time_now = now()

    # Hidden layer 1: relu(inputs . wih + bias).
    # Destination buffers are cleared before every matmul_vectorized call.
    inputs_h1.zero()
    nj.matmul_vectorized(inputs_h1, inputs, self._wih)
    inputs_h1 = inputs_h1 + self._bih_l1
    inputs_h1 = self.relu(inputs_h1)

    # Hidden layer 2: tanh(h1 . whh + bias).
    inputs_h2.zero()
    nj.matmul_vectorized(inputs_h2, inputs_h1, self._whh)
    inputs_h2 = inputs_h2 + self._bih_l2
    inputs_h2 = self.tanh(inputs_h2)

    # Output layer: softmax(h2 . who + bias).
    outputs.zero()
    nj.matmul_vectorized(outputs, inputs_h2, self._who)
    outputs = outputs + self._bho
    outputs = self.softmax_1d(outputs)

    # Loss is the mean of the squared error (previously the mean was squared
    # a second time).
    let output_error: Matrix = targets - outputs
    loss.store[1](0, 0, nj.mean(output_error ** 2))

    # Back-propagate the signed per-output error. The earlier code fed a
    # matrix filled with the scalar loss, which carries no per-output or sign
    # information.
    # NOTE(review): sign convention (targets - outputs) assumes nj.update adds
    # lr * gradient -- confirm against numjo.
    let output_error_gradient: Matrix = self.dmse(output_error)

    # These buffers were previously not cleared before the matmul, unlike
    # every other destination buffer in this struct.
    hidden_errors_2.zero()
    nj.matmul_vectorized(hidden_errors_2, output_error_gradient, self._who.transpose())
    # NOTE(review): dtanh is applied to the already-activated inputs_h2, i.e.
    # tanh is evaluated twice; confirm whether 1 - inputs_h2^2 was intended.
    hidden_errors_1.zero()
    nj.matmul_vectorized(hidden_errors_1, (hidden_errors_2 * self.dtanh(inputs_h2)), self._whh.transpose())

    if train:
        self._update(inputs, inputs_h1, inputs_h2, hidden_errors_1, hidden_errors_2, output_error_gradient)

    # Timing covers the weight update when one was performed.
    let end_time = Float32(now() - time_now)
    end_time_mat.store[1](0, 0, end_time)

    if peval:
        return end_time_mat
    if train:
        return loss
    return outputs
165
188
166
- return self .mse(output_error)
167
-
168
189
fn _update(inout self, inputs: Matrix, inputs_h1: Matrix, inputs_h2: Matrix, hidden_errors_1: Matrix, hidden_errors_2: Matrix, output_error_gradient: Matrix):
    """Apply one update step to all weights and biases.

    Args:
        inputs: network inputs used for the forward pass.
        inputs_h1: activations of hidden layer 1.
        inputs_h2: activations of hidden layer 2.
        hidden_errors_1: error propagated back to hidden layer 1.
        hidden_errors_2: error propagated back to hidden layer 2.
        output_error_gradient: gradient at the output layer.
    """
    # Layer deltas: back-propagated error times the activation derivative.
    let delta_h2: Matrix = hidden_errors_2 * self.dtanh(inputs_h2)
    let delta_h1: Matrix = hidden_errors_1 * self.drelu(inputs_h1)

    # Weight gradients, shaped like the matching weight matrices.
    var grad_who: Matrix = Matrix(inputs_h2.cols, output_error_gradient.cols)
    var grad_whh: Matrix = Matrix(inputs_h1.cols, delta_h2.cols)
    var grad_wih: Matrix = Matrix(inputs.cols, delta_h1.cols)

    # Destination buffers are cleared before every matmul_vectorized call.
    grad_who.zero()
    nj.matmul_vectorized(grad_who, inputs_h2.transpose(), output_error_gradient)
    grad_whh.zero()
    nj.matmul_vectorized(grad_whh, inputs_h1.transpose(), delta_h2)
    grad_wih.zero()
    nj.matmul_vectorized(grad_wih, inputs.transpose(), delta_h1)

    # Weights.
    nj.update(self._who, grad_who, self.lr)
    nj.update(self._whh, grad_whh, self.lr)
    nj.update(self._wih, grad_wih, self.lr)

    # Biases. Each bias gets the delta of its own layer: _bih_l1 is
    # 1 x hnodes_l1 like delta_h1, and _bih_l2 is 1 x hnodes_l2 like delta_h2
    # (the two were previously swapped).
    # TODO: summing the delta rows first would be better for multi-row inputs.
    nj.update(self._bho, output_error_gradient, self.lr)
    nj.update(self._bih_l1, delta_h1, self.lr)
    nj.update(self._bih_l2, delta_h2, self.lr)
213
+
0 commit comments