dense.cu
#include "dense.cuh"
Dense::Dense(int input_size, int output_size, cudnnActivationMode_t mode)
{
    this->input_size = input_size;
    this->output_size = output_size;
    this->activation_mode = mode;
    // Allocate parameters, activations, and gradient buffers on the device.
    CUDA_CHECK(cudaMalloc((void **)&weights, input_size * output_size * sizeof(double)));
    CUDA_CHECK(cudaMalloc((void **)&bias, output_size * sizeof(double)));
    CUDA_CHECK(cudaMalloc((void **)&a, output_size * sizeof(double)));
    CUDA_CHECK(cudaMalloc((void **)&d_weights, input_size * output_size * sizeof(double)));
    CUDA_CHECK(cudaMalloc((void **)&d_bias, output_size * sizeof(double)));
    CUDA_CHECK(cudaMalloc((void **)&input_grad, input_size * sizeof(double)));
    CUDA_CHECK(cudaMalloc((void **)&z, output_size * sizeof(double)));
    CUDA_CHECK(cudaMalloc((void **)&dz, output_size * sizeof(double)));
    // Initialise weights and bias from N(0, 0.1) with cuRAND.
    curandGenerator_t curand_generator;
    curandCreateGenerator(&curand_generator, CURAND_RNG_PSEUDO_DEFAULT);
    // Fixed seed for reproducible initialisation.
    curandSetPseudoRandomGeneratorSeed(curand_generator, 1234ULL);
    curandGenerateNormalDouble(curand_generator, weights, input_size * output_size, 0, 0.1);
    curandGenerateNormalDouble(curand_generator, bias, output_size, 0, 0.1);
    // Zero the gradient accumulators (cudaMemset writes bytes, so 0 zeroes the doubles too).
    CUDA_CHECK(cudaMemset(d_weights, 0, input_size * output_size * sizeof(double)));
    CUDA_CHECK(cudaMemset(d_bias, 0, output_size * sizeof(double)));
    CUDA_CHECK(cudaMemset(input_grad, 0, input_size * sizeof(double)));
    CUBLAS_CHECK(cublasCreate(&cublas_handle));
    CUDNN_CHECK(cudnnCreate(&cudnn_handle));
    curandDestroyGenerator(curand_generator);
    // Activation and tensor descriptors: vectors are described as 1x1xNx1 tensors.
    CUDNN_CHECK(cudnnCreateActivationDescriptor(&act_desc));
    CUDNN_CHECK(cudnnSetActivationDescriptor(act_desc, activation_mode, CUDNN_PROPAGATE_NAN, 0));
    CUDNN_CHECK(cudnnCreateTensorDescriptor(&input_desc));
    CUDNN_CHECK(cudnnCreateTensorDescriptor(&output_desc));
    CUDNN_CHECK(cudnnSetTensor4dDescriptor(input_desc, CUDNN_TENSOR_NCHW, CUDNN_DATA_DOUBLE, 1, 1, input_size, 1));
    CUDNN_CHECK(cudnnSetTensor4dDescriptor(output_desc, CUDNN_TENSOR_NCHW, CUDNN_DATA_DOUBLE, 1, 1, output_size, 1));
}
Dense::~Dense()
{
    try {
        CUDA_CHECK(cudaFree(weights));
        CUDA_CHECK(cudaFree(bias));
        CUDA_CHECK(cudaFree(a));
        CUDA_CHECK(cudaFree(d_weights));
        CUDA_CHECK(cudaFree(d_bias));
        CUDA_CHECK(cudaFree(input_grad));
        CUDA_CHECK(cudaFree(z));
        CUDA_CHECK(cudaFree(dz));
        CUBLAS_CHECK(cublasDestroy(cublas_handle));
        CUDNN_CHECK(cudnnDestroy(cudnn_handle));
        CUDNN_CHECK(cudnnDestroyActivationDescriptor(act_desc));
        CUDNN_CHECK(cudnnDestroyTensorDescriptor(input_desc));
        CUDNN_CHECK(cudnnDestroyTensorDescriptor(output_desc));
    } catch (const std::exception &e) {
        std::cerr << e.what() << '\n';
    }
}
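
// Forward pass for a single sample:
//   z = W^T x + b,  a = f(z)
// W is stored column-major as an (input_size x output_size) matrix, which is
// why the GEMM below uses CUBLAS_OP_T. The returned pointer is the layer's own
// device buffer `a`, so the caller must not free it.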
double* Dense::forward(double *input_data) {
    this->input = input_data;
    double alpha = 1.0;
    double beta = 0.0;
    CUDA_CHECK(cudaMemset(z, 0, output_size * sizeof(double)));
    // z = W^T * x
    CUBLAS_CHECK(cublasDgemm(cublas_handle, CUBLAS_OP_T, CUBLAS_OP_N,
                             output_size, 1, input_size,
                             &alpha, weights, input_size, input_data, input_size,
                             &beta, z, output_size));
    // z = z + b
    CUBLAS_CHECK(cublasDaxpy(cublas_handle, output_size, &alpha, bias, 1, z, 1));
    // a = f(z)
    CUDNN_CHECK(cudnnActivationForward(cudnn_handle, act_desc, &alpha, output_desc, z, &beta, output_desc, a));
    return a;
}
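
// Backward pass, given dL/da from the next layer:
//   dz         = f'(z) .* dL/da
//   d_weights += x dz^T      (accumulated across calls until update())
//   d_bias    += dz
//   input_grad = W dz        (returned as dL/dx for the previous layer)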
double* Dense::backward(double *output_grad) {
    double alpha = 1.0;
    double beta = 0.0;
    // dz = f'(z) .* dL/da
    CUDNN_CHECK(cudnnActivationBackward(cudnn_handle, act_desc, &alpha, output_desc, a, output_desc, output_grad, output_desc, z, &beta, output_desc, dz));
    // d_weights += x * dz^T (passing alpha as beta accumulates into the existing gradient)
    CUBLAS_CHECK(cublasDgemm(cublas_handle, CUBLAS_OP_N, CUBLAS_OP_N,
                             input_size, output_size, 1,
                             &alpha, input, input_size, dz, 1,
                             &alpha, d_weights, input_size));
    // std::cout << "d_weights" << std::endl;
    // cublasPrintMat(d_weights, input_size, output_size);
    // d_bias += dz
    CUBLAS_CHECK(cublasDaxpy(cublas_handle, output_size, &alpha, dz, 1, d_bias, 1));
    // cublasPrintMat(d_bias, output_size, 1, "d_bias");
    // input_grad = W * dz
    CUBLAS_CHECK(cublasDgemm(cublas_handle, CUBLAS_OP_N, CUBLAS_OP_N,
                             input_size, 1, output_size,
                             &alpha, weights, input_size, dz, output_size,
                             &beta, input_grad, input_size));
    return input_grad;
}
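
// Plain stochastic-gradient-descent step:
//   W <- W - lr * d_weights,  b <- b - lr * d_bias
// followed by zeroing the accumulated gradients.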
void Dense::update(double lr) {
    // Update weights and bias.
    double alpha = -lr;
    CUBLAS_CHECK(cublasDaxpy(cublas_handle, input_size * output_size, &alpha, d_weights, 1, weights, 1));
    CUBLAS_CHECK(cublasDaxpy(cublas_handle, output_size, &alpha, d_bias, 1, bias, 1));
    // Reset d_weights and d_bias for the next step.
    CUDA_CHECK(cudaMemset(d_weights, 0, input_size * output_size * sizeof(double)));
    CUDA_CHECK(cudaMemset(d_bias, 0, output_size * sizeof(double)));
}
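
// ---------------------------------------------------------------------------
// Usage sketch (not part of the original layer code): drives one training step
// against a synthetic input and loss gradient. The DENSE_USAGE_SKETCH guard,
// the toy sizes/values, and the learning rate are illustrative assumptions.
// Build with -DDENSE_USAGE_SKETCH to compile this example main().
// ---------------------------------------------------------------------------
#ifdef DENSE_USAGE_SKETCH
#include <cstdio>

int main()
{
    const int in_size = 4, out_size = 2;
    Dense layer(in_size, out_size, CUDNN_ACTIVATION_RELU);

    // Toy input x and upstream gradient dL/da, copied to the device.
    double h_x[in_size] = {0.1, 0.2, 0.3, 0.4};
    double h_grad[out_size] = {1.0, -1.0};
    double *d_x, *d_grad;
    CUDA_CHECK(cudaMalloc((void **)&d_x, in_size * sizeof(double)));
    CUDA_CHECK(cudaMalloc((void **)&d_grad, out_size * sizeof(double)));
    CUDA_CHECK(cudaMemcpy(d_x, h_x, in_size * sizeof(double), cudaMemcpyHostToDevice));
    CUDA_CHECK(cudaMemcpy(d_grad, h_grad, out_size * sizeof(double), cudaMemcpyHostToDevice));

    double *d_a = layer.forward(d_x);   // a = f(W^T x + b), device pointer owned by the layer
    double h_a[out_size];
    CUDA_CHECK(cudaMemcpy(h_a, d_a, out_size * sizeof(double), cudaMemcpyDeviceToHost));
    std::printf("a = [%f, %f]\n", h_a[0], h_a[1]);

    layer.backward(d_grad);             // accumulates d_weights and d_bias
    layer.update(0.01);                 // SGD step, then gradients are zeroed

    CUDA_CHECK(cudaFree(d_x));
    CUDA_CHECK(cudaFree(d_grad));
    return 0;
}
#endif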