diff --git a/.gitignore b/.gitignore index 259148fa1..44bd59f74 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,6 @@ +# NetCDF +*.nc + # Prerequisites *.d diff --git a/app/train-cloud-microphysics.f90 b/app/train-cloud-microphysics.f90 index 6f70ba4cc..e24f82333 100644 --- a/app/train-cloud-microphysics.f90 +++ b/app/train-cloud-microphysics.f90 @@ -38,7 +38,8 @@ program train_cloud_microphysics !! Internal dependencies; use inference_engine_m, only : & - inference_engine_t, mini_batch_t, input_output_pair_t, tensor_t, trainable_engine_t, rkind, NetCDF_file_t, sigmoid_t + inference_engine_t, mini_batch_t, input_output_pair_t, tensor_t, trainable_engine_t, rkind, NetCDF_file_t, sigmoid_t, & + training_configuration_t use ubounds_m, only : ubounds_t implicit none @@ -46,7 +47,7 @@ program train_cloud_microphysics type(command_line_t) command_line type(file_t) plot_file type(string_t), allocatable :: lines(:) - character(len=*), parameter :: plot_file_name = "cost.plt" + character(len=*), parameter :: plot_file_name = "cost.plt", training_configuration_json = "training_configuration.json " character(len=:), allocatable :: base_name, stride_string, epochs_string, last_line integer plot_unit, stride, num_epochs, previous_epoch logical preexisting_plot_file @@ -76,7 +77,7 @@ program train_cloud_microphysics read(last_line,*) previous_epoch end if - call read_train_write + call read_train_write(training_configuration_t(file_t(string_t(training_configuration_json)))) close(plot_unit) call system_clock(t_finish) @@ -85,7 +86,8 @@ program train_cloud_microphysics contains - subroutine read_train_write + subroutine read_train_write(training_configuration) + type(training_configuration_t), intent(in) :: training_configuration real, allocatable, dimension(:,:,:,:) :: & pressure_in , potential_temperature_in , temperature_in , & pressure_out, potential_temperature_out, temperature_out, & @@ -195,7 +197,7 @@ subroutine read_train_write else close(network_unit) print *,"Initializing a new network" - trainable_engine = new_engine(num_hidden_layers=6, nodes_per_hidden_layer=16, num_inputs=8, num_outputs=6, random=.false.) + trainable_engine = new_engine(training_configuration, randomize=.true.) end if print *,"Defining tensors from time steps 1 through", t_end, "with strides of", stride @@ -229,7 +231,12 @@ subroutine read_train_write end associate end associate - associate(num_pairs => size(input_output_pairs), n_bins => 1) ! also tried n_bins => size(input_output_pairs)/10000 + associate( & + num_pairs => size(input_output_pairs), & + n_bins => training_configuration%mini_batches(), & + adam => merge(.true., .false., training_configuration%optimizer_name() == "adam"), & + learning_rate => training_configuration%learning_rate() & + ) bins = [(bin_t(num_items=num_pairs, num_bins=n_bins, bin_number=b), b = 1, n_bins)] print *,"Training network" @@ -239,7 +246,7 @@ subroutine read_train_write call shuffle(input_output_pairs) ! set up for stochastic gradient descent mini_batches = [(mini_batch_t(input_output_pairs(bins(b)%first():bins(b)%last())), b = 1, size(bins))] - call trainable_engine%train(mini_batches, cost) + call trainable_engine%train(mini_batches, cost, adam, learning_rate) print *, epoch, minval(cost), maxval(cost), sum(cost)/size(cost) write(plot_unit,*) epoch, minval(cost), maxval(cost), sum(cost)/size(cost) @@ -267,19 +274,26 @@ subroutine read_train_write end subroutine read_train_write - function new_engine(num_hidden_layers, nodes_per_hidden_layer, num_inputs, num_outputs, random) result(trainable_engine) - integer, intent(in) :: num_hidden_layers, nodes_per_hidden_layer, num_inputs, num_outputs - logical, intent(in) :: random + function new_engine(training_configuration, randomize) result(trainable_engine) + logical, intent(in) :: randomize + type(training_configuration_t), intent(in) :: training_configuration type(trainable_engine_t) trainable_engine real(rkind), allocatable :: w(:,:,:), b(:,:) + character(len=len('YYYMMDD')) date integer l + + call date_and_time(date) - associate(nodes => [num_inputs, [(nodes_per_hidden_layer, l = 1, num_hidden_layers)], num_outputs]) + associate( & + nodes => training_configuration%nodes_per_layer(), & + activation => training_configuration%differentiable_activation_strategy(), & + residual_network => string_t(trim(merge("true ", "false", training_configuration%skip_connections()))) & + ) associate(max_nodes => maxval(nodes), layers => size(nodes)) allocate(w(max_nodes, max_nodes, layers-1), b(max_nodes, max_nodes)) - if (random) then + if (randomize) then call random_number(b) call random_number(w) else @@ -288,8 +302,8 @@ function new_engine(num_hidden_layers, nodes_per_hidden_layer, num_inputs, num_o end if trainable_engine = trainable_engine_t( & - nodes = nodes, weights = w, biases = b, differentiable_activation_strategy = sigmoid_t(), metadata = & - [string_t("Microphysics"), string_t("Damian Rouson"), string_t("2023-08-18"), string_t("sigmoid"), string_t("false")] & + nodes = nodes, weights = w, biases = b, differentiable_activation_strategy = activation, metadata = & + [string_t("Microphysics"), string_t("Inference Engine"), string_t(date), activation%function_name(), residual_network] & ) end associate end associate diff --git a/example/fit-polynomials.f90 b/example/fit-polynomials.f90 index 3f5088b38..77e435d71 100644 --- a/example/fit-polynomials.f90 +++ b/example/fit-polynomials.f90 @@ -58,7 +58,7 @@ program train_polynomials call random_number(random_numbers) call shuffle(input_output_pairs, random_numbers) mini_batches = [(mini_batch_t(input_output_pairs(bins(b)%first():bins(b)%last())), b = 1, size(bins))] - call trainable_engine%train(mini_batches, cost, adam=.true.) + call trainable_engine%train(mini_batches, cost, adam=.true., learning_rate=1.5) print *,sum(cost)/size(cost) end do end block diff --git a/example/learn-addition.f90 b/example/learn-addition.f90 index ad422eccf..65bb35014 100644 --- a/example/learn-addition.f90 +++ b/example/learn-addition.f90 @@ -77,7 +77,7 @@ program train_polynomials call random_number(random_numbers) call shuffle(input_output_pairs, random_numbers) mini_batches = [(mini_batch_t(input_output_pairs(bins(b)%first():bins(b)%last())), b = 1, size(bins))] - call trainable_engine%train(mini_batches, cost, adam=.true.) + call trainable_engine%train(mini_batches, cost, adam=.true., learning_rate=1.5) print *,sum(cost)/size(cost) end do end block diff --git a/example/learn-exponentiation.f90 b/example/learn-exponentiation.f90 index e57c26bb9..7aeff0950 100644 --- a/example/learn-exponentiation.f90 +++ b/example/learn-exponentiation.f90 @@ -77,7 +77,7 @@ program train_polynomials call random_number(random_numbers) call shuffle(input_output_pairs, random_numbers) mini_batches = [(mini_batch_t(input_output_pairs(bins(b)%first():bins(b)%last())), b = 1, size(bins))] - call trainable_engine%train(mini_batches, cost, adam=.true.) + call trainable_engine%train(mini_batches, cost, adam=.true., learning_rate=1.5) print *,sum(cost)/size(cost) end do end block diff --git a/example/learn-microphysics-procedures.f90 b/example/learn-microphysics-procedures.f90 index 094eacb99..abb7d97c1 100644 --- a/example/learn-microphysics-procedures.f90 +++ b/example/learn-microphysics-procedures.f90 @@ -86,7 +86,7 @@ program learn_microphysics_procedures call random_number(random_numbers) call shuffle(input_output_pairs, random_numbers) mini_batches = [(mini_batch_t(input_output_pairs(bins(b)%first():bins(b)%last())), b = 1, size(bins))] - call trainable_engine%train(mini_batches, cost, adam=.true.) + call trainable_engine%train(mini_batches, cost, adam=.true., learning_rate=1.5) call system_clock(counter_end, clock_rate) associate( & diff --git a/example/learn-multiplication.f90 b/example/learn-multiplication.f90 index 8f8b37e18..72d1a3b40 100644 --- a/example/learn-multiplication.f90 +++ b/example/learn-multiplication.f90 @@ -77,7 +77,7 @@ program train_polynomials call random_number(random_numbers) call shuffle(input_output_pairs, random_numbers) mini_batches = [(mini_batch_t(input_output_pairs(bins(b)%first():bins(b)%last())), b = 1, size(bins))] - call trainable_engine%train(mini_batches, cost, adam=.true.) + call trainable_engine%train(mini_batches, cost, adam=.true., learning_rate=1.5) print *,sum(cost)/size(cost) end do end block diff --git a/example/learn-power-series.f90 b/example/learn-power-series.f90 index 216d1f712..7019425de 100644 --- a/example/learn-power-series.f90 +++ b/example/learn-power-series.f90 @@ -79,7 +79,7 @@ program train_polynomials call random_number(random_numbers) call shuffle(input_output_pairs, random_numbers) mini_batches = [(mini_batch_t(input_output_pairs(bins(b)%first():bins(b)%last())), b = 1, size(bins))] - call trainable_engine%train(mini_batches, cost, adam=.true.) + call trainable_engine%train(mini_batches, cost, adam=.true., learning_rate=1.5) print *,sum(cost)/size(cost) end do end block diff --git a/example/learn-saturated-mixing-ratio.f90 b/example/learn-saturated-mixing-ratio.f90 index e7936927c..0385f35f5 100644 --- a/example/learn-saturated-mixing-ratio.f90 +++ b/example/learn-saturated-mixing-ratio.f90 @@ -85,7 +85,7 @@ program train_saturated_mixture_ratio call random_number(random_numbers) call shuffle(input_output_pairs, random_numbers) mini_batches = [(mini_batch_t(input_output_pairs(bins(b)%first():bins(b)%last())), b = 1, size(bins))] - call trainable_engine%train(mini_batches, cost, adam=.true.) + call trainable_engine%train(mini_batches, cost, adam=.true., learning_rate=1.5) call system_clock(counter_end, clock_rate) associate( & diff --git a/example/print-training-configuration.f90 b/example/print-training-configuration.f90 index c002286c2..3a29a9352 100644 --- a/example/print-training-configuration.f90 +++ b/example/print-training-configuration.f90 @@ -6,10 +6,12 @@ program print_training_configuration associate(training_configuration => training_configuration_t( & hyperparameters_t(mini_batches=10, learning_rate=1.5, optimizer = "adam"), & - network_configuration_t(skip_connections=.false., nodes_per_layer=[2,72,2], activation_function="sigmoid") & + network_configuration_t(skip_connections=.false., nodes_per_layer=[2,72,2], activation_name="sigmoid") & )) - associate(json_file => file_t(training_configuration%to_json())) - call json_file%write_lines() + associate(lines => training_configuration%to_json()) + associate(json_file => file_t(lines)) + call json_file%write_lines() + end associate end associate end associate end program diff --git a/example/train-and-write.f90 b/example/train-and-write.f90 index a1f26033e..490b0e9a3 100644 --- a/example/train-and-write.f90 +++ b/example/train-and-write.f90 @@ -65,7 +65,7 @@ program train_and_write call random_number(random_numbers) call shuffle(input_output_pairs, random_numbers) mini_batches = [(mini_batch_t(input_output_pairs(bins(b)%first():bins(b)%last())), b = 1, size(bins))] - call trainable_engine%train(mini_batches, cost, adam=.true.) + call trainable_engine%train(mini_batches, cost, adam=.true., learning_rate=1.5) print *,sum(cost)/size(cost) end do end block diff --git a/fpm.toml b/fpm.toml index f8274f992..ac97ea50e 100644 --- a/fpm.toml +++ b/fpm.toml @@ -1,10 +1,10 @@ name = "inference-engine" version = "0.5.0" license = "license" -author = "Damian Rouson, Tan Nguyen, Jordan Welsman" +author = "Damian Rouson, Tan Nguyen, Jordan Welsman, David Torres" maintainer = "rouson@lbl.gov" [dependencies] assert = {git = "https://github.com/sourceryinstitute/assert", tag = "1.5.0"} -sourcery = {git = "https://github.com/sourceryinstitute/sourcery", tag = "4.4.3"} +sourcery = {git = "https://github.com/sourceryinstitute/sourcery", tag = "4.4.4"} netcdf-interfaces = {git = "https://github.com/rouson/netcdf-interfaces.git", branch = "implicit-interfaces"} diff --git a/src/inference_engine/hyperparameters_m.f90 b/src/inference_engine/hyperparameters_m.f90 index 35eb5156a..e2b22ba43 100644 --- a/src/inference_engine/hyperparameters_m.f90 +++ b/src/inference_engine/hyperparameters_m.f90 @@ -1,5 +1,6 @@ module hyperparameters_m use sourcery_m, only : string_t + use kind_parameters_m, only : rkind implicit none private @@ -14,7 +15,10 @@ module hyperparameters_m procedure :: to_json procedure :: equals generic :: operator(==) => equals - end type + procedure :: mini_batches + procedure :: optimizer_name + procedure :: learning_rate + end type interface hyperparameters_t @@ -48,6 +52,24 @@ elemental module function equals(lhs, rhs) result(lhs_equals_rhs) logical lhs_equals_rhs end function + elemental module function mini_batches(self) result(num_mini_batches) + implicit none + class(hyperparameters_t), intent(in) :: self + integer num_mini_batches + end function + + elemental module function optimizer_name(self) result(identifier) + implicit none + class(hyperparameters_t), intent(in) :: self + type(string_t) identifier + end function + + + elemental module function learning_rate(self) result(rate) + implicit none + class(hyperparameters_t), intent(in) :: self + real(rkind) rate + end function end interface end module diff --git a/src/inference_engine/hyperparameters_s.f90 b/src/inference_engine/hyperparameters_s.f90 index 31e67b64b..f6b442008 100644 --- a/src/inference_engine/hyperparameters_s.f90 +++ b/src/inference_engine/hyperparameters_s.f90 @@ -63,4 +63,16 @@ ] end procedure + module procedure mini_batches + num_mini_batches = self%mini_batches_ + end procedure + + module procedure optimizer_name + identifier = string_t(self%optimizer_) + end procedure + + module procedure learning_rate + rate = self%learning_rate_ + end procedure + end submodule hyperparameters_s diff --git a/src/inference_engine/network_configuration_m.f90 b/src/inference_engine/network_configuration_m.f90 index 30e85cf0d..8f358e531 100644 --- a/src/inference_engine/network_configuration_m.f90 +++ b/src/inference_engine/network_configuration_m.f90 @@ -9,11 +9,14 @@ module network_configuration_m private logical :: skip_connections_ = .false. integer, allocatable :: nodes_per_layer_(:) - character(len=:), allocatable :: activation_function_ + character(len=:), allocatable :: activation_name_ contains procedure :: to_json procedure :: equals generic :: operator(==) => equals + procedure :: activation_name + procedure :: nodes_per_layer + procedure :: skip_connections end type interface network_configuration_t @@ -24,11 +27,11 @@ pure module function from_json(lines) result(network_configuration) type(network_configuration_t) network_configuration end function - pure module function from_components(skip_connections, nodes_per_layer, activation_function) result(network_configuration) + pure module function from_components(skip_connections, nodes_per_layer, activation_name) result(network_configuration) implicit none logical, intent(in) :: skip_connections integer, intent(in) :: nodes_per_layer(:) - character(len=*), intent(in) :: activation_function + character(len=*), intent(in) :: activation_name type(network_configuration_t) network_configuration end function @@ -48,6 +51,25 @@ elemental module function equals(lhs, rhs) result(lhs_equals_rhs) logical lhs_equals_rhs end function + elemental module function activation_name(self) result(string) + implicit none + class(network_configuration_t), intent(in) :: self + type(string_t) string + end function + + pure module function nodes_per_layer(self) result(nodes) + implicit none + class(network_configuration_t), intent(in) :: self + integer, allocatable :: nodes(:) + end function + + elemental module function skip_connections(self) result(using_skip) + implicit none + class(network_configuration_t), intent(in) :: self + logical using_skip + end function + + end interface end module diff --git a/src/inference_engine/network_configuration_s.f90 b/src/inference_engine/network_configuration_s.f90 index 99a810aa4..73c30a99f 100644 --- a/src/inference_engine/network_configuration_s.f90 +++ b/src/inference_engine/network_configuration_s.f90 @@ -5,23 +5,24 @@ character(len=*), parameter :: skip_connections_key = "skip connections" character(len=*), parameter :: nodes_per_layer_key = "nodes per layer" - character(len=*), parameter :: activation_function_key = "activation function" + character(len=*), parameter :: activation_name_key = "activation function" contains module procedure from_components network_configuration%skip_connections_ = skip_connections network_configuration%nodes_per_layer_ = nodes_per_layer - network_configuration%activation_function_ = activation_function + network_configuration%activation_name_ = activation_name end procedure module procedure equals - call assert(allocated(lhs%activation_function_) .and. allocated(rhs%activation_function_), "network_configuration_s(equals): allocated activation_functions") + call assert(allocated(lhs%activation_name_) .and. allocated(rhs%activation_name_), & + "network_configuration_s(equals): allocated({lhs,rhs}%activation_name_)") lhs_equals_rhs = & lhs%skip_connections_ .eqv. rhs%skip_connections_ .and. & - lhs%activation_function_ == rhs%activation_function_ .and. & + lhs%activation_name_ == rhs%activation_name_ .and. & all(lhs%nodes_per_layer_ == rhs%nodes_per_layer_) end procedure @@ -37,7 +38,7 @@ network_configuration_key_found = .true. network_configuration%skip_connections_ = lines(l+1)%get_json_value(string_t(skip_connections_key), mold=.true.) network_configuration%nodes_per_layer_ = lines(l+2)%get_json_value(string_t(nodes_per_layer_key), mold=[integer::]) - network_configuration%activation_function_ = lines(l+3)%get_json_value(string_t(activation_function_key), mold=string_t("")) + network_configuration%activation_name_ = lines(l+3)%get_json_value(string_t(activation_name_key), mold=string_t("")) return end if end do @@ -60,9 +61,21 @@ string_t(indent // '"network configuration": {'), & string_t(indent // indent // '"' // skip_connections_key // '" : ' // trim(adjustl(skip_connections_string )) // ','), & string_t(indent // indent // '"' // nodes_per_layer_key // '" : [' // trim(adjustl(nodes_per_layer_string )) // '],'), & - string_t(indent // indent // '"' // activation_function_key // '" : "' // trim(adjustl(self%activation_function_)) // '"' ), & + string_t(indent // indent // '"' // activation_name_key // '" : "' // trim(adjustl(self%activation_name_)) // '"' ), & string_t(indent // '}') & ] end procedure + module procedure activation_name + string = self%activation_name_ + end procedure + + module procedure nodes_per_layer + nodes = self%nodes_per_layer_ + end procedure + + module procedure skip_connections + using_skip = self%skip_connections_ + end procedure + end submodule network_configuration_s diff --git a/src/inference_engine/trainable_engine_m.f90 b/src/inference_engine/trainable_engine_m.f90 index b1bd8e9cc..22a465092 100644 --- a/src/inference_engine/trainable_engine_m.f90 +++ b/src/inference_engine/trainable_engine_m.f90 @@ -2,7 +2,6 @@ ! Terms of use are as specified in LICENSE.txt module trainable_engine_m !! Define an abstraction that supports training a neural network - use sourcery_m, only : string_t use inference_engine_m_, only : inference_engine_t use differentiable_activation_strategy_m, only : differentiable_activation_strategy_t @@ -20,7 +19,7 @@ module trainable_engine_m type(string_t), allocatable :: metadata_(:) real(rkind), allocatable :: w(:,:,:) ! weights real(rkind), allocatable :: b(:,:) ! biases - integer, allocatable :: n(:) ! nuerons per layer + integer, allocatable :: n(:) ! nodes per layer class(differentiable_activation_strategy_t), allocatable :: differentiable_activation_strategy_ contains procedure :: assert_consistent @@ -61,12 +60,13 @@ pure module subroutine assert_consistent(self) class(trainable_engine_t), intent(in) :: self end subroutine - pure module subroutine train(self, mini_batches, cost, adam) + pure module subroutine train(self, mini_batches, cost, adam, learning_rate) implicit none class(trainable_engine_t), intent(inout) :: self type(mini_batch_t), intent(in) :: mini_batches(:) real(rkind), intent(out), allocatable, optional :: cost(:) - logical, intent(in), optional :: adam + logical, intent(in) :: adam + real(rkind), intent(in) :: learning_rate end subroutine elemental module function infer(self, inputs) result(outputs) diff --git a/src/inference_engine/trainable_engine_s.f90 b/src/inference_engine/trainable_engine_s.f90 index b9f00e581..da87c1648 100644 --- a/src/inference_engine/trainable_engine_s.f90 +++ b/src/inference_engine/trainable_engine_s.f90 @@ -86,12 +86,14 @@ module procedure train integer l, batch, mini_batch_size, pair - real(rkind), parameter :: eta = 1.5e0 ! Learning parameter real(rkind), allocatable :: & z(:,:), a(:,:), delta(:,:), dcdw(:,:,:), dcdb(:,:), vdw(:,:,:), sdw(:,:,:), vdb(:,:), sdb(:,:), vdwc(:,:,:), sdwc(:,:,:), & vdbc(:,:), sdbc(:,:) - type(tensor_t), allocatable :: inputs(:), expected_outputs(:) + real(rkind) eta, alpha + + eta = learning_rate + alpha = learning_rate call self%assert_consistent @@ -176,37 +178,31 @@ end do iterate_through_batch - if (present(adam)) then - if (adam) then - - block - ! Adam parameters - real, parameter :: beta(*) = [.9_rkind, .999_rkind] - real, parameter :: obeta(*) = [1._rkind - beta(1), 1._rkind - beta(2)] - real, parameter :: epsilon = real(1.D-08,rkind) - real, parameter :: alpha = 1.5_rkind ! Learning parameter - - adjust_weights_and_biases: & - do l = 1,output_layer - dcdw(1:n(l),1:n(l-1),l) = dcdw(1:n(l),1:n(l-1),l)/(mini_batch_size) - vdw(1:n(l),1:n(l-1),l) = beta(1)*vdw(1:n(l),1:n(l-1),l) + obeta(1)*dcdw(1:n(l),1:n(l-1),l) - sdw (1:n(l),1:n(l-1),l) = beta(2)*sdw(1:n(l),1:n(l-1),l) + obeta(2)*(dcdw(1:n(l),1:n(l-1),l)**2) - vdwc(1:n(l),1:n(l-1),l) = vdw(1:n(l),1:n(l-1),l)/(1._rkind - beta(1)**num_mini_batches) - sdwc(1:n(l),1:n(l-1),l) = sdw(1:n(l),1:n(l-1),l)/(1._rkind - beta(2)**num_mini_batches) - w(1:n(l),1:n(l-1),l) = w(1:n(l),1:n(l-1),l) & - - alpha*vdwc(1:n(l),1:n(l-1),l)/(sqrt(sdwc(1:n(l),1:n(l-1),l))+epsilon) ! Adjust weights - - dcdb(1:n(l),l) = dcdb(1:n(l),l)/mini_batch_size - vdb(1:n(l),l) = beta(1)*vdb(1:n(l),l) + obeta(1)*dcdb(1:n(l),l) - sdb(1:n(l),l) = beta(2)*sdb(1:n(l),l) + obeta(2)*(dcdb(1:n(l),l)**2) - vdbc(1:n(l),l) = vdb(1:n(l),l)/(1._rkind - beta(1)**num_mini_batches) - sdbc(1:n(l),l) = sdb(1:n(l),l)/(1._rkind - beta(2)**num_mini_batches) - b(1:n(l),l) = b(1:n(l),l) - alpha*vdbc(1:n(l),l)/(sqrt(sdbc(1:n(l),l))+epsilon) ! Adjust weights - end do adjust_weights_and_biases - end block - else - error stop "trainable_engine_s(train): for non-adam runs, please rerun without adam argument present" - end if + if (adam) then + block + ! Adam parameters + real, parameter :: beta(*) = [.9_rkind, .999_rkind] + real, parameter :: obeta(*) = [1._rkind - beta(1), 1._rkind - beta(2)] + real, parameter :: epsilon = real(1.D-08,rkind) + + adjust_weights_and_biases: & + do l = 1,output_layer + dcdw(1:n(l),1:n(l-1),l) = dcdw(1:n(l),1:n(l-1),l)/(mini_batch_size) + vdw(1:n(l),1:n(l-1),l) = beta(1)*vdw(1:n(l),1:n(l-1),l) + obeta(1)*dcdw(1:n(l),1:n(l-1),l) + sdw (1:n(l),1:n(l-1),l) = beta(2)*sdw(1:n(l),1:n(l-1),l) + obeta(2)*(dcdw(1:n(l),1:n(l-1),l)**2) + vdwc(1:n(l),1:n(l-1),l) = vdw(1:n(l),1:n(l-1),l)/(1._rkind - beta(1)**num_mini_batches) + sdwc(1:n(l),1:n(l-1),l) = sdw(1:n(l),1:n(l-1),l)/(1._rkind - beta(2)**num_mini_batches) + w(1:n(l),1:n(l-1),l) = w(1:n(l),1:n(l-1),l) & + - alpha*vdwc(1:n(l),1:n(l-1),l)/(sqrt(sdwc(1:n(l),1:n(l-1),l))+epsilon) ! Adjust weights + + dcdb(1:n(l),l) = dcdb(1:n(l),l)/mini_batch_size + vdb(1:n(l),l) = beta(1)*vdb(1:n(l),l) + obeta(1)*dcdb(1:n(l),l) + sdb(1:n(l),l) = beta(2)*sdb(1:n(l),l) + obeta(2)*(dcdb(1:n(l),l)**2) + vdbc(1:n(l),l) = vdb(1:n(l),l)/(1._rkind - beta(1)**num_mini_batches) + sdbc(1:n(l),l) = sdb(1:n(l),l)/(1._rkind - beta(2)**num_mini_batches) + b(1:n(l),l) = b(1:n(l),l) - alpha*vdbc(1:n(l),l)/(sqrt(sdbc(1:n(l),l))+epsilon) ! Adjust weights + end do adjust_weights_and_biases + end block else adjust_weights_and_biases: & do l = 1,output_layer diff --git a/src/inference_engine/training_configuration_m.f90 b/src/inference_engine/training_configuration_m.f90 index 300990289..8d9cf26d1 100644 --- a/src/inference_engine/training_configuration_m.f90 +++ b/src/inference_engine/training_configuration_m.f90 @@ -2,6 +2,8 @@ module training_configuration_m use sourcery_m, only : string_t, file_t use hyperparameters_m, only : hyperparameters_t use network_configuration_m, only : network_configuration_t + use kind_parameters_m, only : rkind + use differentiable_activation_strategy_m, only : differentiable_activation_strategy_t implicit none private @@ -15,6 +17,12 @@ module training_configuration_m procedure :: to_json procedure :: equals generic :: operator(==) => equals + procedure :: mini_batches + procedure :: optimizer_name + procedure :: learning_rate + procedure :: differentiable_activation_strategy + procedure :: nodes_per_layer + procedure :: skip_connections end type interface training_configuration_t @@ -48,6 +56,42 @@ elemental module function equals(lhs, rhs) result(lhs_eq_rhs) logical lhs_eq_rhs end function + elemental module function mini_batches(self) result(num_mini_batches) + implicit none + class(training_configuration_t), intent(in) :: self + integer num_mini_batches + end function + + elemental module function optimizer_name(self) result(identifier) + implicit none + class(training_configuration_t), intent(in) :: self + type(string_t) identifier + end function + + elemental module function learning_rate(self) result(rate) + implicit none + class(training_configuration_t), intent(in) :: self + real(rkind) rate + end function + + module function differentiable_activation_strategy(self) result(strategy) + implicit none + class(training_configuration_t), intent(in) :: self + class(differentiable_activation_strategy_t), allocatable :: strategy + end function + + module function nodes_per_layer(self) result(nodes) + implicit none + class(training_configuration_t), intent(in) :: self + integer, allocatable :: nodes(:) + end function + + elemental module function skip_connections(self) result(using_skip) + implicit none + class(training_configuration_t), intent(in) :: self + logical using_skip + end function + end interface end module diff --git a/src/inference_engine/training_configuration_s.f90 b/src/inference_engine/training_configuration_s.f90 index 29382a665..0377a0dbc 100644 --- a/src/inference_engine/training_configuration_s.f90 +++ b/src/inference_engine/training_configuration_s.f90 @@ -1,5 +1,6 @@ submodule(training_configuration_m) training_configuration_s use assert_m, only : assert + use inference_engine_m, only : relu_t, sigmoid_t, swish_t implicit none character(len=*), parameter :: header="{", footer="}", separator = "," @@ -46,4 +47,41 @@ lhs%network_configuration_ == rhs%network_configuration_ end procedure + module procedure mini_batches + num_mini_batches = self%hyperparameters_%mini_batches() + end procedure + + module procedure optimizer_name + identifier = self%hyperparameters_%optimizer_name() + end procedure + + module procedure learning_rate + rate = self%hyperparameters_%learning_rate() + end procedure + + module procedure nodes_per_layer + nodes = self%network_configuration_%nodes_per_layer() + end procedure + + module procedure skip_connections + using_skip = self%network_configuration_%skip_connections() + end procedure + + module procedure differentiable_activation_strategy + + associate(activation_name => self%network_configuration_%activation_name()) + select case(activation_name%string()) + case ("relu") + strategy = relu_t() + case ("sigmoid") + strategy = sigmoid_t() + case ("swish") + strategy = swish_t() + case default + error stop 'activation_strategy_factory_s(factory): unrecognized activation name "' // activation_name%string() // '"' + end select + end associate + + end procedure + end submodule training_configuration_s diff --git a/test/network_configuration_test_m.f90 b/test/network_configuration_test_m.f90 index 1c6c2cbd8..b512d640f 100644 --- a/test/network_configuration_test_m.f90 +++ b/test/network_configuration_test_m.f90 @@ -54,7 +54,7 @@ function write_then_read_network_configuration() result(test_passes) logical test_passes associate(constructed_from_components=> & - network_configuration_t(skip_connections=.false., nodes_per_layer=[2,72,2], activation_function="sigmoid")) + network_configuration_t(skip_connections=.false., nodes_per_layer=[2,72,2], activation_name="sigmoid")) associate(constructed_from_json => network_configuration_t(constructed_from_components%to_json())) test_passes = constructed_from_components == constructed_from_json end associate diff --git a/test/trainable_engine_test_m.f90 b/test/trainable_engine_test_m.f90 index 746325943..5603362bf 100644 --- a/test/trainable_engine_test_m.f90 +++ b/test/trainable_engine_test_m.f90 @@ -167,7 +167,7 @@ function and_gate_with_skewed_training_data() result(test_passes) mini_batches = [(mini_batch_t(input_output_pair_t(training_inputs(:,iter), training_outputs(:,iter))), iter=1, num_iterations)] trainable_engine = two_zeroed_hidden_layers() - call trainable_engine%train(mini_batches) + call trainable_engine%train(mini_batches, adam=.false., learning_rate=1.5) test_inputs = [tensor_t([true,true]), tensor_t([false,true]), tensor_t([true,false]), tensor_t([false,false])] expected_test_outputs = [(and(test_inputs(i)), i=1, size(test_inputs))] @@ -211,7 +211,7 @@ function not_and_gate_with_skewed_training_data() result(test_passes) mini_batches = [(mini_batch_t(input_output_pair_t(training_inputs(:,iter), training_outputs(:,iter))), iter=1, num_iterations)] trainable_engine = two_zeroed_hidden_layers() - call trainable_engine%train(mini_batches) + call trainable_engine%train(mini_batches, adam=.false., learning_rate=1.5) test_inputs = [tensor_t([true,true]), tensor_t([false,true]), tensor_t([true,false]), tensor_t([false,false])] expected_test_outputs = [(not_and(test_inputs(i)), i=1, size(test_inputs))] @@ -253,7 +253,7 @@ function or_gate_with_random_weights() result(test_passes) mini_batches = [(mini_batch_t(input_output_pair_t(training_inputs(:,iter), training_outputs(:,iter))), iter=1, num_iterations)] trainable_engine = two_random_hidden_layers() - call trainable_engine%train(mini_batches) + call trainable_engine%train(mini_batches, adam=.false., learning_rate=1.5) test_inputs = [tensor_t([true,true]), tensor_t([false,true]), tensor_t([true,false]), tensor_t([false,false])] expected_test_outputs = [(or(test_inputs(i)), i=1, size(test_inputs))] @@ -297,7 +297,7 @@ function xor_gate_with_random_weights() result(test_passes) mini_batches = [(mini_batch_t(input_output_pair_t(training_inputs(:,iter), training_outputs(:,iter))), iter=1, num_iterations)] trainable_engine = two_random_hidden_layers() - call trainable_engine%train(mini_batches) + call trainable_engine%train(mini_batches, adam=.false., learning_rate=1.5) test_inputs = [tensor_t([true,true]), tensor_t([false,true]), tensor_t([true,false]), tensor_t([false,false])] expected_test_outputs = [(xor(test_inputs(i)), i=1, size(test_inputs))] @@ -364,7 +364,7 @@ function preserves_identity_mapping() result(test_passes) do epoch = 1,num_epochs mini_batches = [(mini_batch_t(input_output_pairs(bins(bin)%first():bins(bin)%last())), bin = 1, size(bins))] - call trainable_engine%train(mini_batches, cost) + call trainable_engine%train(mini_batches, cost, adam=.false., learning_rate=1.5) end do block @@ -415,7 +415,7 @@ function perturbed_identity_converges() result(test_passes) call random_number(random_numbers) call shuffle(input_output_pairs, random_numbers) mini_batches = [(mini_batch_t(input_output_pairs(bins(bin)%first():bins(bin)%last())), bin = 1, size(bins))] - call trainable_engine%train(mini_batches, cost, adam=.true.) + call trainable_engine%train(mini_batches, cost, adam=.true., learning_rate=1.5) end do block diff --git a/test/training_configuration_test_m.f90 b/test/training_configuration_test_m.f90 index 3701a6ad5..b9ffdb3d7 100644 --- a/test/training_configuration_test_m.f90 +++ b/test/training_configuration_test_m.f90 @@ -53,10 +53,9 @@ function results() result(test_results) function construct_and_convert_to_and_from_json() result(test_passes) logical test_passes - associate(training_configuration => training_configuration_t( & hyperparameters_t(mini_batches=5, learning_rate=1., optimizer = "adam"), & - network_configuration_t(skip_connections=.false., nodes_per_layer=[2,72,2], activation_function="sigmoid") & + network_configuration_t(skip_connections=.false., nodes_per_layer=[2,72,2], activation_name="sigmoid") & )) associate(from_json => training_configuration_t(file_t(training_configuration%to_json()))) test_passes = training_configuration == from_json