diff --git a/Exec/reacting_tests/reacting_convergence/ci-benchmarks/react_converge_128_true_sdc.out b/Exec/reacting_tests/reacting_convergence/ci-benchmarks/react_converge_128_true_sdc.out index b867f0c375..03efe9ecaa 100644 --- a/Exec/reacting_tests/reacting_convergence/ci-benchmarks/react_converge_128_true_sdc.out +++ b/Exec/reacting_tests/reacting_convergence/ci-benchmarks/react_converge_128_true_sdc.out @@ -2,58 +2,58 @@ time = 0.000901 variables minimum value maximum value density 499999.99996 1003267.4037 - xmom -1.64054094e+12 1.64054094e+12 - ymom -1.64054094e+12 1.64054094e+12 + xmom -1.6405409458e+12 1.6405409458e+12 + ymom -1.6405409458e+12 1.6405409458e+12 zmom 0 0 - rho_E 2.263117857e+22 7.0166374851e+22 - rho_e 2.263117857e+22 7.0166358897e+22 - Temp 300032898.05 454530914.06 - rho_He4 499997.75752 1002591.9748 - rho_C12 2.2423384483 675.42858349 - rho_O16 5.0001038346e-05 0.00023067858795 + rho_E 2.263117857e+22 7.0166374758e+22 + rho_e 2.263117857e+22 7.0166358804e+22 + Temp 300032898.05 454530912.93 + rho_He4 499997.75752 1002591.9749 + rho_C12 2.2423384483 675.42842495 + rho_O16 5.0001038346e-05 0.00023067823575 rho_Fe56 4.9999999996e-05 0.00010032674037 - rho_enuc 1.4565745423e+21 4.5249954871e+23 - density_sdc_source -544925.79275 78513.247889 - xmom_sdc_source -1.8266340469e+15 1.8266340469e+15 - ymom_sdc_source -1.8266340469e+15 1.8266340469e+15 + rho_enuc 1.4565745423e+21 4.5249964745e+23 + density_sdc_source -544925.80758 78513.247869 + xmom_sdc_source -1.8266340546e+15 1.8266340546e+15 + ymom_sdc_source -1.8266340546e+15 1.8266340546e+15 zmom_sdc_source 0 0 - rho_E_sdc_source -6.1992456785e+22 7.5781242406e+21 - rho_e_sdc_source -6.1984065959e+22 6.3082607681e+21 + rho_E_sdc_source -6.1992501236e+22 7.5781242663e+21 + rho_e_sdc_source -6.1984110404e+22 6.3082607836e+21 Temp_sdc_source 0 0 - rho_He4_sdc_source -543231.29383 78510.973919 - rho_C12_sdc_source -1694.4890575 10.073635142 - rho_O16_sdc_source -0.0098093776861 7.1506350196e-06 - rho_Fe56_sdc_source -5.4492579275e-05 7.8513247889e-06 - pressure 1.4155320805e+22 4.2608130858e+22 - kineng 6.7398897838e-15 1.6647276226e+18 - soundspeed 212069503.63 257404342.21 + rho_He4_sdc_source -543231.23569 78510.973875 + rho_C12_sdc_source -1694.5620221 10.07378628 + rho_O16_sdc_source -0.0098098270645 7.1506434889e-06 + rho_Fe56_sdc_source -5.4492580758e-05 7.8513247869e-06 + pressure 1.4155320805e+22 4.2608130805e+22 + kineng 2.4519179612e-13 1.6647276308e+18 + soundspeed 212069503.63 257404342.05 Gamma_1 1.5601126452 1.5885713572 - MachNumber 7.7424457878e-19 0.0086982771596 - entropy 348439780.9 349209883.57 - magvort 0 0.00018541051835 - divu -0.1163387912 0.55816306007 - eint_E 4.5262357143e+16 6.9937847678e+16 - eint_e 4.5262357143e+16 6.9937843728e+16 + MachNumber 4.669869489e-18 0.008698277168 + entropy 348439780.9 349209883.38 + magvort 0 0.00018541051834 + divu -0.11633879117 0.55816307483 + eint_E 4.5262357143e+16 6.9937847586e+16 + eint_e 4.5262357143e+16 6.9937843636e+16 logden 5.6989700043 6.0014167022 StateErr_0 499999.99996 1003267.4037 - StateErr_1 300032898.05 454530914.06 - StateErr_2 0.9993267708 0.99999551512 - X(He4) 0.9993267708 0.99999551512 - X(C12) 4.484676897e-06 0.00067322887298 - X(O16) 1.000020767e-10 2.2992732257e-10 + StateErr_1 300032898.05 454530912.93 + StateErr_2 0.99932677096 0.99999551512 + X(He4) 0.99932677096 0.99999551512 + X(C12) 4.484676897e-06 0.00067322871496 + X(O16) 1.000020767e-10 2.2992697152e-10 X(Fe56) 1e-10 1e-10 - abar 4.0000119598 4.0017960842 + abar 4.0000119598 4.0017960838 Ye 0.5 0.5 - x_velocity -2064636.8122 2064636.8122 - y_velocity -2064636.8122 2064636.8122 + x_velocity -2064636.8156 2064636.8156 + y_velocity -2064636.8156 2064636.8156 z_velocity 0 0 - t_sound_t_enuc 0.00023710316663 0.0195732693 - enuc 2.9131490847e+15 4.5102586513e+17 - magvel 1.6419366351e-10 2067446.1363 - radvel -0.00067838574982 2067446.1363 - circvel 0 11.820144682 - magmom 8.209683175e-05 1.6422547233e+12 - angular_momentum_x 0 0 - angular_momentum_y 0 0 - angular_momentum_z -1.2410862732e+14 1.2410862751e+14 + t_sound_t_enuc 0.00023710316663 0.019573273608 + enuc 2.9131490847e+15 4.5102596355e+17 + magvel 9.9033690456e-10 2067446.1392 + radvel -0.00067838913019 2067446.1391 + circvel 0 11.820185992 + magmom 0.00049516845224 1.6422547294e+12 + angular_momentum_x -0 -0 + angular_momentum_y -0 -0 + angular_momentum_z -1.2410867567e+14 1.2410867593e+14 diff --git a/Source/driver/_cpp_parameters b/Source/driver/_cpp_parameters index e6e06797ec..ec98042eb0 100644 --- a/Source/driver/_cpp_parameters +++ b/Source/driver/_cpp_parameters @@ -267,9 +267,6 @@ sdc_extra int 0 # which SDC nonlinear solver to use? 1 = Newton, 2 = VODE, 3 = VODE for first iter sdc_solver int 1 -# relative tolerance for the nonlinear solve on rho with SDC -sdc_solver_tol_dens Real 1.e-6 - # relative tolerance for the nonlinear solve on rho X_k with SDC sdc_solver_tol_spec Real 1.e-6 diff --git a/Source/reactions/Castro_react_util.H b/Source/reactions/Castro_react_util.H index f64bfc6836..756f088649 100644 --- a/Source/reactions/Castro_react_util.H +++ b/Source/reactions/Castro_react_util.H @@ -10,10 +10,6 @@ #include -constexpr int iwrho = 0; -constexpr int iwfs = 1; -constexpr int iwe = NumSpec+1; - // #include AMREX_GPU_HOST_DEVICE AMREX_INLINE diff --git a/Source/sdc/sdc_newton_solve.H b/Source/sdc/sdc_newton_solve.H index df0f0faebe..a8fbd2d1ec 100644 --- a/Source/sdc/sdc_newton_solve.H +++ b/Source/sdc/sdc_newton_solve.H @@ -14,10 +14,11 @@ constexpr int CONVERGENCE_FAILURE = -2; AMREX_GPU_HOST_DEVICE AMREX_INLINE void f_sdc_jac(const Real dt_m, - GpuArray const& U, - GpuArray& f, - RArray2D& Jac, - GpuArray& f_source, + Array1D const& U, + Array1D& f, + JacNetArray2D& Jac, + Array1D& f_source, + const Real rho, GpuArray& mom_source, const Real T_old, const Real E_var) { @@ -26,28 +27,20 @@ f_sdc_jac(const Real dt_m, GpuArray U_full; GpuArray R_full; - GpuArray R_react; - Array2D dRdw = {0.0_rt}; - Array2D dwdU = {0.0_rt}; - - Array2D dRdU; + Array1D R_react; // we are not solving the momentum equations // create a full state -- we need this for some interfaces - U_full[URHO] = U[0]; + + U_full[URHO] = rho; for (int n = 0; n < NumSpec; ++n) { - U_full[UFS+n] = U[1+n]; + U_full[UFS+n] = U(n+1); } - if (sdc_solve_for_rhoe == 1) { - U_full[UEINT] = U[NumSpec+1]; - U_full[UEDEN] = E_var; - } else { - U_full[UEDEN] = U[NumSpec+1]; - U_full[UEINT] = E_var; - } + U_full[UEINT] = U(NumSpec+1); + U_full[UEDEN] = E_var; for (int n = 0; n < 3; ++n) { U_full[UMX+n] = mom_source[n]; @@ -76,107 +69,52 @@ f_sdc_jac(const Real dt_m, eos_state.xn[n] = U_full[UFS+n] / U_full[URHO]; } #if NAUX_NET > 0 - for (int n = 0; n < NumAux; ++n) { - eos_state.aux[n] = U_full[UFX+n] / U_full[URHO]; - } + amrex::Error("error: aux data not currently supported in true SDC"); #endif - eos_state.e = U_full[UEINT] / U_full[URHO]; // (U_full[UEDEN] - 0.5_rt*sum(U_full(UMX:UMZ))/U_full[URHO])/U_full[URHO] + + eos_state.e = U_full[UEINT] / U_full[URHO]; eos(eos_input_re, eos_state); U_full[UTEMP] = eos_state.T; // we'll create a burn_state to pass stuff from the RHS to the Jac function + burn_t burn_state; single_zone_react_source(U_full, R_full, burn_state); // store the subset of R used in the Jacobian - R_react[0] = R_full[URHO]; - for (int n = 0; n < NumSpec; ++n) { - R_react[n+1] = R_full[UFS+n]; - } - if (sdc_solve_for_rhoe == 1) { - R_react[NumSpec+1] = R_full[UEINT]; - } else { - R_react[NumSpec+1] = R_full[UEDEN]; - } - - for (int n = 0; n < NumSpec+2; ++n) { - f[n] = U[n] - dt_m * R_react[n] - f_source[n]; - } - // get dRdw -- this may do a numerical approximation or use the - // network's analytic Jac - single_zone_jac(U_full, burn_state, dt_m, dRdw); - - // the density row - dwdU(iwrho, 0) = 1.0_rt; - - // the X_k rows - for (int m = 1; m < NumSpec+1; ++m) { - dwdU(iwfs-1+m, 0) = -U[m] / (U[0]*U[0]); - dwdU(iwfs-1+m, m) = 1.0_rt / U[0]; - } - - auto eos_xderivs = composition_derivatives(eos_state); - - // now the e row -- this depends on whether we are evolving (rho E) or (rho e) - auto denom = 1.0_rt / eos_state.rho; - auto xn_sum = 0.0_rt; for (int n = 0; n < NumSpec; ++n) { - xn_sum += eos_state.xn[n] * eos_xderivs.dedX[n]; + R_react(n+1) = R_full[UFS+n]; } + R_react(NumSpec+1) = R_full[UEINT]; - if (sdc_solve_for_rhoe == 1) { - dwdU(iwe, 0) = denom * (xn_sum - eos_state.e); - } else { - auto u2_sum = 0.0_rt; - for (auto n = 0; n < 3; ++n) { - u2_sum += U_full[UMX+n] * U_full[UMX+n]; - } - dwdU(iwe, 0) = denom * (xn_sum - eos_state.e - - 0.5_rt * u2_sum / (eos_state.rho * eos_state.rho)); - } + // we are solving J dU = -f + // where f is Eq. 36 evaluated with the current guess for U - for (int m = 0; m < NumSpec; ++m) { - dwdU(iwe,m+1) = -denom * eos_xderivs.dedX[m]; + for (int n = 1; n < NumSpec+1; ++n) { + f(n) = U(n) - dt_m * R_react(n) - f_source(n); } - dwdU(iwe, NumSpec+1) = denom; + // get the Jacobian. - // construct the Jacobian -- we can get most of the - // terms from the network itself, but we do not rely on - // it having derivative wrt density + /// First we get the dR/dU expression. + // Instead of the decomposition into dw/dU and dR/dw + // written out in the original paper Appendix A, we instead use the + // form from the simplified-SDC paper (Zingale et al. 2022). Note: + // we are not including density anymore. - // Note: Jac is 1-based, because that is what the linear algebra - // routines expect but the components, dRdw and dwdU are 0-based! + single_zone_jac(U_full, burn_state, dt_m, Jac); - for (int n = 1; n <= NumSpec+2; ++n) { - for (int m = 1; m <= NumSpec+2; ++m) { - Jac(n, m) = 0.0_rt; - } - } + // Our Jacobian has the form: J = I - dt dR/dw dwdU + // (Eq. 38), so now we fix that - for (int m = 1; m <= NumSpec+2; ++m) { - Jac(m, m) = 1.0_rt; - } - - // auto dRdU = matmul(dRdw, dwdU); - // dRdU is 0-based - - for (int n = 0; n <= NumSpec+1; ++n) { - for (int m = 0; m <= NumSpec+1; ++m) { - dRdU(n,m) = 0.0_rt; - for (int l = 0; l <= NumSpec+1; ++l) { - dRdU(n,m) += dRdw(n,l) * dwdU(l,m); - } - } - } - - for (int n = 0; n < NumSpec+2; ++n) { - for (int m = 0; m < NumSpec+2; ++m) { - Jac(n+1, m+1) -= dt_m * dRdU(n,m); + for (int n = 1; n <= NumSpec+1; ++n) { + for (int m = 1; m <= NumSpec+1; ++m) { + Real coeff = (m == n) ? 1.0_rt : 0.0_rt; + Jac(n, m) = coeff - dt_m * Jac(n, m); } } @@ -194,25 +132,25 @@ sdc_newton_solve(const Real dt_m, int& ierr) { // the purpose of this function is to solve the system // U - dt R(U) = U_old + dt C using a Newton solve. + // This is Eq. 36 in the paper. // // here, U_new should come in as a guess for the new U // and will be returned with the value that satisfied the // nonlinear function - RArray2D Jac; + JacNetArray2D Jac; // we will do the implicit update of only the terms that // have reactive sources // - // 0 : rho // 1:NumSpec : species - // NumSpec+1 : (rho E) or (rho e) + // NumSpec+1 : (rho e) - GpuArray U_react; - GpuArray f_source; + Array1D U_react; + Array1D f_source; GpuArray mom_source; - GpuArray dU_react; - GpuArray f; + Array1D dU_react; + Array1D f; RArray1D f_rhs; const int MAX_ITER = 100; @@ -222,11 +160,13 @@ sdc_newton_solve(const Real dt_m, // the tolerance we are solving to may depend on the // iteration Real relax_fac = std::pow(sdc_solver_relax_factor, sdc_order - sdc_iteration - 1); - Real tol_dens = sdc_solver_tol_dens * relax_fac; Real tol_spec = sdc_solver_tol_spec * relax_fac; Real tol_ener = sdc_solver_tol_ener * relax_fac; - // update the momenta for this zone -- they don't react + // update the density and momenta for this zone -- they don't react + + U_new[URHO] = U_old[URHO] + dt_m * C[URHO]; + for (int n = 0; n < 3; ++n) { U_new[UMX+n] = U_old[UMX+n] + dt_m * C[UMX+n]; } @@ -235,22 +175,15 @@ sdc_newton_solve(const Real dt_m, // nonlinear solve -- note: we include the old state in // f_source - // load rpar - // for the Jacobian solve, we are solving // f(U) = U - dt R(U) - U_old - dt C = 0 // we define f_source = U_old + dt C so we are solving // f(U) = U - dt R(U) - f_source = 0 - f_source[0] = U_old[URHO] + dt_m * C[URHO]; for (int n = 0; n < NumSpec; ++n) { - f_source[1 + n] = U_old[UFS + n] + dt_m * C[UFS + n]; - } - if (sdc_solve_for_rhoe == 1) { - f_source[NumSpec+1] = U_old[UEINT] + dt_m * C[UEINT]; - } else { - f_source[NumSpec+1] = U_old[UEDEN] + dt_m * C[UEDEN]; + f_source(1 + n) = U_old[UFS + n] + dt_m * C[UFS + n]; } + f_source(NumSpec+1) = U_old[UEINT] + dt_m * C[UEINT]; // set the momenta to be U_new for (int n = 0; n < 3; ++n) { @@ -263,25 +196,19 @@ sdc_newton_solve(const Real dt_m, // we should be able to do an update for this somehow? - Real E_var; + Real E_var = U_new[UEDEN]; - if (sdc_solve_for_rhoe == 1) { - E_var = U_new[UEDEN]; - } else { - E_var = U_new[UEINT]; - } + // we need the density too + + Real rho_new = U_new[URHO]; // store the subset for the nonlinear solve // We use an initial guess if possible - U_react[0] = U_new[URHO]; + for (int n = 0; n < NumSpec; ++n) { - U_react[1+n] = U_new[UFS+n]; - } - if (sdc_solve_for_rhoe == 1) { - U_react[NumSpec+1] = U_new[UEINT]; - } else { - U_react[NumSpec+1] = U_new[UEDEN]; + U_react(1+n) = U_new[UFS+n]; } + U_react(NumSpec+1) = U_new[UEINT]; #if (INTEGRATOR == 0) @@ -289,14 +216,13 @@ sdc_newton_solve(const Real dt_m, // iterative loop int iter = 0; - int max_newton_iter = MAX_ITER; Real err = 1.e30_rt; bool converged = false; - while (!converged && iter < max_newton_iter) { + while (!converged && iter < MAX_ITER) { int info = 0; - f_sdc_jac(dt_m, U_react, f, Jac, f_source, mom_source, T_old, E_var); + f_sdc_jac(dt_m, U_react, f, Jac, f_source, rho_new, mom_source, T_old, E_var); IArray1D ipvt; @@ -305,15 +231,15 @@ sdc_newton_solve(const Real dt_m, RHS::dgefa(Jac); info = 0; #else - dgefa(Jac, ipvt, info); + dgefa(Jac, ipvt, info); #endif if (info != 0) { ierr = SINGULAR_MATRIX; return; } - for (int n = 1; n <= NumSpec+2; ++n) { - f_rhs(n) = -f[n-1]; + for (int n = 1; n <= NumSpec+1; ++n) { + f_rhs(n) = -f(n); } #ifdef NEW_NETWORK_IMPLEMENTATION @@ -322,35 +248,35 @@ sdc_newton_solve(const Real dt_m, dgesl(Jac, ipvt, f_rhs); #endif - for (int n = 0; n < NumSpec+2; ++n) { - dU_react[n] = f_rhs(n+1); + for (int n = 1; n < NumSpec+1; ++n) { + dU_react(n) = f_rhs(n); } // how much of dU_react should we apply? Real eta = 1.0_rt; - for (int n = 0; n < NumSpec+2; ++n) { - dU_react[n] *= eta; - U_react[n] += dU_react[n]; + for (int n = 1; n < NumSpec+1; ++n) { + U_react(n) += eta * dU_react(n); } - GpuArray eps_tot; - - eps_tot[0] = tol_dens * std::abs(U_react[0]) + sdc_solver_atol; + Array1D eps_tot; // for species, atol is the mass fraction limit, so we // multiply by density to get a partial density limit + for (int n = 0; n < NumSpec; ++n) { - eps_tot[1 + n] = tol_spec * std::abs(U_react[1 + n]) + sdc_solver_atol * std::abs(U_react[0]); + eps_tot(1 + n) = tol_spec * std::abs(U_react(1 + n)) + + sdc_solver_atol * std::abs(U_new[URHO]); } - eps_tot[NumSpec+1] = tol_ener * std::abs(U_react[NumSpec+1]) + sdc_solver_atol; + eps_tot(NumSpec+1) = tol_ener * std::abs(U_react(NumSpec+1)) + sdc_solver_atol; // compute the norm of the weighted error, where the // weights are 1/eps_tot + auto err_sum = 0.0_rt; - for (int n = 0; n < NumSpec+2; ++n) { - err_sum += dU_react[n] * dU_react[n] / (eps_tot[n]* eps_tot[n]); + for (int n = 1; n <= NumSpec+1; ++n) { + err_sum += dU_react(n) * dU_react(n) / (eps_tot(n) * eps_tot(n)); } - err = std::sqrt(err_sum / (NumSpec+2)); + err = std::sqrt(err_sum / static_cast(NumSpec+1)); if (err < 1.0_rt) { converged = true; @@ -368,24 +294,18 @@ sdc_newton_solve(const Real dt_m, #endif // update the full U_new - // if we updated total energy, then correct internal, - // or vice versa - U_new[URHO] = U_react[0]; + for (int n = 0; n < NumSpec; ++n) { - U_new[UFS+n] = U_react[1+n]; + U_new[UFS+n] = U_react(1+n); } auto v2 = 0.0_rt; for (int m = 0; m < 3; ++m) { v2 += U_new[UMX+m] * U_new[UMX+m]; } - if (sdc_solve_for_rhoe == 1) { - U_new[UEINT] = U_react[NumSpec+1]; - U_new[UEDEN] = U_new[UEINT] + 0.5_rt * v2 / U_new[URHO]; - } else { - U_new[UEDEN] = U_react[NumSpec+1]; - U_new[UEINT] = U_new[UEDEN] - 0.5_rt * v2 / U_new[URHO]; - } + U_new[UEINT] = U_react(NumSpec+1); + U_new[UEDEN] = U_new[UEINT] + 0.5_rt * v2 / U_new[URHO]; + } AMREX_GPU_HOST_DEVICE AMREX_INLINE diff --git a/Source/sdc/sdc_react_util.H b/Source/sdc/sdc_react_util.H index 3357612879..47c4734d5c 100644 --- a/Source/sdc/sdc_react_util.H +++ b/Source/sdc/sdc_react_util.H @@ -1,6 +1,10 @@ #ifndef SDC_REACT_UTIL_H #define SDC_REACT_UTIL_H +constexpr int iwfs = 1; +constexpr int iwe = NumSpec+1; + + AMREX_GPU_HOST_DEVICE AMREX_INLINE void single_zone_react_source(GpuArray const& state, @@ -29,9 +33,7 @@ single_zone_react_source(GpuArray const& state, eos_t eos_state; // Ensure that the temperature going in is consistent with the internal energy. - burn_to_eos(burn_state, eos_state); - eos(eos_input_re, eos_state); - eos_to_burn(eos_state, burn_state); + eos(eos_input_re, burn_state); // eos_get_small_temp(&small_temp); burn_state.T = amrex::min(MAX_TEMP, amrex::max(burn_state.T, small_temp)); @@ -60,6 +62,7 @@ single_zone_react_source(const int i, const int j, const int k, Array4 const& state, Array4 const& R, burn_t& burn_state) { + GpuArray state_arr; GpuArray R_arr; @@ -79,7 +82,7 @@ AMREX_GPU_HOST_DEVICE AMREX_INLINE void single_zone_jac(GpuArray const& state, burn_t& burn_state, const Real dt, - Array2D& dRdw) { + JacNetArray2D& Jac) { #ifdef SIMPLIFIED_SDC #ifndef AMREX_USE_GPU @@ -87,26 +90,25 @@ single_zone_jac(GpuArray const& state, #endif #else - Array1D ydot; - Array1D ydot_pert; - JacNetArray2D Jac; - const auto eps = 1.e-8_rt; - actual_rhs(burn_state, ydot); - // Jac has the derivatives with respect to the native - // network variables, X, T. e. It does not have derivatives with - // respect to density, so we'll have to compute those ourselves. + // network variables, X, e. Note: the e derivative if (sdc_newton_use_analytic_jac == 0) { // note the numerical Jacobian will be returned in terms of X + // and will already have the corrections to convert into d/de + // instead of d/dT + jac_info_t jac_info; jac_info.h = dt; numerical_jac(burn_state, jac_info, Jac); + } else { actual_jac(burn_state, Jac); + // the above will have already multiplied the d/dT terms with 1/c_v + // The Jacobian from the nets is in terms of dYdot/dY, but we want // it was dXdot/dX, so convert here. for (int n = 1; n <= NumSpec; n++) { @@ -122,91 +124,34 @@ single_zone_jac(GpuArray const& state, Jac(m,n) = Jac(m,n) * aion_inv[n-1]; } } - } -#endif - - // at this point, our Jacobian should be entirely in terms of X, - // not Y. Let's now fix the rhs terms themselves to be in terms of - // dX/dt and not dY/dt. - for (int n = 0; n < NumSpec; ++n) { - ydot(n+1) *= aion[n]; - } - - // Our jacobian, dR/dw has the form: - // - // / 0 0 0 0 \ - // | d(rho X1dot)/drho d(rho X1dot)/dX1 d(rho X1dit)/dX2 ... d(rho X1dot)/dT | - // | d(rho X2dot)/drho d(rho X2dot)/dX1 d(rho X2dot)/dX2 ... d(rho X2dot)/dT | - // | ... | - // \ d(rho Edot)/drho d(rho Edot)/dX1 d(rho Edot)/dX2 ... d(rho Edot)/dT / - - for (int n = 0; n < NumSpec+2; ++n) { - for (int m = 0; m < NumSpec+2; ++m) { - dRdw(n,m) = 0.0_rt; - } - } - - // now perturb density and call the RHS to compute the derivative wrt rho - // species rates come back in terms of molar fractions - burn_t burn_state_pert = burn_state; - burn_state_pert.rho = burn_state.rho * (1.0_rt + eps); - burn_state_pert.T = burn_state.T; - burn_state_pert.e = burn_state.e; - for (int n = 0; n < NumSpec; ++n) { - burn_state_pert.xn[n] = burn_state.xn[n]; - } -#if NAUX_NET > 0 - for (int n = 0; n < NumAux; ++n) { - burn_state_pert.aux[n] = burn_state.aux[n]; - } -#endif - - actual_rhs(burn_state_pert, ydot_pert); - - // make the rates dX/dt and not dY/dt - for (int n = 0; n < NumSpec; ++n) { - ydot_pert(n+1) *= aion[n]; - } - - // fill the column of dRdw corresponding to the derivative - // with respect to rho - for (int m = 1; m <= NumSpec; ++m) { - // d( d(rho X_m)/dt)/drho - dRdw(m, iwrho) = ydot(m) + - state[URHO] * (ydot_pert(m) - ydot(m))/(eps * burn_state.rho); - } + // now correct the species derivatives + // this constructs dy/dX_k |_e = dy/dX_k |_T - e_{X_k} |_T dy/dT / c_v - // d( d(rho E)/dt)/drho - dRdw(NumSpec+1, iwrho) = ydot(net_ienuc) + - state[URHO] * (ydot_pert(net_ienuc) - ydot(net_ienuc))/(eps * burn_state.rho); + // TODO: I think that this can be accomplished via burn_state directly! - // fill the columns of dRdw corresponding to each derivative - // with respect to species mass fraction - for (int n = 1; n <= NumSpec; ++n) { - dRdw(0, iwfs-1+n) = 0.0_rt; // density source - - for (int m = 1; m <= NumSpec; ++m) { - // d( d(rho X_m)/dt)/dX_n - dRdw(m, iwfs-1+n) = state[URHO] * Jac(m, n); - } + eos_re_extra_t eos_state; + eos_state.rho = burn_state.rho; + eos_state.T = burn_state.T; + eos_state.e = burn_state.e; + for (int n = 0; n < NumSpec; n++) { + eos_state.xn[n] = burn_state.xn[n]; + } - // d( d(rho E)/dt)/dX_n - dRdw(NumSpec+1, iwfs-1+n) = state[URHO] * Jac(net_ienuc, n); + eos(eos_input_re, eos_state); + eos_xderivs_t eos_xderivs = composition_derivatives(eos_state); + for (int m = 1; m <= neqs; m++) { + for (int n = 1; n <= NumSpec; n++) { + Jac(m, n) -= eos_xderivs.dedX[n-1] * Jac(m, net_ienuc); + } + } } +#endif - // now fill the column corresponding to derivatives with respect to - // energy -- this column is iwe - dRdw(0, iwe) = 0.0_rt; - - // d( d(rho X_m)/dt)/de - for (int m = 1; m <= NumSpec; ++m) { - dRdw(m, iwe) = state[URHO] * Jac(m, net_ienuc); - } + // at this point, our Jacobian should be entirely in terms of X, + // not Y. - // d( d(rho E)/dt)/de - dRdw(NumSpec+1, iwe) = state[URHO] * Jac(net_ienuc, net_ienuc); } #endif diff --git a/Source/sdc/vode_rhs_true_sdc.H b/Source/sdc/vode_rhs_true_sdc.H index 91790cc4fc..9e61f3a3df 100644 --- a/Source/sdc/vode_rhs_true_sdc.H +++ b/Source/sdc/vode_rhs_true_sdc.H @@ -15,9 +15,6 @@ #include #include -#ifndef NEW_NETWORK_IMPLEMENTATION -#include -#endif // The f_rhs routine provides the right-hand-side for the DVODE solver. // This is a generic interface that calls the specific RHS routine in the @@ -33,24 +30,20 @@ void rhs(const Real time, burn_t& burn_state, DvodeT& vode_state, RArray1D& dUdt GpuArray U_full; GpuArray R_full; - GpuArray R_react; - // evaluate R + // update the density -- we'll need this for the EOS call in the + // react update + + U_full[URHO] = burn_state.rho_orig + time * burn_state.ydot_a[SRHO]; + // we are not solving the momentum equations - // create a full state -- we need this for some interfaces - // note: vode_state % y(:) is 1-based + // and never need total energy, so we don't worry about filling those - U_full[URHO] = vode_state.y(1); for (int n = 0; n < NumSpec; n++) { - U_full[UFS+n] = vode_state.y(2+n); + U_full[UFS+n] = vode_state.y(1+n); } - U_full[UEINT] = vode_state.y(NumSpec+2); - U_full[UEDEN] = burn_state.E_var; - - U_full[UMX] = burn_state.mom[0]; - U_full[UMY] = burn_state.mom[1]; - U_full[UMZ] = burn_state.mom[2]; + U_full[UEINT] = vode_state.y(NumSpec+1); // initialize the temperature -- a better value will be found when we do the EOS // call in single_zone_react_source @@ -67,17 +60,13 @@ void rhs(const Real time, burn_t& burn_state, DvodeT& vode_state, RArray1D& dUdt burn_state.T = burn_state_pass.T; - R_react[0] = R_full[URHO]; - for (int n = 0; n < NumSpec; n++) { - R_react[1+n] = R_full[UFS+n]; - } - R_react[NumSpec+1] = R_full[UEINT]; - - // create the RHS -- this is 1-based + // now construct the RHS -- note this is 1-based - for (int n = 1; n <= NumSpec+2; n++) { - dUdt(n) = R_react[n-1] + burn_state.f_source[n-1]; + for (int n = 0; n < NumSpec; ++n) { + dUdt(n+1) = R_full[UFS+n] + burn_state.ydot_a[SFS+n]; } + dUdt(NumSpec+1) = R_full[UEINT] + burn_state.ydot_a[SEINT]; + } @@ -89,99 +78,33 @@ void jac (const Real time, burn_t& burn_state, DvodeT& vode_state, MatrixType& p // note the Jacobian is 1-based at the moment - Array2D dRdw = {0.0_rt}; - Array2D dwdU = {0.0_rt}; - GpuArray U_full; GpuArray R_full; - GpuArray R_react; // we are not solving the momentum equations // create a full state -- we need this for some interfaces - U_full[URHO] = vode_state.y(1); - for (int n = 0; n < NumSpec; n++) { - U_full[UFS+n] = vode_state.y(2+n); - } - U_full[UEINT] = vode_state.y(NumSpec+2); - U_full[UEDEN] = burn_state.E_var; - - U_full[UMX] = burn_state.mom[0]; - U_full[UMY] = burn_state.mom[1]; - U_full[UMZ] = burn_state.mom[2]; - - // compute the temperature and species derivatives -- - // maybe this should be done using the burn_state - // returned by single_zone_react_source, since it is - // more consistent T from e + U_full[URHO] = burn_state.rho_orig + time * burn_state.ydot_a[URHO]; - eos_extra_t eos_state; - eos_state.rho = U_full[URHO]; - eos_state.T = burn_state.T; // initial guess for (int n = 0; n < NumSpec; n++) { - eos_state.xn[n] = U_full[UFS+n] / U_full[URHO]; + U_full[UFS+n] = vode_state.y(1+n); } -#if NAUX_NET > 0 - for (int n = 0; n < NumAux; n++) { - eos_state.aux[n] = U_full[UFX+n] / U_full[URHO]; - } -#endif - eos_state.e = U_full[UEINT] / U_full[URHO]; //(U_full(UEDEN) - HALF*sum(U_full(UMX:UMZ))/U_full(URHO))/U_full(URHO) + U_full[UEINT] = vode_state.y(NumSpec+1); - - eos(eos_input_re, eos_state); - - U_full[UTEMP] = eos_state.T; + U_full[UTEMP] = burn_state.T; burn_t burn_state_pass; - single_zone_react_source(U_full, R_full, burn_state_pass); - - single_zone_jac(U_full, burn_state_pass, vode_state.H, dRdw); - - // construct dwdU - - // the density row - - dwdU(iwrho, 0) = 1.0_rt; - - // the X_k rows - - for (int m = 1; m < NumSpec+1; m++) { - dwdU(iwfs-1+m, 0) = -vode_state.y(m+1) / (vode_state.y(1) * vode_state.y(1)); - dwdU(iwfs-1+m, m) = 1.0_rt / vode_state.y(1); - } - - auto eos_xderivs = composition_derivatives(eos_state); - // now the e row -- this depends on whether we are evolving (rho E) or (rho e) + // this will initialize burn_state_pass and fill the temperature + // via the EOS - Real denom = 1.0_rt / eos_state.rho; - Real xn_sum = 0.0_rt; - for (int n = 0; n < NumSpec; ++n) { - xn_sum += eos_state.xn[n] * eos_xderivs.dedX[n]; - } - - dwdU(iwe, 0) = denom * (xn_sum - eos_state.e); - - for (int m = 0; m < NumSpec; m++) { - dwdU(iwe, m+1) = -denom * eos_xderivs.dedX[m]; - } - - dwdU(iwe, iwe) = denom; - - // construct the Jacobian -- note: the Jacobian is 1-based + single_zone_react_source(U_full, R_full, burn_state_pass); - for (int n = 0; n <= NumSpec+1; ++n) { - for (int m = 0; m <= NumSpec+1; ++m) { - pd(n+1, m+1) = 0.0_rt; - for (int l = 0; l <= NumSpec+1; ++l) { - pd(n+1, m+1) = dRdw(n,l) * dwdU(l,m); - } - } - } + single_zone_jac(U_full, burn_state_pass, vode_state.H, pd); } + AMREX_GPU_HOST_DEVICE AMREX_INLINE void sdc_vode_solve(const Real dt_m, @@ -208,11 +131,11 @@ sdc_vode_solve(const Real dt_m, // The tolerance we are solving to may depend on the // iteration auto relax_fac = std::pow(sdc_solver_relax_factor, sdc_order - sdc_iteration - 1); - auto tol_dens = sdc_solver_tol_dens * relax_fac; auto tol_spec = sdc_solver_tol_spec * relax_fac; auto tol_ener = sdc_solver_tol_ener * relax_fac; // Update the momenta for this zone -- they don't react + for (int n = 0; n < 3; ++n) { U_new[UMX+n] = U_old[UMX+n] + dt_m * C[UMX+n]; } @@ -223,36 +146,38 @@ sdc_vode_solve(const Real dt_m, // load rpar + + dvode_t dvode_state; + + burn_t burn_state; + + burn_state.rho_orig = U_old[URHO]; + burn_state.rhoe_orig = U_old[UEINT]; + // If we are solving the system as an ODE, then we are // solving // dU/dt = R(U) + C // so we simply pass in C - GpuArray C_react; - C_react[0] = C[URHO]; - for (int n = 0; n < NumSpec; ++n) { - C_react[1+n] = C[UFS+n]; - } - C_react[NumSpec+1] = C[UEINT]; - - dvode_t dvode_state; - - burn_t burn_state; - - burn_state.success = true; + // Note: we need to use the indexing order that is defined in the burn_t + // so we are compatible with the integrator logic - for (int n = 0; n < NumSpec+2; n++) { - burn_state.f_source[n] = C_react[n]; + burn_state.ydot_a[SRHO] = C[URHO]; + burn_state.ydot_a[SMX] = C[UMX]; + burn_state.ydot_a[SMY] = C[UMY]; + burn_state.ydot_a[SMZ] = C[UMZ]; + burn_state.ydot_a[SEDEN] = C[UEDEN]; + burn_state.ydot_a[SEINT] = C[UEINT]; + for (int n = 0; n < NumSpec; n++) { + burn_state.ydot_a[SFS+n] = C[UFS+n]; } - - for (int n = 0; n < 3; n++) { - burn_state.mom[n] = U_new[UMX+n]; +#if NAUX_NET > 0 + for (int n = 0; n < NumAux; n++) { + burn_state.ydot_a[SFX+n] = C[UFX+n]; } +#endif - // we are always solving for rhoe with the VODE predict - burn_state.e = U_new[UEDEN]; - - burn_state.E_var = U_old[UEDEN]; + burn_state.success = true; // temperature will be used as an initial guess in the EOS burn_state.T = U_old[UTEMP]; @@ -267,11 +192,10 @@ sdc_vode_solve(const Real dt_m, // access it as 0-based in our implementation of the // RHS routine - dvode_state.y(1) = U_old[URHO]; for (int n = 0; n < NumSpec; ++n) { - dvode_state.y(2+n) = U_old[UFS+n]; + dvode_state.y(1+n) = U_old[UFS+n]; } - dvode_state.y(NumSpec+2) = U_old[UEINT]; + dvode_state.y(NumSpec+1) = U_old[UEINT]; // // set the maximum number of steps allowed // dvode_state.MXSTEP = 25000; @@ -280,13 +204,13 @@ sdc_vode_solve(const Real dt_m, dvode_state.tout = dt_m; dvode_state.HMXI = 1.0_rt / ode_max_dt; + dvode_state.jacobian_type = integrator_rp::jacobian; + // relative tolerances - dvode_state.rtol_dens = tol_dens; dvode_state.rtol_spec = tol_spec; dvode_state.rtol_enuc = tol_ener; // absolute tolerances - dvode_state.atol_dens = sdc_solver_atol * U_old[URHO]; dvode_state.atol_spec = sdc_solver_atol * U_old[URHO]; // this way, atol is the minimum x dvode_state.atol_enuc = sdc_solver_atol * U_old[UEINT]; @@ -297,11 +221,11 @@ sdc_vode_solve(const Real dt_m, } // update the full U_new - U_new[URHO] = dvode_state.y(1); for (int n = 0; n < NumSpec; ++n) { - U_new[UFS+n] = dvode_state.y(2+n); + U_new[UFS+n] = dvode_state.y(1+n); } - U_new[UEINT] = dvode_state.y(NumSpec+2); + U_new[UEINT] = dvode_state.y(NumSpec+1); + auto v2 = 0.0_rt; for (int n = 0; n < 3; ++n) { v2 += U_new[UMX+n] * U_new[UMX+n];