From 32f09b5d9ad3506c5a3cf4984e68822aee2299ff Mon Sep 17 00:00:00 2001 From: Michael Zingale Date: Mon, 18 Sep 2023 13:03:35 -0400 Subject: [PATCH 01/13] move SDC-specific routiens into Source/sdc --- Source/reactions/Castro_react_util.H | 208 -------------------------- Source/sdc/Castro_sdc_util.H | 1 + Source/sdc/Make.package | 1 + Source/sdc/sdc_react_util.H | 212 +++++++++++++++++++++++++++ Source/sdc/vode_rhs_true_sdc.H | 1 + 5 files changed, 215 insertions(+), 208 deletions(-) create mode 100644 Source/sdc/sdc_react_util.H diff --git a/Source/reactions/Castro_react_util.H b/Source/reactions/Castro_react_util.H index bca29ac27c..f64bfc6836 100644 --- a/Source/reactions/Castro_react_util.H +++ b/Source/reactions/Castro_react_util.H @@ -53,212 +53,4 @@ okay_to_burn_type(burn_t const& state) { return true; } -AMREX_GPU_HOST_DEVICE AMREX_INLINE -void -single_zone_react_source(GpuArray const& state, - GpuArray& R, - burn_t& burn_state) { - - // note: we want this burn_state to come out of here so we can - // reuse it elsewhere - - auto rhoInv = 1.0_rt / state[URHO]; - - burn_state.rho = state[URHO]; - burn_state.T = state[UTEMP]; - burn_state.e = state[UEINT] * rhoInv; - - for (int n = 0; n < NumSpec; ++n) { - burn_state.xn[n] = amrex::max(amrex::min(state[UFS+n] * rhoInv, 1.0_rt), small_x); - } - -#if NAUX_NET > 0 - for (int n = 0; n < NumAux; ++n) { - burn_state.aux[n] = state[UFX+n] * rhoInv; - } -#endif - - eos_t eos_state; - - // Ensure that the temperature going in is consistent with the internal energy. - burn_to_eos(burn_state, eos_state); - eos(eos_input_re, eos_state); - eos_to_burn(eos_state, burn_state); - - // eos_get_small_temp(&small_temp); - burn_state.T = amrex::min(MAX_TEMP, amrex::max(burn_state.T, small_temp)); - - Array1D ydot; - - actual_rhs(burn_state, ydot); - - // store the instantaneous R - for (int n = 0; n < NUM_STATE; ++n) { - R[n] = 0.0_rt; - } - - // species rates come back in terms of molar fractions - for (int n = 0; n < NumSpec; ++n) { - R[UFS+n] = state[URHO] * aion[n] * ydot(1+n); - } - - R[UEDEN] = state[URHO] * ydot(net_ienuc); - R[UEINT] = state[URHO] * ydot(net_ienuc); -} - -AMREX_GPU_HOST_DEVICE AMREX_INLINE -void -single_zone_react_source(const int i, const int j, const int k, - Array4 const& state, - Array4 const& R, - burn_t& burn_state) { - GpuArray state_arr; - GpuArray R_arr; - - for (int n = 0; n < NUM_STATE; ++n) { - state_arr[n] = state(i,j,k,n); - R_arr[n] = R(i,j,k,n); - } - - single_zone_react_source(state_arr, R_arr, burn_state); - - for (int n = 0; n < NUM_STATE; ++n) { - R(i,j,k,n) = R_arr[n]; - } -} - -AMREX_GPU_HOST_DEVICE AMREX_INLINE -void -single_zone_jac(GpuArray const& state, - burn_t& burn_state, const Real dt, - Array2D& dRdw) { - -#ifdef SIMPLIFIED_SDC -#ifndef AMREX_USE_GPU - amrex::Error("we shouldn't be here with the simplified SDC method (USE_SIMPLIFIED_SDC=TRUE)"); -#endif -#else - - Array1D ydot; - Array1D ydot_pert; - JacNetArray2D Jac; - - const auto eps = 1.e-8_rt; - - actual_rhs(burn_state, ydot); - - // Jac has the derivatives with respect to the native - // network variables, X, T. e. It does not have derivatives with - // respect to density, so we'll have to compute those ourselves. - - if (sdc_newton_use_analytic_jac == 0) { - // note the numerical Jacobian will be returned in terms of X - jac_info_t jac_info; - jac_info.h = dt; - numerical_jac(burn_state, jac_info, Jac); - } else { - actual_jac(burn_state, Jac); - - // The Jacobian from the nets is in terms of dYdot/dY, but we want - // it was dXdot/dX, so convert here. - for (int n = 1; n <= NumSpec; n++) { - for (int m = 1; m <= neqs; m++) { - // d(X_n)/dq_m - Jac(n,m) = Jac(n,m) * aion[n-1]; - } - } - - for (int m = 1; m <= neqs; m++) { - for (int n = 1; n <= NumSpec; n++) { - // d RHS_m / dX_n - Jac(m,n) = Jac(m,n) * aion_inv[n-1]; - } - } - } -#endif - - // at this point, our Jacobian should be entirely in terms of X, - // not Y. Let's now fix the rhs terms themselves to be in terms of - // dX/dt and not dY/dt. - for (int n = 0; n < NumSpec; ++n) { - ydot(n+1) *= aion[n]; - } - - // Our jacobian, dR/dw has the form: - // - // / 0 0 0 0 \ - // | d(rho X1dot)/drho d(rho X1dot)/dX1 d(rho X1dit)/dX2 ... d(rho X1dot)/dT | - // | d(rho X2dot)/drho d(rho X2dot)/dX1 d(rho X2dot)/dX2 ... d(rho X2dot)/dT | - // | ... | - // \ d(rho Edot)/drho d(rho Edot)/dX1 d(rho Edot)/dX2 ... d(rho Edot)/dT / - - for (int n = 0; n < NumSpec+2; ++n) { - for (int m = 0; m < NumSpec+2; ++m) { - dRdw(n,m) = 0.0_rt; - } - } - - // now perturb density and call the RHS to compute the derivative wrt rho - // species rates come back in terms of molar fractions - burn_t burn_state_pert = burn_state; - - burn_state_pert.rho = burn_state.rho * (1.0_rt + eps); - burn_state_pert.T = burn_state.T; - burn_state_pert.e = burn_state.e; - for (int n = 0; n < NumSpec; ++n) { - burn_state_pert.xn[n] = burn_state.xn[n]; - } -#if NAUX_NET > 0 - for (int n = 0; n < NumAux; ++n) { - burn_state_pert.aux[n] = burn_state.aux[n]; - } -#endif - - actual_rhs(burn_state_pert, ydot_pert); - - // make the rates dX/dt and not dY/dt - for (int n = 0; n < NumSpec; ++n) { - ydot_pert(n+1) *= aion[n]; - } - - // fill the column of dRdw corresponding to the derivative - // with respect to rho - for (int m = 1; m <= NumSpec; ++m) { - // d( d(rho X_m)/dt)/drho - dRdw(m, iwrho) = ydot(m) + - state[URHO] * (ydot_pert(m) - ydot(m))/(eps * burn_state.rho); - } - - // d( d(rho E)/dt)/drho - dRdw(NumSpec+1, iwrho) = ydot(net_ienuc) + - state[URHO] * (ydot_pert(net_ienuc) - ydot(net_ienuc))/(eps * burn_state.rho); - - // fill the columns of dRdw corresponding to each derivative - // with respect to species mass fraction - for (int n = 1; n <= NumSpec; ++n) { - dRdw(0, iwfs-1+n) = 0.0_rt; // density source - - for (int m = 1; m <= NumSpec; ++m) { - // d( d(rho X_m)/dt)/dX_n - dRdw(m, iwfs-1+n) = state[URHO] * Jac(m, n); - } - - // d( d(rho E)/dt)/dX_n - dRdw(NumSpec+1, iwfs-1+n) = state[URHO] * Jac(net_ienuc, n); - - } - - // now fill the column corresponding to derivatives with respect to - // energy -- this column is iwe - dRdw(0, iwe) = 0.0_rt; - - // d( d(rho X_m)/dt)/de - for (int m = 1; m <= NumSpec; ++m) { - dRdw(m, iwe) = state[URHO] * Jac(m, net_ienuc); - } - - // d( d(rho E)/dt)/de - dRdw(NumSpec+1, iwe) = state[URHO] * Jac(net_ienuc, net_ienuc); -} - #endif diff --git a/Source/sdc/Castro_sdc_util.H b/Source/sdc/Castro_sdc_util.H index ef632056b5..d223d759a7 100644 --- a/Source/sdc/Castro_sdc_util.H +++ b/Source/sdc/Castro_sdc_util.H @@ -806,6 +806,7 @@ instantaneous_react(const int i, const int j, const int k, } } } + #endif #endif diff --git a/Source/sdc/Make.package b/Source/sdc/Make.package index 78cbb732ac..709b42db9d 100644 --- a/Source/sdc/Make.package +++ b/Source/sdc/Make.package @@ -7,5 +7,6 @@ ifneq ($(USE_GPU), TRUE) CEXE_headers += Castro_sdc_util.H ifeq ($(USE_REACT), TRUE) CEXE_headers += vode_rhs_true_sdc.H + CEXE_headers += sdc_react_util.H endif endif diff --git a/Source/sdc/sdc_react_util.H b/Source/sdc/sdc_react_util.H new file mode 100644 index 0000000000..3357612879 --- /dev/null +++ b/Source/sdc/sdc_react_util.H @@ -0,0 +1,212 @@ +#ifndef SDC_REACT_UTIL_H +#define SDC_REACT_UTIL_H + +AMREX_GPU_HOST_DEVICE AMREX_INLINE +void +single_zone_react_source(GpuArray const& state, + GpuArray& R, + burn_t& burn_state) { + + // note: we want this burn_state to come out of here so we can + // reuse it elsewhere + + auto rhoInv = 1.0_rt / state[URHO]; + + burn_state.rho = state[URHO]; + burn_state.T = state[UTEMP]; + burn_state.e = state[UEINT] * rhoInv; + + for (int n = 0; n < NumSpec; ++n) { + burn_state.xn[n] = amrex::max(amrex::min(state[UFS+n] * rhoInv, 1.0_rt), small_x); + } + +#if NAUX_NET > 0 + for (int n = 0; n < NumAux; ++n) { + burn_state.aux[n] = state[UFX+n] * rhoInv; + } +#endif + + eos_t eos_state; + + // Ensure that the temperature going in is consistent with the internal energy. + burn_to_eos(burn_state, eos_state); + eos(eos_input_re, eos_state); + eos_to_burn(eos_state, burn_state); + + // eos_get_small_temp(&small_temp); + burn_state.T = amrex::min(MAX_TEMP, amrex::max(burn_state.T, small_temp)); + + Array1D ydot; + + actual_rhs(burn_state, ydot); + + // store the instantaneous R + for (int n = 0; n < NUM_STATE; ++n) { + R[n] = 0.0_rt; + } + + // species rates come back in terms of molar fractions + for (int n = 0; n < NumSpec; ++n) { + R[UFS+n] = state[URHO] * aion[n] * ydot(1+n); + } + + R[UEDEN] = state[URHO] * ydot(net_ienuc); + R[UEINT] = state[URHO] * ydot(net_ienuc); +} + +AMREX_GPU_HOST_DEVICE AMREX_INLINE +void +single_zone_react_source(const int i, const int j, const int k, + Array4 const& state, + Array4 const& R, + burn_t& burn_state) { + GpuArray state_arr; + GpuArray R_arr; + + for (int n = 0; n < NUM_STATE; ++n) { + state_arr[n] = state(i,j,k,n); + R_arr[n] = R(i,j,k,n); + } + + single_zone_react_source(state_arr, R_arr, burn_state); + + for (int n = 0; n < NUM_STATE; ++n) { + R(i,j,k,n) = R_arr[n]; + } +} + +AMREX_GPU_HOST_DEVICE AMREX_INLINE +void +single_zone_jac(GpuArray const& state, + burn_t& burn_state, const Real dt, + Array2D& dRdw) { + +#ifdef SIMPLIFIED_SDC +#ifndef AMREX_USE_GPU + amrex::Error("we shouldn't be here with the simplified SDC method (USE_SIMPLIFIED_SDC=TRUE)"); +#endif +#else + + Array1D ydot; + Array1D ydot_pert; + JacNetArray2D Jac; + + const auto eps = 1.e-8_rt; + + actual_rhs(burn_state, ydot); + + // Jac has the derivatives with respect to the native + // network variables, X, T. e. It does not have derivatives with + // respect to density, so we'll have to compute those ourselves. + + if (sdc_newton_use_analytic_jac == 0) { + // note the numerical Jacobian will be returned in terms of X + jac_info_t jac_info; + jac_info.h = dt; + numerical_jac(burn_state, jac_info, Jac); + } else { + actual_jac(burn_state, Jac); + + // The Jacobian from the nets is in terms of dYdot/dY, but we want + // it was dXdot/dX, so convert here. + for (int n = 1; n <= NumSpec; n++) { + for (int m = 1; m <= neqs; m++) { + // d(X_n)/dq_m + Jac(n,m) = Jac(n,m) * aion[n-1]; + } + } + + for (int m = 1; m <= neqs; m++) { + for (int n = 1; n <= NumSpec; n++) { + // d RHS_m / dX_n + Jac(m,n) = Jac(m,n) * aion_inv[n-1]; + } + } + } +#endif + + // at this point, our Jacobian should be entirely in terms of X, + // not Y. Let's now fix the rhs terms themselves to be in terms of + // dX/dt and not dY/dt. + for (int n = 0; n < NumSpec; ++n) { + ydot(n+1) *= aion[n]; + } + + // Our jacobian, dR/dw has the form: + // + // / 0 0 0 0 \ + // | d(rho X1dot)/drho d(rho X1dot)/dX1 d(rho X1dit)/dX2 ... d(rho X1dot)/dT | + // | d(rho X2dot)/drho d(rho X2dot)/dX1 d(rho X2dot)/dX2 ... d(rho X2dot)/dT | + // | ... | + // \ d(rho Edot)/drho d(rho Edot)/dX1 d(rho Edot)/dX2 ... d(rho Edot)/dT / + + for (int n = 0; n < NumSpec+2; ++n) { + for (int m = 0; m < NumSpec+2; ++m) { + dRdw(n,m) = 0.0_rt; + } + } + + // now perturb density and call the RHS to compute the derivative wrt rho + // species rates come back in terms of molar fractions + burn_t burn_state_pert = burn_state; + + burn_state_pert.rho = burn_state.rho * (1.0_rt + eps); + burn_state_pert.T = burn_state.T; + burn_state_pert.e = burn_state.e; + for (int n = 0; n < NumSpec; ++n) { + burn_state_pert.xn[n] = burn_state.xn[n]; + } +#if NAUX_NET > 0 + for (int n = 0; n < NumAux; ++n) { + burn_state_pert.aux[n] = burn_state.aux[n]; + } +#endif + + actual_rhs(burn_state_pert, ydot_pert); + + // make the rates dX/dt and not dY/dt + for (int n = 0; n < NumSpec; ++n) { + ydot_pert(n+1) *= aion[n]; + } + + // fill the column of dRdw corresponding to the derivative + // with respect to rho + for (int m = 1; m <= NumSpec; ++m) { + // d( d(rho X_m)/dt)/drho + dRdw(m, iwrho) = ydot(m) + + state[URHO] * (ydot_pert(m) - ydot(m))/(eps * burn_state.rho); + } + + // d( d(rho E)/dt)/drho + dRdw(NumSpec+1, iwrho) = ydot(net_ienuc) + + state[URHO] * (ydot_pert(net_ienuc) - ydot(net_ienuc))/(eps * burn_state.rho); + + // fill the columns of dRdw corresponding to each derivative + // with respect to species mass fraction + for (int n = 1; n <= NumSpec; ++n) { + dRdw(0, iwfs-1+n) = 0.0_rt; // density source + + for (int m = 1; m <= NumSpec; ++m) { + // d( d(rho X_m)/dt)/dX_n + dRdw(m, iwfs-1+n) = state[URHO] * Jac(m, n); + } + + // d( d(rho E)/dt)/dX_n + dRdw(NumSpec+1, iwfs-1+n) = state[URHO] * Jac(net_ienuc, n); + + } + + // now fill the column corresponding to derivatives with respect to + // energy -- this column is iwe + dRdw(0, iwe) = 0.0_rt; + + // d( d(rho X_m)/dt)/de + for (int m = 1; m <= NumSpec; ++m) { + dRdw(m, iwe) = state[URHO] * Jac(m, net_ienuc); + } + + // d( d(rho E)/dt)/de + dRdw(NumSpec+1, iwe) = state[URHO] * Jac(net_ienuc, net_ienuc); +} + +#endif diff --git a/Source/sdc/vode_rhs_true_sdc.H b/Source/sdc/vode_rhs_true_sdc.H index a01b11b9be..bd12ae4e2c 100644 --- a/Source/sdc/vode_rhs_true_sdc.H +++ b/Source/sdc/vode_rhs_true_sdc.H @@ -12,6 +12,7 @@ #include #endif #include +#include // The f_rhs routine provides the right-hand-side for the DVODE solver. // This is a generic interface that calls the specific RHS routine in the From 30862ecd632bbff068653f2724ef8e8c1bfa56ff Mon Sep 17 00:00:00 2001 From: Michael Zingale Date: Mon, 18 Sep 2023 13:44:39 -0400 Subject: [PATCH 02/13] move the Newton solver into its own header --- Source/sdc/Castro_sdc_util.H | 443 +-------------------------------- Source/sdc/Make.package | 1 + Source/sdc/sdc_newton_solve.H | 453 ++++++++++++++++++++++++++++++++++ 3 files changed, 455 insertions(+), 442 deletions(-) create mode 100644 Source/sdc/sdc_newton_solve.H diff --git a/Source/sdc/Castro_sdc_util.H b/Source/sdc/Castro_sdc_util.H index d223d759a7..09581b06e8 100644 --- a/Source/sdc/Castro_sdc_util.H +++ b/Source/sdc/Castro_sdc_util.H @@ -7,17 +7,13 @@ #include #endif #include +#include #include #ifndef NEW_NETWORK_IMPLEMENTATION #include #endif #endif -// error codes -constexpr int NEWTON_SUCCESS = 0; -constexpr int SINGULAR_MATRIX = -1; -constexpr int CONVERGENCE_FAILURE = -2; - // solvers constexpr int NEWTON_SOLVE = 1; constexpr int VODE_SOLVE = 2; @@ -50,443 +46,6 @@ normalize_species_sdc(const int i, const int j, const int k, #ifdef REACTIONS -AMREX_GPU_HOST_DEVICE AMREX_INLINE -void -f_sdc_jac(const Real dt_m, - GpuArray const& U, - GpuArray& f, - RArray2D& Jac, - GpuArray& f_source, - GpuArray& mom_source, - const Real T_old, - const Real E_var) { - - // This is used with the Newton solve and returns f and the Jacobian - - GpuArray U_full; - GpuArray R_full; - GpuArray R_react; - - Array2D dRdw = {0.0_rt}; - Array2D dwdU = {0.0_rt}; - - Array2D dRdU; - - // we are not solving the momentum equations - // create a full state -- we need this for some interfaces - U_full[URHO] = U[0]; - - for (int n = 0; n < NumSpec; ++n) { - U_full[UFS+n] = U[1+n]; - } - - if (sdc_solve_for_rhoe == 1) { - U_full[UEINT] = U[NumSpec+1]; - U_full[UEDEN] = E_var; - } else { - U_full[UEDEN] = U[NumSpec+1]; - U_full[UEINT] = E_var; - } - - for (int n = 0; n < 3; ++n) { - U_full[UMX+n] = mom_source[n]; - } - - // normalize the species - auto sum_rhoX = 0.0_rt; - for (int n = 0; n < NumSpec; ++n) { - U_full[UFS+n] = amrex::max(network_rp::small_x, U_full[UFS+n]); - sum_rhoX += U_full[UFS+n]; - } - - for (int n = 0; n < NumSpec; ++n) { - U_full[UFS+n] *= U_full[URHO] / sum_rhoX; - } - - // compute the temperature and species derivatives -- - // maybe this should be done using the burn_state - // returned by single_zone_react_source, since it is - // more consistent T from e - - eos_extra_t eos_state; - eos_state.rho = U_full[URHO]; - eos_state.T = T_old; // initial guess - for (int n = 0; n < NumSpec; ++n) { - eos_state.xn[n] = U_full[UFS+n] / U_full[URHO]; - } -#if NAUX_NET > 0 - for (int n = 0; n < NumAux; ++n) { - eos_state.aux[n] = U_full[UFX+n] / U_full[URHO]; - } -#endif - eos_state.e = U_full[UEINT] / U_full[URHO]; // (U_full[UEDEN] - 0.5_rt*sum(U_full(UMX:UMZ))/U_full[URHO])/U_full[URHO] - - eos(eos_input_re, eos_state); - - U_full[UTEMP] = eos_state.T; - - // we'll create a burn_state to pass stuff from the RHS to the Jac function - burn_t burn_state; - - single_zone_react_source(U_full, R_full, burn_state); - - // store the subset of R used in the Jacobian - R_react[0] = R_full[URHO]; - for (int n = 0; n < NumSpec; ++n) { - R_react[n+1] = R_full[UFS+n]; - } - if (sdc_solve_for_rhoe == 1) { - R_react[NumSpec+1] = R_full[UEINT]; - } else { - R_react[NumSpec+1] = R_full[UEDEN]; - } - - for (int n = 0; n < NumSpec+2; ++n) { - f[n] = U[n] - dt_m * R_react[n] - f_source[n]; - } - - // get dRdw -- this may do a numerical approximation or use the - // network's analytic Jac - single_zone_jac(U_full, burn_state, dt_m, dRdw); - - // the density row - dwdU(iwrho, 0) = 1.0_rt; - - // the X_k rows - for (int m = 1; m < NumSpec+1; ++m) { - dwdU(iwfs-1+m, 0) = -U[m] / (U[0]*U[0]); - dwdU(iwfs-1+m, m) = 1.0_rt / U[0]; - } - - auto eos_xderivs = composition_derivatives(eos_state); - - // now the e row -- this depends on whether we are evolving (rho E) or (rho e) - auto denom = 1.0_rt / eos_state.rho; - auto xn_sum = 0.0_rt; - for (int n = 0; n < NumSpec; ++n) { - xn_sum += eos_state.xn[n] * eos_xderivs.dedX[n]; - } - - if (sdc_solve_for_rhoe == 1) { - dwdU(iwe, 0) = denom * (xn_sum - eos_state.e); - } else { - auto u2_sum = 0.0_rt; - for (auto n = 0; n < 3; ++n) { - u2_sum += U_full[UMX+n] * U_full[UMX+n]; - } - dwdU(iwe, 0) = denom * (xn_sum - eos_state.e - - 0.5_rt * u2_sum / (eos_state.rho * eos_state.rho)); - } - - for (int m = 0; m < NumSpec; ++m) { - dwdU(iwe,m+1) = -denom * eos_xderivs.dedX[m]; - } - - dwdU(iwe, NumSpec+1) = denom; - - // construct the Jacobian -- we can get most of the - // terms from the network itself, but we do not rely on - // it having derivative wrt density - - // Note: Jac is 1-based, because that is what the linear algebra - // routines expect but the components, dRdw and dwdU are 0-based! - - for (int n = 1; n <= NumSpec+2; ++n) { - for (int m = 1; m <= NumSpec+2; ++m) { - Jac(n, m) = 0.0_rt; - } - } - - for (int m = 1; m <= NumSpec+2; ++m) { - Jac(m, m) = 1.0_rt; - } - - // auto dRdU = matmul(dRdw, dwdU); - // dRdU is 0-based - - for (int n = 0; n <= NumSpec+1; ++n) { - for (int m = 0; m <= NumSpec+1; ++m) { - dRdU(n,m) = 0.0_rt; - for (int l = 0; l <= NumSpec+1; ++l) { - dRdU(n,m) += dRdw(n,l) * dwdU(l,m); - } - } - } - - for (int n = 0; n < NumSpec+2; ++n) { - for (int m = 0; m < NumSpec+2; ++m) { - Jac(n+1, m+1) -= dt_m * dRdU(n,m); - } - } - -} - - -AMREX_GPU_HOST_DEVICE AMREX_INLINE -void -sdc_newton_solve(const Real dt_m, - GpuArray const& U_old, - GpuArray & U_new, - GpuArray const& C, - const int sdc_iteration, - Real& err_out, - int& ierr) { - // the purpose of this function is to solve the system - // U - dt R(U) = U_old + dt C using a Newton solve. - // - // here, U_new should come in as a guess for the new U - // and will be returned with the value that satisfied the - // nonlinear function - - RArray2D Jac; - - // we will do the implicit update of only the terms that - // have reactive sources - // - // 0 : rho - // 1:NumSpec : species - // NumSpec+1 : (rho E) or (rho e) - - GpuArray U_react; - GpuArray f_source; - GpuArray mom_source; - GpuArray dU_react; - GpuArray f; - RArray1D f_rhs; - - const int MAX_ITER = 100; - - ierr = NEWTON_SUCCESS; - - // the tolerance we are solving to may depend on the - // iteration - Real relax_fac = std::pow(sdc_solver_relax_factor, sdc_order - sdc_iteration - 1); - Real tol_dens = sdc_solver_tol_dens * relax_fac; - Real tol_spec = sdc_solver_tol_spec * relax_fac; - Real tol_ener = sdc_solver_tol_ener * relax_fac; - - // update the momenta for this zone -- they don't react - for (int n = 0; n < 3; ++n) { - U_new[UMX+n] = U_old[UMX+n] + dt_m * C[UMX+n]; - } - - // now only save the subset that participates in the - // nonlinear solve -- note: we include the old state in - // f_source - - // load rpar - - // for the Jacobian solve, we are solving - // f(U) = U - dt R(U) - U_old - dt C = 0 - // we define f_source = U_old + dt C so we are solving - // f(U) = U - dt R(U) - f_source = 0 - - f_source[0] = U_old[URHO] + dt_m * C[URHO]; - for (int n = 0; n < NumSpec; ++n) { - f_source[1 + n] = U_old[UFS + n] + dt_m * C[UFS + n]; - } - if (sdc_solve_for_rhoe == 1) { - f_source[NumSpec+1] = U_old[UEINT] + dt_m * C[UEINT]; - } else { - f_source[NumSpec+1] = U_old[UEDEN] + dt_m * C[UEDEN]; - } - - // set the momenta to be U_new - for (int n = 0; n < 3; ++n) { - mom_source[n] = U_new[UMX+n]; - } - - // temperature will be used as an initial guess in the EOS - - Real T_old = U_old[UTEMP]; - - // we should be able to do an update for this somehow? - - Real E_var; - - if (sdc_solve_for_rhoe == 1) { - E_var = U_new[UEDEN]; - } else { - E_var = U_new[UEINT]; - } - - // store the subset for the nonlinear solve - // We use an initial guess if possible - U_react[0] = U_new[URHO]; - for (int n = 0; n < NumSpec; ++n) { - U_react[1+n] = U_new[UFS+n]; - } - if (sdc_solve_for_rhoe == 1) { - U_react[NumSpec+1] = U_new[UEINT]; - } else { - U_react[NumSpec+1] = U_new[UEDEN]; - } - -#if (INTEGRATOR == 0) - - // do a simple Newton solve - - // iterative loop - int iter = 0; - int max_newton_iter = MAX_ITER; - - Real err = 1.e30_rt; - bool converged = false; - - while (!converged && iter < max_newton_iter) { - int info = 0; - f_sdc_jac(dt_m, U_react, f, Jac, f_source, mom_source, T_old, E_var); - - IArray1D ipvt; - - // solve the linear system: Jac dU_react = -f -#ifdef NEW_NETWORK_IMPLEMENTATION - RHS::dgefa(Jac); - info = 0; -#else - dgefa(Jac, ipvt, info); -#endif - if (info != 0) { - ierr = SINGULAR_MATRIX; - return; - } - - for (int n = 1; n <= NumSpec+2; ++n) { - f_rhs(n) = -f[n-1]; - } - -#ifdef NEW_NETWORK_IMPLEMENTATION - RHS::dgesl(Jac, f_rhs); -#else - dgesl(Jac, ipvt, f_rhs); -#endif - - for (int n = 0; n < NumSpec+2; ++n) { - dU_react[n] = f_rhs(n+1); - } - - // how much of dU_react should we apply? - Real eta = 1.0_rt; - for (int n = 0; n < NumSpec+2; ++n) { - dU_react[n] *= eta; - U_react[n] += dU_react[n]; - } - - GpuArray eps_tot; - - eps_tot[0] = tol_dens * std::abs(U_react[0]) + sdc_solver_atol; - - // for species, atol is the mass fraction limit, so we - // multiply by density to get a partial density limit - for (int n = 0; n < NumSpec; ++n) { - eps_tot[1 + n] = tol_spec * std::abs(U_react[1 + n]) + sdc_solver_atol * std::abs(U_react[0]); - } - eps_tot[NumSpec+1] = tol_ener * std::abs(U_react[NumSpec+1]) + sdc_solver_atol; - - // compute the norm of the weighted error, where the - // weights are 1/eps_tot - auto err_sum = 0.0_rt; - for (int n = 0; n < NumSpec+2; ++n) { - err_sum += dU_react[n] * dU_react[n] / (eps_tot[n]* eps_tot[n]); - } - err = std::sqrt(err_sum / (NumSpec+2)); - - if (err < 1.0_rt) { - converged = true; - } - iter++; - } - - err_out = err; - - if (!converged) { - ierr = CONVERGENCE_FAILURE; - return; - } - -#endif - - // update the full U_new - // if we updated total energy, then correct internal, - // or vice versa - U_new[URHO] = U_react[0]; - for (int n = 0; n < NumSpec; ++n) { - U_new[UFS+n] = U_react[1+n]; - } - auto v2 = 0.0_rt; - for (int m = 0; m < 3; ++m) { - v2 += U_new[UMX+m] * U_new[UMX+m]; - } - - if (sdc_solve_for_rhoe == 1) { - U_new[UEINT] = U_react[NumSpec+1]; - U_new[UEDEN] = U_new[UEINT] + 0.5_rt * v2 / U_new[URHO]; - } else { - U_new[UEDEN] = U_react[NumSpec+1]; - U_new[UEINT] = U_new[UEDEN] - 0.5_rt * v2 / U_new[URHO]; - } -} - -AMREX_GPU_HOST_DEVICE AMREX_INLINE -void -sdc_newton_subdivide(const Real dt_m, - GpuArray const& U_old, - GpuArray& U_new, - GpuArray const& C, - const int sdc_iteration, - Real& err_out, - int& ierr) { - // This is the driver for solving the nonlinear update for - // the reating/advecting system using Newton's method. It - // attempts to do the solution for the full dt_m requested, - // but if it fails, will subdivide the domain until it - // converges or reaches our limit on the number of - // subintervals. - - const int MAX_NSUB = 64; - GpuArray U_begin; - - // subdivide the timestep and do multiple Newtons. We come - // in here with an initial guess for the new solution - // stored in U_new. That only really makes sense for the - // case where we have 1 substep. Otherwise, we should just - // use the old time solution. - - int nsub = 1; - ierr = CONVERGENCE_FAILURE; - - for (int n = 0; n < NUM_STATE; ++n) { - U_begin[n] = U_old[n]; - } - - while (nsub < MAX_NSUB && ierr != NEWTON_SUCCESS) { - if (nsub > 1) { - for (int n = 0; n < NUM_STATE; ++n) { - U_new[n] = U_old[n]; - } - } - Real dt_sub = dt_m / nsub; - - for (int isub = 0; isub < nsub; ++isub) { - // normalize species - Real sum_rhoX = 0.0_rt; - for (int n = 0; n < NumSpec; ++n) { - U_begin[UFS + n] = amrex::max(network_rp::small_x, U_begin[UFS + n]); - sum_rhoX += U_begin[UFS + n]; - } - for (int n = 0; n < NumSpec; ++n) { - U_begin[UFS + n] *= U_begin[URHO] / sum_rhoX; - } - - sdc_newton_solve(dt_sub, U_begin, U_new, C, sdc_iteration, err_out, ierr); - - for (int n = 0; n < NUM_STATE; ++n) { - U_begin[n] = U_new[n]; - } - } - nsub *= 2; - } -} AMREX_GPU_HOST_DEVICE AMREX_INLINE void diff --git a/Source/sdc/Make.package b/Source/sdc/Make.package index 709b42db9d..c135c88138 100644 --- a/Source/sdc/Make.package +++ b/Source/sdc/Make.package @@ -8,5 +8,6 @@ ifneq ($(USE_GPU), TRUE) ifeq ($(USE_REACT), TRUE) CEXE_headers += vode_rhs_true_sdc.H CEXE_headers += sdc_react_util.H + CEXE_headers += sdc_newton_solve.H endif endif diff --git a/Source/sdc/sdc_newton_solve.H b/Source/sdc/sdc_newton_solve.H new file mode 100644 index 0000000000..df0f0faebe --- /dev/null +++ b/Source/sdc/sdc_newton_solve.H @@ -0,0 +1,453 @@ +#ifndef SDC_NEWTON_SOLVE_H +#define SDC_NEWTON_SOLVE_H + +#include + +// error codes +constexpr int NEWTON_SUCCESS = 0; +constexpr int SINGULAR_MATRIX = -1; +constexpr int CONVERGENCE_FAILURE = -2; + + +#ifdef REACTIONS + +AMREX_GPU_HOST_DEVICE AMREX_INLINE +void +f_sdc_jac(const Real dt_m, + GpuArray const& U, + GpuArray& f, + RArray2D& Jac, + GpuArray& f_source, + GpuArray& mom_source, + const Real T_old, + const Real E_var) { + + // This is used with the Newton solve and returns f and the Jacobian + + GpuArray U_full; + GpuArray R_full; + GpuArray R_react; + + Array2D dRdw = {0.0_rt}; + Array2D dwdU = {0.0_rt}; + + Array2D dRdU; + + // we are not solving the momentum equations + // create a full state -- we need this for some interfaces + U_full[URHO] = U[0]; + + for (int n = 0; n < NumSpec; ++n) { + U_full[UFS+n] = U[1+n]; + } + + if (sdc_solve_for_rhoe == 1) { + U_full[UEINT] = U[NumSpec+1]; + U_full[UEDEN] = E_var; + } else { + U_full[UEDEN] = U[NumSpec+1]; + U_full[UEINT] = E_var; + } + + for (int n = 0; n < 3; ++n) { + U_full[UMX+n] = mom_source[n]; + } + + // normalize the species + auto sum_rhoX = 0.0_rt; + for (int n = 0; n < NumSpec; ++n) { + U_full[UFS+n] = amrex::max(network_rp::small_x, U_full[UFS+n]); + sum_rhoX += U_full[UFS+n]; + } + + for (int n = 0; n < NumSpec; ++n) { + U_full[UFS+n] *= U_full[URHO] / sum_rhoX; + } + + // compute the temperature and species derivatives -- + // maybe this should be done using the burn_state + // returned by single_zone_react_source, since it is + // more consistent T from e + + eos_extra_t eos_state; + eos_state.rho = U_full[URHO]; + eos_state.T = T_old; // initial guess + for (int n = 0; n < NumSpec; ++n) { + eos_state.xn[n] = U_full[UFS+n] / U_full[URHO]; + } +#if NAUX_NET > 0 + for (int n = 0; n < NumAux; ++n) { + eos_state.aux[n] = U_full[UFX+n] / U_full[URHO]; + } +#endif + eos_state.e = U_full[UEINT] / U_full[URHO]; // (U_full[UEDEN] - 0.5_rt*sum(U_full(UMX:UMZ))/U_full[URHO])/U_full[URHO] + + eos(eos_input_re, eos_state); + + U_full[UTEMP] = eos_state.T; + + // we'll create a burn_state to pass stuff from the RHS to the Jac function + burn_t burn_state; + + single_zone_react_source(U_full, R_full, burn_state); + + // store the subset of R used in the Jacobian + R_react[0] = R_full[URHO]; + for (int n = 0; n < NumSpec; ++n) { + R_react[n+1] = R_full[UFS+n]; + } + if (sdc_solve_for_rhoe == 1) { + R_react[NumSpec+1] = R_full[UEINT]; + } else { + R_react[NumSpec+1] = R_full[UEDEN]; + } + + for (int n = 0; n < NumSpec+2; ++n) { + f[n] = U[n] - dt_m * R_react[n] - f_source[n]; + } + + // get dRdw -- this may do a numerical approximation or use the + // network's analytic Jac + single_zone_jac(U_full, burn_state, dt_m, dRdw); + + // the density row + dwdU(iwrho, 0) = 1.0_rt; + + // the X_k rows + for (int m = 1; m < NumSpec+1; ++m) { + dwdU(iwfs-1+m, 0) = -U[m] / (U[0]*U[0]); + dwdU(iwfs-1+m, m) = 1.0_rt / U[0]; + } + + auto eos_xderivs = composition_derivatives(eos_state); + + // now the e row -- this depends on whether we are evolving (rho E) or (rho e) + auto denom = 1.0_rt / eos_state.rho; + auto xn_sum = 0.0_rt; + for (int n = 0; n < NumSpec; ++n) { + xn_sum += eos_state.xn[n] * eos_xderivs.dedX[n]; + } + + if (sdc_solve_for_rhoe == 1) { + dwdU(iwe, 0) = denom * (xn_sum - eos_state.e); + } else { + auto u2_sum = 0.0_rt; + for (auto n = 0; n < 3; ++n) { + u2_sum += U_full[UMX+n] * U_full[UMX+n]; + } + dwdU(iwe, 0) = denom * (xn_sum - eos_state.e - + 0.5_rt * u2_sum / (eos_state.rho * eos_state.rho)); + } + + for (int m = 0; m < NumSpec; ++m) { + dwdU(iwe,m+1) = -denom * eos_xderivs.dedX[m]; + } + + dwdU(iwe, NumSpec+1) = denom; + + // construct the Jacobian -- we can get most of the + // terms from the network itself, but we do not rely on + // it having derivative wrt density + + // Note: Jac is 1-based, because that is what the linear algebra + // routines expect but the components, dRdw and dwdU are 0-based! + + for (int n = 1; n <= NumSpec+2; ++n) { + for (int m = 1; m <= NumSpec+2; ++m) { + Jac(n, m) = 0.0_rt; + } + } + + for (int m = 1; m <= NumSpec+2; ++m) { + Jac(m, m) = 1.0_rt; + } + + // auto dRdU = matmul(dRdw, dwdU); + // dRdU is 0-based + + for (int n = 0; n <= NumSpec+1; ++n) { + for (int m = 0; m <= NumSpec+1; ++m) { + dRdU(n,m) = 0.0_rt; + for (int l = 0; l <= NumSpec+1; ++l) { + dRdU(n,m) += dRdw(n,l) * dwdU(l,m); + } + } + } + + for (int n = 0; n < NumSpec+2; ++n) { + for (int m = 0; m < NumSpec+2; ++m) { + Jac(n+1, m+1) -= dt_m * dRdU(n,m); + } + } + +} + + +AMREX_GPU_HOST_DEVICE AMREX_INLINE +void +sdc_newton_solve(const Real dt_m, + GpuArray const& U_old, + GpuArray & U_new, + GpuArray const& C, + const int sdc_iteration, + Real& err_out, + int& ierr) { + // the purpose of this function is to solve the system + // U - dt R(U) = U_old + dt C using a Newton solve. + // + // here, U_new should come in as a guess for the new U + // and will be returned with the value that satisfied the + // nonlinear function + + RArray2D Jac; + + // we will do the implicit update of only the terms that + // have reactive sources + // + // 0 : rho + // 1:NumSpec : species + // NumSpec+1 : (rho E) or (rho e) + + GpuArray U_react; + GpuArray f_source; + GpuArray mom_source; + GpuArray dU_react; + GpuArray f; + RArray1D f_rhs; + + const int MAX_ITER = 100; + + ierr = NEWTON_SUCCESS; + + // the tolerance we are solving to may depend on the + // iteration + Real relax_fac = std::pow(sdc_solver_relax_factor, sdc_order - sdc_iteration - 1); + Real tol_dens = sdc_solver_tol_dens * relax_fac; + Real tol_spec = sdc_solver_tol_spec * relax_fac; + Real tol_ener = sdc_solver_tol_ener * relax_fac; + + // update the momenta for this zone -- they don't react + for (int n = 0; n < 3; ++n) { + U_new[UMX+n] = U_old[UMX+n] + dt_m * C[UMX+n]; + } + + // now only save the subset that participates in the + // nonlinear solve -- note: we include the old state in + // f_source + + // load rpar + + // for the Jacobian solve, we are solving + // f(U) = U - dt R(U) - U_old - dt C = 0 + // we define f_source = U_old + dt C so we are solving + // f(U) = U - dt R(U) - f_source = 0 + + f_source[0] = U_old[URHO] + dt_m * C[URHO]; + for (int n = 0; n < NumSpec; ++n) { + f_source[1 + n] = U_old[UFS + n] + dt_m * C[UFS + n]; + } + if (sdc_solve_for_rhoe == 1) { + f_source[NumSpec+1] = U_old[UEINT] + dt_m * C[UEINT]; + } else { + f_source[NumSpec+1] = U_old[UEDEN] + dt_m * C[UEDEN]; + } + + // set the momenta to be U_new + for (int n = 0; n < 3; ++n) { + mom_source[n] = U_new[UMX+n]; + } + + // temperature will be used as an initial guess in the EOS + + Real T_old = U_old[UTEMP]; + + // we should be able to do an update for this somehow? + + Real E_var; + + if (sdc_solve_for_rhoe == 1) { + E_var = U_new[UEDEN]; + } else { + E_var = U_new[UEINT]; + } + + // store the subset for the nonlinear solve + // We use an initial guess if possible + U_react[0] = U_new[URHO]; + for (int n = 0; n < NumSpec; ++n) { + U_react[1+n] = U_new[UFS+n]; + } + if (sdc_solve_for_rhoe == 1) { + U_react[NumSpec+1] = U_new[UEINT]; + } else { + U_react[NumSpec+1] = U_new[UEDEN]; + } + +#if (INTEGRATOR == 0) + + // do a simple Newton solve + + // iterative loop + int iter = 0; + int max_newton_iter = MAX_ITER; + + Real err = 1.e30_rt; + bool converged = false; + + while (!converged && iter < max_newton_iter) { + int info = 0; + f_sdc_jac(dt_m, U_react, f, Jac, f_source, mom_source, T_old, E_var); + + IArray1D ipvt; + + // solve the linear system: Jac dU_react = -f +#ifdef NEW_NETWORK_IMPLEMENTATION + RHS::dgefa(Jac); + info = 0; +#else + dgefa(Jac, ipvt, info); +#endif + if (info != 0) { + ierr = SINGULAR_MATRIX; + return; + } + + for (int n = 1; n <= NumSpec+2; ++n) { + f_rhs(n) = -f[n-1]; + } + +#ifdef NEW_NETWORK_IMPLEMENTATION + RHS::dgesl(Jac, f_rhs); +#else + dgesl(Jac, ipvt, f_rhs); +#endif + + for (int n = 0; n < NumSpec+2; ++n) { + dU_react[n] = f_rhs(n+1); + } + + // how much of dU_react should we apply? + Real eta = 1.0_rt; + for (int n = 0; n < NumSpec+2; ++n) { + dU_react[n] *= eta; + U_react[n] += dU_react[n]; + } + + GpuArray eps_tot; + + eps_tot[0] = tol_dens * std::abs(U_react[0]) + sdc_solver_atol; + + // for species, atol is the mass fraction limit, so we + // multiply by density to get a partial density limit + for (int n = 0; n < NumSpec; ++n) { + eps_tot[1 + n] = tol_spec * std::abs(U_react[1 + n]) + sdc_solver_atol * std::abs(U_react[0]); + } + eps_tot[NumSpec+1] = tol_ener * std::abs(U_react[NumSpec+1]) + sdc_solver_atol; + + // compute the norm of the weighted error, where the + // weights are 1/eps_tot + auto err_sum = 0.0_rt; + for (int n = 0; n < NumSpec+2; ++n) { + err_sum += dU_react[n] * dU_react[n] / (eps_tot[n]* eps_tot[n]); + } + err = std::sqrt(err_sum / (NumSpec+2)); + + if (err < 1.0_rt) { + converged = true; + } + iter++; + } + + err_out = err; + + if (!converged) { + ierr = CONVERGENCE_FAILURE; + return; + } + +#endif + + // update the full U_new + // if we updated total energy, then correct internal, + // or vice versa + U_new[URHO] = U_react[0]; + for (int n = 0; n < NumSpec; ++n) { + U_new[UFS+n] = U_react[1+n]; + } + auto v2 = 0.0_rt; + for (int m = 0; m < 3; ++m) { + v2 += U_new[UMX+m] * U_new[UMX+m]; + } + + if (sdc_solve_for_rhoe == 1) { + U_new[UEINT] = U_react[NumSpec+1]; + U_new[UEDEN] = U_new[UEINT] + 0.5_rt * v2 / U_new[URHO]; + } else { + U_new[UEDEN] = U_react[NumSpec+1]; + U_new[UEINT] = U_new[UEDEN] - 0.5_rt * v2 / U_new[URHO]; + } +} + +AMREX_GPU_HOST_DEVICE AMREX_INLINE +void +sdc_newton_subdivide(const Real dt_m, + GpuArray const& U_old, + GpuArray& U_new, + GpuArray const& C, + const int sdc_iteration, + Real& err_out, + int& ierr) { + // This is the driver for solving the nonlinear update for + // the reating/advecting system using Newton's method. It + // attempts to do the solution for the full dt_m requested, + // but if it fails, will subdivide the domain until it + // converges or reaches our limit on the number of + // subintervals. + + const int MAX_NSUB = 64; + GpuArray U_begin; + + // subdivide the timestep and do multiple Newtons. We come + // in here with an initial guess for the new solution + // stored in U_new. That only really makes sense for the + // case where we have 1 substep. Otherwise, we should just + // use the old time solution. + + int nsub = 1; + ierr = CONVERGENCE_FAILURE; + + for (int n = 0; n < NUM_STATE; ++n) { + U_begin[n] = U_old[n]; + } + + while (nsub < MAX_NSUB && ierr != NEWTON_SUCCESS) { + if (nsub > 1) { + for (int n = 0; n < NUM_STATE; ++n) { + U_new[n] = U_old[n]; + } + } + Real dt_sub = dt_m / nsub; + + for (int isub = 0; isub < nsub; ++isub) { + // normalize species + Real sum_rhoX = 0.0_rt; + for (int n = 0; n < NumSpec; ++n) { + U_begin[UFS + n] = amrex::max(network_rp::small_x, U_begin[UFS + n]); + sum_rhoX += U_begin[UFS + n]; + } + for (int n = 0; n < NumSpec; ++n) { + U_begin[UFS + n] *= U_begin[URHO] / sum_rhoX; + } + + sdc_newton_solve(dt_sub, U_begin, U_new, C, sdc_iteration, err_out, ierr); + + for (int n = 0; n < NUM_STATE; ++n) { + U_begin[n] = U_new[n]; + } + } + nsub *= 2; + } +} +#endif + +#endif From fcf55d9b1108ae0d9518afeef262c9cba0c84419 Mon Sep 17 00:00:00 2001 From: Michael Zingale Date: Mon, 18 Sep 2023 15:19:46 -0400 Subject: [PATCH 03/13] start removing density from the reacting system --- Source/driver/_cpp_parameters | 3 - Source/reactions/Castro_react_util.H | 4 - Source/sdc/sdc_newton_solve.H | 215 +++++++++------------------ Source/sdc/sdc_react_util.H | 98 ++---------- 4 files changed, 84 insertions(+), 236 deletions(-) diff --git a/Source/driver/_cpp_parameters b/Source/driver/_cpp_parameters index e6e06797ec..ec98042eb0 100644 --- a/Source/driver/_cpp_parameters +++ b/Source/driver/_cpp_parameters @@ -267,9 +267,6 @@ sdc_extra int 0 # which SDC nonlinear solver to use? 1 = Newton, 2 = VODE, 3 = VODE for first iter sdc_solver int 1 -# relative tolerance for the nonlinear solve on rho with SDC -sdc_solver_tol_dens Real 1.e-6 - # relative tolerance for the nonlinear solve on rho X_k with SDC sdc_solver_tol_spec Real 1.e-6 diff --git a/Source/reactions/Castro_react_util.H b/Source/reactions/Castro_react_util.H index f64bfc6836..756f088649 100644 --- a/Source/reactions/Castro_react_util.H +++ b/Source/reactions/Castro_react_util.H @@ -10,10 +10,6 @@ #include -constexpr int iwrho = 0; -constexpr int iwfs = 1; -constexpr int iwe = NumSpec+1; - // #include AMREX_GPU_HOST_DEVICE AMREX_INLINE diff --git a/Source/sdc/sdc_newton_solve.H b/Source/sdc/sdc_newton_solve.H index df0f0faebe..30c1966720 100644 --- a/Source/sdc/sdc_newton_solve.H +++ b/Source/sdc/sdc_newton_solve.H @@ -14,10 +14,11 @@ constexpr int CONVERGENCE_FAILURE = -2; AMREX_GPU_HOST_DEVICE AMREX_INLINE void f_sdc_jac(const Real dt_m, - GpuArray const& U, - GpuArray& f, + Array1D const& U, + Array1D& f, RArray2D& Jac, - GpuArray& f_source, + Array1D& f_source, + const Real rho, GpuArray& mom_source, const Real T_old, const Real E_var) { @@ -26,28 +27,20 @@ f_sdc_jac(const Real dt_m, GpuArray U_full; GpuArray R_full; - GpuArray R_react; - Array2D dRdw = {0.0_rt}; - Array2D dwdU = {0.0_rt}; - - Array2D dRdU; + Array1D R_react; // we are not solving the momentum equations // create a full state -- we need this for some interfaces - U_full[URHO] = U[0]; + + U_full[URHO] = rho; for (int n = 0; n < NumSpec; ++n) { - U_full[UFS+n] = U[1+n]; + U_full[UFS+n] = U(n+1); } - if (sdc_solve_for_rhoe == 1) { - U_full[UEINT] = U[NumSpec+1]; - U_full[UEDEN] = E_var; - } else { - U_full[UEDEN] = U[NumSpec+1]; - U_full[UEINT] = E_var; - } + U_full[UEINT] = U[NumSpec+1]; + U_full[UEDEN] = E_var; for (int n = 0; n < 3; ++n) { U_full[UMX+n] = mom_source[n]; @@ -76,107 +69,52 @@ f_sdc_jac(const Real dt_m, eos_state.xn[n] = U_full[UFS+n] / U_full[URHO]; } #if NAUX_NET > 0 - for (int n = 0; n < NumAux; ++n) { - eos_state.aux[n] = U_full[UFX+n] / U_full[URHO]; - } + amrex::Error("error: aux data not currently supported in true SDC"); #endif - eos_state.e = U_full[UEINT] / U_full[URHO]; // (U_full[UEDEN] - 0.5_rt*sum(U_full(UMX:UMZ))/U_full[URHO])/U_full[URHO] + + eos_state.e = U_full[UEINT] / U_full[URHO]; eos(eos_input_re, eos_state); U_full[UTEMP] = eos_state.T; // we'll create a burn_state to pass stuff from the RHS to the Jac function + burn_t burn_state; single_zone_react_source(U_full, R_full, burn_state); // store the subset of R used in the Jacobian - R_react[0] = R_full[URHO]; + for (int n = 0; n < NumSpec; ++n) { R_react[n+1] = R_full[UFS+n]; } - if (sdc_solve_for_rhoe == 1) { - R_react[NumSpec+1] = R_full[UEINT]; - } else { - R_react[NumSpec+1] = R_full[UEDEN]; - } - - for (int n = 0; n < NumSpec+2; ++n) { - f[n] = U[n] - dt_m * R_react[n] - f_source[n]; - } - - // get dRdw -- this may do a numerical approximation or use the - // network's analytic Jac - single_zone_jac(U_full, burn_state, dt_m, dRdw); + R_react[NumSpec+1] = R_full[UEINT]; - // the density row - dwdU(iwrho, 0) = 1.0_rt; + // we are solving J dU = -f + // where f is Eq. 36 evaluated with the currect guess for U - // the X_k rows - for (int m = 1; m < NumSpec+1; ++m) { - dwdU(iwfs-1+m, 0) = -U[m] / (U[0]*U[0]); - dwdU(iwfs-1+m, m) = 1.0_rt / U[0]; - } - - auto eos_xderivs = composition_derivatives(eos_state); - - // now the e row -- this depends on whether we are evolving (rho E) or (rho e) - auto denom = 1.0_rt / eos_state.rho; - auto xn_sum = 0.0_rt; - for (int n = 0; n < NumSpec; ++n) { - xn_sum += eos_state.xn[n] * eos_xderivs.dedX[n]; - } - - if (sdc_solve_for_rhoe == 1) { - dwdU(iwe, 0) = denom * (xn_sum - eos_state.e); - } else { - auto u2_sum = 0.0_rt; - for (auto n = 0; n < 3; ++n) { - u2_sum += U_full[UMX+n] * U_full[UMX+n]; - } - dwdU(iwe, 0) = denom * (xn_sum - eos_state.e - - 0.5_rt * u2_sum / (eos_state.rho * eos_state.rho)); + for (int n = 1; n < NumSpec+1; ++n) { + f[n] = U[n] - dt_m * R_react[n] - f_source[n]; } - for (int m = 0; m < NumSpec; ++m) { - dwdU(iwe,m+1) = -denom * eos_xderivs.dedX[m]; - } + // get the Jacobian. - dwdU(iwe, NumSpec+1) = denom; + /// First we get the dR/dU expression. + // Instead of the decomposition into dw/dU and dR/dw + // written out in the original paper Appendix A, we instead use the + // form from the simplified-SDC paper (Zingale et al. 2022). Note: + // we are not including density anymore. - // construct the Jacobian -- we can get most of the - // terms from the network itself, but we do not rely on - // it having derivative wrt density + single_zone_jac(U_full, burn_state, dt_m, Jac); - // Note: Jac is 1-based, because that is what the linear algebra - // routines expect but the components, dRdw and dwdU are 0-based! + // Our Jacobian has the form: J = I - dt dR/dw dwdU + // (Eq. 38), so now we fix that for (int n = 1; n <= NumSpec+2; ++n) { for (int m = 1; m <= NumSpec+2; ++m) { - Jac(n, m) = 0.0_rt; - } - } - - for (int m = 1; m <= NumSpec+2; ++m) { - Jac(m, m) = 1.0_rt; - } - - // auto dRdU = matmul(dRdw, dwdU); - // dRdU is 0-based - - for (int n = 0; n <= NumSpec+1; ++n) { - for (int m = 0; m <= NumSpec+1; ++m) { - dRdU(n,m) = 0.0_rt; - for (int l = 0; l <= NumSpec+1; ++l) { - dRdU(n,m) += dRdw(n,l) * dwdU(l,m); - } - } - } - - for (int n = 0; n < NumSpec+2; ++n) { - for (int m = 0; m < NumSpec+2; ++m) { - Jac(n+1, m+1) -= dt_m * dRdU(n,m); + Real coeff = (m == n) ? 1.0_rt : 0.0_rt; + Jac(n, m) = coeff - dt_m * Jac(n, m); } } @@ -194,6 +132,7 @@ sdc_newton_solve(const Real dt_m, int& ierr) { // the purpose of this function is to solve the system // U - dt R(U) = U_old + dt C using a Newton solve. + // This is Eq. 36 in the paper. // // here, U_new should come in as a guess for the new U // and will be returned with the value that satisfied the @@ -204,15 +143,14 @@ sdc_newton_solve(const Real dt_m, // we will do the implicit update of only the terms that // have reactive sources // - // 0 : rho // 1:NumSpec : species - // NumSpec+1 : (rho E) or (rho e) + // NumSpec+1 : (rho e) - GpuArray U_react; - GpuArray f_source; + Array1D U_react; + Array1D f_source; GpuArray mom_source; - GpuArray dU_react; - GpuArray f; + Array1D dU_react; + Array1D f; RArray1D f_rhs; const int MAX_ITER = 100; @@ -222,11 +160,13 @@ sdc_newton_solve(const Real dt_m, // the tolerance we are solving to may depend on the // iteration Real relax_fac = std::pow(sdc_solver_relax_factor, sdc_order - sdc_iteration - 1); - Real tol_dens = sdc_solver_tol_dens * relax_fac; Real tol_spec = sdc_solver_tol_spec * relax_fac; Real tol_ener = sdc_solver_tol_ener * relax_fac; - // update the momenta for this zone -- they don't react + // update the density and momenta for this zone -- they don't react + + U_new[URHO] = U_old[URHO] + dt_m * C[URHO]; + for (int n = 0; n < 3; ++n) { U_new[UMX+n] = U_old[UMX+n] + dt_m * C[UMX+n]; } @@ -235,22 +175,15 @@ sdc_newton_solve(const Real dt_m, // nonlinear solve -- note: we include the old state in // f_source - // load rpar - // for the Jacobian solve, we are solving // f(U) = U - dt R(U) - U_old - dt C = 0 // we define f_source = U_old + dt C so we are solving // f(U) = U - dt R(U) - f_source = 0 - f_source[0] = U_old[URHO] + dt_m * C[URHO]; for (int n = 0; n < NumSpec; ++n) { f_source[1 + n] = U_old[UFS + n] + dt_m * C[UFS + n]; } - if (sdc_solve_for_rhoe == 1) { - f_source[NumSpec+1] = U_old[UEINT] + dt_m * C[UEINT]; - } else { - f_source[NumSpec+1] = U_old[UEDEN] + dt_m * C[UEDEN]; - } + f_source[NumSpec+1] = U_old[UEINT] + dt_m * C[UEINT]; // set the momenta to be U_new for (int n = 0; n < 3; ++n) { @@ -263,25 +196,19 @@ sdc_newton_solve(const Real dt_m, // we should be able to do an update for this somehow? - Real E_var; + Real E_var = U_new[UEDEN]; - if (sdc_solve_for_rhoe == 1) { - E_var = U_new[UEDEN]; - } else { - E_var = U_new[UEINT]; - } + // we need the density too + + Real rho_new = U_new[URHO]; // store the subset for the nonlinear solve // We use an initial guess if possible - U_react[0] = U_new[URHO]; + for (int n = 0; n < NumSpec; ++n) { U_react[1+n] = U_new[UFS+n]; } - if (sdc_solve_for_rhoe == 1) { - U_react[NumSpec+1] = U_new[UEINT]; - } else { - U_react[NumSpec+1] = U_new[UEDEN]; - } + U_react[NumSpec+1] = U_new[UEINT]; #if (INTEGRATOR == 0) @@ -289,14 +216,13 @@ sdc_newton_solve(const Real dt_m, // iterative loop int iter = 0; - int max_newton_iter = MAX_ITER; Real err = 1.e30_rt; bool converged = false; - while (!converged && iter < max_newton_iter) { + while (!converged && iter < MAX_ITER) { int info = 0; - f_sdc_jac(dt_m, U_react, f, Jac, f_source, mom_source, T_old, E_var); + f_sdc_jac(dt_m, U_react, f, Jac, f_source, rho_new, mom_source, T_old, E_var); IArray1D ipvt; @@ -305,15 +231,15 @@ sdc_newton_solve(const Real dt_m, RHS::dgefa(Jac); info = 0; #else - dgefa(Jac, ipvt, info); + dgefa(Jac, ipvt, info); #endif if (info != 0) { ierr = SINGULAR_MATRIX; return; } - for (int n = 1; n <= NumSpec+2; ++n) { - f_rhs(n) = -f[n-1]; + for (int n = 1; n <= NumSpec+1; ++n) { + f_rhs(n) = -f[n]; } #ifdef NEW_NETWORK_IMPLEMENTATION @@ -322,35 +248,36 @@ sdc_newton_solve(const Real dt_m, dgesl(Jac, ipvt, f_rhs); #endif - for (int n = 0; n < NumSpec+2; ++n) { - dU_react[n] = f_rhs(n+1); + for (int n = 1; n < NumSpec+1; ++n) { + dU_react[n] = f_rhs(n); } // how much of dU_react should we apply? Real eta = 1.0_rt; - for (int n = 0; n < NumSpec+2; ++n) { + for (int n = 1; n < NumSpec+1; ++n) { dU_react[n] *= eta; U_react[n] += dU_react[n]; } - GpuArray eps_tot; - - eps_tot[0] = tol_dens * std::abs(U_react[0]) + sdc_solver_atol; + Array1D eps_tot; // for species, atol is the mass fraction limit, so we // multiply by density to get a partial density limit + for (int n = 0; n < NumSpec; ++n) { - eps_tot[1 + n] = tol_spec * std::abs(U_react[1 + n]) + sdc_solver_atol * std::abs(U_react[0]); + eps_tot[1 + n] = tol_spec * std::abs(U_react[1 + n]) + + sdc_solver_atol * std::abs(U_new[URHO]); } eps_tot[NumSpec+1] = tol_ener * std::abs(U_react[NumSpec+1]) + sdc_solver_atol; // compute the norm of the weighted error, where the // weights are 1/eps_tot + auto err_sum = 0.0_rt; - for (int n = 0; n < NumSpec+2; ++n) { - err_sum += dU_react[n] * dU_react[n] / (eps_tot[n]* eps_tot[n]); + for (int n = 1; n < NumSpec+1; ++n) { + err_sum += dU_react[n] * dU_react[n] / (eps_tot[n] * eps_tot[n]); } - err = std::sqrt(err_sum / (NumSpec+2)); + err = std::sqrt(err_sum / (NumSpec+1)); if (err < 1.0_rt) { converged = true; @@ -368,9 +295,7 @@ sdc_newton_solve(const Real dt_m, #endif // update the full U_new - // if we updated total energy, then correct internal, - // or vice versa - U_new[URHO] = U_react[0]; + for (int n = 0; n < NumSpec; ++n) { U_new[UFS+n] = U_react[1+n]; } @@ -379,13 +304,9 @@ sdc_newton_solve(const Real dt_m, v2 += U_new[UMX+m] * U_new[UMX+m]; } - if (sdc_solve_for_rhoe == 1) { - U_new[UEINT] = U_react[NumSpec+1]; - U_new[UEDEN] = U_new[UEINT] + 0.5_rt * v2 / U_new[URHO]; - } else { - U_new[UEDEN] = U_react[NumSpec+1]; - U_new[UEINT] = U_new[UEDEN] - 0.5_rt * v2 / U_new[URHO]; - } + U_new[UEINT] = U_react[NumSpec+1]; + U_new[UEDEN] = U_new[UEINT] + 0.5_rt * v2 / U_new[URHO]; + } AMREX_GPU_HOST_DEVICE AMREX_INLINE diff --git a/Source/sdc/sdc_react_util.H b/Source/sdc/sdc_react_util.H index 3357612879..851318fec6 100644 --- a/Source/sdc/sdc_react_util.H +++ b/Source/sdc/sdc_react_util.H @@ -1,6 +1,10 @@ #ifndef SDC_REACT_UTIL_H #define SDC_REACT_UTIL_H +constexpr int iwfs = 1; +constexpr int iwe = NumSpec+1; + + AMREX_GPU_HOST_DEVICE AMREX_INLINE void single_zone_react_source(GpuArray const& state, @@ -29,9 +33,7 @@ single_zone_react_source(GpuArray const& state, eos_t eos_state; // Ensure that the temperature going in is consistent with the internal energy. - burn_to_eos(burn_state, eos_state); - eos(eos_input_re, eos_state); - eos_to_burn(eos_state, burn_state); + eos(eos_input_re, burn_state); // eos_get_small_temp(&small_temp); burn_state.T = amrex::min(MAX_TEMP, amrex::max(burn_state.T, small_temp)); @@ -60,6 +62,7 @@ single_zone_react_source(const int i, const int j, const int k, Array4 const& state, Array4 const& R, burn_t& burn_state) { + GpuArray state_arr; GpuArray R_arr; @@ -79,7 +82,7 @@ AMREX_GPU_HOST_DEVICE AMREX_INLINE void single_zone_jac(GpuArray const& state, burn_t& burn_state, const Real dt, - Array2D& dRdw) { + RArray2D& Jac) { #ifdef SIMPLIFIED_SDC #ifndef AMREX_USE_GPU @@ -89,24 +92,28 @@ single_zone_jac(GpuArray const& state, Array1D ydot; Array1D ydot_pert; - JacNetArray2D Jac; const auto eps = 1.e-8_rt; actual_rhs(burn_state, ydot); // Jac has the derivatives with respect to the native - // network variables, X, T. e. It does not have derivatives with - // respect to density, so we'll have to compute those ourselves. + // network variables, X, e. Note: the e derivative if (sdc_newton_use_analytic_jac == 0) { // note the numerical Jacobian will be returned in terms of X + // and will already have the corrections to convert into d/de + // instead of d/dT + jac_info_t jac_info; jac_info.h = dt; numerical_jac(burn_state, jac_info, Jac); + } else { actual_jac(burn_state, Jac); + // the above will have already multiplied the d/dT terms with 1/c_v + // The Jacobian from the nets is in terms of dYdot/dY, but we want // it was dXdot/dX, so convert here. for (int n = 1; n <= NumSpec; n++) { @@ -122,6 +129,8 @@ single_zone_jac(GpuArray const& state, Jac(m,n) = Jac(m,n) * aion_inv[n-1]; } } + + } #endif @@ -132,81 +141,6 @@ single_zone_jac(GpuArray const& state, ydot(n+1) *= aion[n]; } - // Our jacobian, dR/dw has the form: - // - // / 0 0 0 0 \ - // | d(rho X1dot)/drho d(rho X1dot)/dX1 d(rho X1dit)/dX2 ... d(rho X1dot)/dT | - // | d(rho X2dot)/drho d(rho X2dot)/dX1 d(rho X2dot)/dX2 ... d(rho X2dot)/dT | - // | ... | - // \ d(rho Edot)/drho d(rho Edot)/dX1 d(rho Edot)/dX2 ... d(rho Edot)/dT / - - for (int n = 0; n < NumSpec+2; ++n) { - for (int m = 0; m < NumSpec+2; ++m) { - dRdw(n,m) = 0.0_rt; - } - } - - // now perturb density and call the RHS to compute the derivative wrt rho - // species rates come back in terms of molar fractions - burn_t burn_state_pert = burn_state; - - burn_state_pert.rho = burn_state.rho * (1.0_rt + eps); - burn_state_pert.T = burn_state.T; - burn_state_pert.e = burn_state.e; - for (int n = 0; n < NumSpec; ++n) { - burn_state_pert.xn[n] = burn_state.xn[n]; - } -#if NAUX_NET > 0 - for (int n = 0; n < NumAux; ++n) { - burn_state_pert.aux[n] = burn_state.aux[n]; - } -#endif - - actual_rhs(burn_state_pert, ydot_pert); - - // make the rates dX/dt and not dY/dt - for (int n = 0; n < NumSpec; ++n) { - ydot_pert(n+1) *= aion[n]; - } - - // fill the column of dRdw corresponding to the derivative - // with respect to rho - for (int m = 1; m <= NumSpec; ++m) { - // d( d(rho X_m)/dt)/drho - dRdw(m, iwrho) = ydot(m) + - state[URHO] * (ydot_pert(m) - ydot(m))/(eps * burn_state.rho); - } - - // d( d(rho E)/dt)/drho - dRdw(NumSpec+1, iwrho) = ydot(net_ienuc) + - state[URHO] * (ydot_pert(net_ienuc) - ydot(net_ienuc))/(eps * burn_state.rho); - - // fill the columns of dRdw corresponding to each derivative - // with respect to species mass fraction - for (int n = 1; n <= NumSpec; ++n) { - dRdw(0, iwfs-1+n) = 0.0_rt; // density source - - for (int m = 1; m <= NumSpec; ++m) { - // d( d(rho X_m)/dt)/dX_n - dRdw(m, iwfs-1+n) = state[URHO] * Jac(m, n); - } - - // d( d(rho E)/dt)/dX_n - dRdw(NumSpec+1, iwfs-1+n) = state[URHO] * Jac(net_ienuc, n); - - } - - // now fill the column corresponding to derivatives with respect to - // energy -- this column is iwe - dRdw(0, iwe) = 0.0_rt; - - // d( d(rho X_m)/dt)/de - for (int m = 1; m <= NumSpec; ++m) { - dRdw(m, iwe) = state[URHO] * Jac(m, net_ienuc); - } - - // d( d(rho E)/dt)/de - dRdw(NumSpec+1, iwe) = state[URHO] * Jac(net_ienuc, net_ienuc); } #endif From 1ebc26dddfde7d5b96083a2ba35668cf86e5e5ad Mon Sep 17 00:00:00 2001 From: Michael Zingale Date: Mon, 18 Sep 2023 17:14:05 -0400 Subject: [PATCH 04/13] the Newton stuff compiles now --- Source/sdc/sdc_newton_solve.H | 39 +++++++++++++++++------------------ Source/sdc/sdc_react_util.H | 20 +++++++++++++++++- 2 files changed, 38 insertions(+), 21 deletions(-) diff --git a/Source/sdc/sdc_newton_solve.H b/Source/sdc/sdc_newton_solve.H index 30c1966720..61feaaae1a 100644 --- a/Source/sdc/sdc_newton_solve.H +++ b/Source/sdc/sdc_newton_solve.H @@ -16,7 +16,7 @@ void f_sdc_jac(const Real dt_m, Array1D const& U, Array1D& f, - RArray2D& Jac, + JacNetArray2D& Jac, Array1D& f_source, const Real rho, GpuArray& mom_source, @@ -39,7 +39,7 @@ f_sdc_jac(const Real dt_m, U_full[UFS+n] = U(n+1); } - U_full[UEINT] = U[NumSpec+1]; + U_full[UEINT] = U(NumSpec+1); U_full[UEDEN] = E_var; for (int n = 0; n < 3; ++n) { @@ -87,15 +87,15 @@ f_sdc_jac(const Real dt_m, // store the subset of R used in the Jacobian for (int n = 0; n < NumSpec; ++n) { - R_react[n+1] = R_full[UFS+n]; + R_react(n+1) = R_full[UFS+n]; } - R_react[NumSpec+1] = R_full[UEINT]; + R_react(NumSpec+1) = R_full[UEINT]; // we are solving J dU = -f // where f is Eq. 36 evaluated with the currect guess for U for (int n = 1; n < NumSpec+1; ++n) { - f[n] = U[n] - dt_m * R_react[n] - f_source[n]; + f(n) = U(n) - dt_m * R_react(n) - f_source(n); } // get the Jacobian. @@ -138,7 +138,7 @@ sdc_newton_solve(const Real dt_m, // and will be returned with the value that satisfied the // nonlinear function - RArray2D Jac; + JacNetArray2D Jac; // we will do the implicit update of only the terms that // have reactive sources @@ -181,9 +181,9 @@ sdc_newton_solve(const Real dt_m, // f(U) = U - dt R(U) - f_source = 0 for (int n = 0; n < NumSpec; ++n) { - f_source[1 + n] = U_old[UFS + n] + dt_m * C[UFS + n]; + f_source(1 + n) = U_old[UFS + n] + dt_m * C[UFS + n]; } - f_source[NumSpec+1] = U_old[UEINT] + dt_m * C[UEINT]; + f_source(NumSpec+1) = U_old[UEINT] + dt_m * C[UEINT]; // set the momenta to be U_new for (int n = 0; n < 3; ++n) { @@ -206,9 +206,9 @@ sdc_newton_solve(const Real dt_m, // We use an initial guess if possible for (int n = 0; n < NumSpec; ++n) { - U_react[1+n] = U_new[UFS+n]; + U_react(1+n) = U_new[UFS+n]; } - U_react[NumSpec+1] = U_new[UEINT]; + U_react(NumSpec+1) = U_new[UEINT]; #if (INTEGRATOR == 0) @@ -239,7 +239,7 @@ sdc_newton_solve(const Real dt_m, } for (int n = 1; n <= NumSpec+1; ++n) { - f_rhs(n) = -f[n]; + f_rhs(n) = -f(n); } #ifdef NEW_NETWORK_IMPLEMENTATION @@ -249,14 +249,13 @@ sdc_newton_solve(const Real dt_m, #endif for (int n = 1; n < NumSpec+1; ++n) { - dU_react[n] = f_rhs(n); + dU_react(n) = f_rhs(n); } // how much of dU_react should we apply? Real eta = 1.0_rt; for (int n = 1; n < NumSpec+1; ++n) { - dU_react[n] *= eta; - U_react[n] += dU_react[n]; + U_react(n) += eta * dU_react(n); } Array1D eps_tot; @@ -265,19 +264,19 @@ sdc_newton_solve(const Real dt_m, // multiply by density to get a partial density limit for (int n = 0; n < NumSpec; ++n) { - eps_tot[1 + n] = tol_spec * std::abs(U_react[1 + n]) + + eps_tot(1 + n) = tol_spec * std::abs(U_react(1 + n)) + sdc_solver_atol * std::abs(U_new[URHO]); } - eps_tot[NumSpec+1] = tol_ener * std::abs(U_react[NumSpec+1]) + sdc_solver_atol; + eps_tot(NumSpec+1) = tol_ener * std::abs(U_react(NumSpec+1)) + sdc_solver_atol; // compute the norm of the weighted error, where the // weights are 1/eps_tot auto err_sum = 0.0_rt; for (int n = 1; n < NumSpec+1; ++n) { - err_sum += dU_react[n] * dU_react[n] / (eps_tot[n] * eps_tot[n]); + err_sum += dU_react(n) * dU_react(n) / (eps_tot(n) * eps_tot(n)); } - err = std::sqrt(err_sum / (NumSpec+1)); + err = std::sqrt(err_sum / static_cast(NumSpec+1)); if (err < 1.0_rt) { converged = true; @@ -297,14 +296,14 @@ sdc_newton_solve(const Real dt_m, // update the full U_new for (int n = 0; n < NumSpec; ++n) { - U_new[UFS+n] = U_react[1+n]; + U_new[UFS+n] = U_react(1+n); } auto v2 = 0.0_rt; for (int m = 0; m < 3; ++m) { v2 += U_new[UMX+m] * U_new[UMX+m]; } - U_new[UEINT] = U_react[NumSpec+1]; + U_new[UEINT] = U_react(NumSpec+1); U_new[UEDEN] = U_new[UEINT] + 0.5_rt * v2 / U_new[URHO]; } diff --git a/Source/sdc/sdc_react_util.H b/Source/sdc/sdc_react_util.H index 851318fec6..5a03210a08 100644 --- a/Source/sdc/sdc_react_util.H +++ b/Source/sdc/sdc_react_util.H @@ -82,7 +82,7 @@ AMREX_GPU_HOST_DEVICE AMREX_INLINE void single_zone_jac(GpuArray const& state, burn_t& burn_state, const Real dt, - RArray2D& Jac) { + JacNetArray2D& Jac) { #ifdef SIMPLIFIED_SDC #ifndef AMREX_USE_GPU @@ -130,7 +130,25 @@ single_zone_jac(GpuArray const& state, } } + // now correct the species derivatives + // this constructs dy/dX_k |_e = dy/dX_k |_T - e_{X_k} |_T dy/dT / c_v + eos_re_extra_t eos_state; + eos_state.rho = burn_state.rho; + eos_state.T = burn_state.T; + eos_state.e = burn_state.e; + for (int n = 0; n < NumSpec; n++) { + eos_state.xn[n] = burn_state.xn[n]; + } + + eos(eos_input_re, eos_state); + eos_xderivs_t eos_xderivs = composition_derivatives(eos_state); + + for (int m = 1; m <= neqs; m++) { + for (int n = 1; n <= NumSpec; n++) { + Jac(m, n) -= eos_xderivs.dedX[n-1] * Jac(m, net_ienuc); + } + } } #endif From 27f8b32306966cbaed60c736cc6327a3bd03247b Mon Sep 17 00:00:00 2001 From: Michael Zingale Date: Mon, 18 Sep 2023 17:33:37 -0400 Subject: [PATCH 05/13] this builds and runs note: VODE is commented out --- Source/sdc/Castro_sdc_util.H | 136 +-------------------------------- Source/sdc/sdc_newton_solve.H | 4 +- Source/sdc/vode_rhs_true_sdc.H | 134 ++++++++++++++++++++++++++++++++ 3 files changed, 139 insertions(+), 135 deletions(-) diff --git a/Source/sdc/Castro_sdc_util.H b/Source/sdc/Castro_sdc_util.H index 09581b06e8..45a073dc86 100644 --- a/Source/sdc/Castro_sdc_util.H +++ b/Source/sdc/Castro_sdc_util.H @@ -8,7 +8,7 @@ #endif #include #include -#include +//#include #ifndef NEW_NETWORK_IMPLEMENTATION #include #endif @@ -47,138 +47,6 @@ normalize_species_sdc(const int i, const int j, const int k, -AMREX_GPU_HOST_DEVICE AMREX_INLINE -void -sdc_vode_solve(const Real dt_m, - GpuArray const& U_old, - GpuArray& U_new, - GpuArray const& C, - const int sdc_iteration) { - - // The purpose of this function is to solve the system the - // approximate system dU/dt = R + C using the VODE ODE - // integrator. - // The solution we get here will then be used as the - // initial guess to the Newton solve on the real system. - - // We will do the implicit update of only the terms that - // have reactive sources - // - // 0 : rho - // 1:NumSpec : species - // NumSpec+1 : (rho E) or (rho e) - -#if (INTEGRATOR == 0) - - // The tolerance we are solving to may depend on the - // iteration - auto relax_fac = std::pow(sdc_solver_relax_factor, sdc_order - sdc_iteration - 1); - auto tol_dens = sdc_solver_tol_dens * relax_fac; - auto tol_spec = sdc_solver_tol_spec * relax_fac; - auto tol_ener = sdc_solver_tol_ener * relax_fac; - - // Update the momenta for this zone -- they don't react - for (int n = 0; n < 3; ++n) { - U_new[UMX+n] = U_old[UMX+n] + dt_m * C[UMX+n]; - } - - // Now only save the subset that participates in the - // nonlinear solve -- note: we include the old state - // in f_source - - // load rpar - - // If we are solving the system as an ODE, then we are - // solving - // dU/dt = R(U) + C - // so we simply pass in C - GpuArray C_react; - - C_react[0] = C[URHO]; - for (int n = 0; n < NumSpec; ++n) { - C_react[1+n] = C[UFS+n]; - } - C_react[NumSpec+1] = C[UEINT]; - - dvode_t dvode_state; - - burn_t burn_state; - - burn_state.success = true; - - for (int n = 0; n < NumSpec+2; n++) { - burn_state.f_source[n] = C_react[n]; - } - - for (int n = 0; n < 3; n++) { - burn_state.mom[n] = U_new[UMX+n]; - } - - // we are always solving for rhoe with the VODE predict - burn_state.e = U_new[UEDEN]; - - burn_state.E_var = U_old[UEDEN]; - - // temperature will be used as an initial guess in the EOS - burn_state.T = U_old[UTEMP]; - - - // store the subset for the nonlinear solve. We only - // consider (rho e), not (rho E). This is because at - // present we do not have a method of updating the - // velocities during the multistep integration. - - // Also note that the dvode_state is 1-based, but we'll - // access it as 0-based in our implementation of the - // RHS routine - - dvode_state.y(1) = U_old[URHO]; - for (int n = 0; n < NumSpec; ++n) { - dvode_state.y(2+n) = U_old[UFS+n]; - } - dvode_state.y(NumSpec+2) = U_old[UEINT]; - - // // set the maximum number of steps allowed - // dvode_state.MXSTEP = 25000; - - dvode_state.t = 0.0_rt; - dvode_state.tout = dt_m; - dvode_state.HMXI = 1.0_rt / ode_max_dt; - - // relative tolerances - dvode_state.rtol_dens = tol_dens; - dvode_state.rtol_spec = tol_spec; - dvode_state.rtol_enuc = tol_ener; - - // absolute tolerances - dvode_state.atol_dens = sdc_solver_atol * U_old[URHO]; - dvode_state.atol_spec = sdc_solver_atol * U_old[URHO]; // this way, atol is the minimum x - dvode_state.atol_enuc = sdc_solver_atol * U_old[UEINT]; - - int istate = dvode(burn_state, dvode_state); - - if (istate < 0) { - Abort("Vode terminated poorly"); - } - - // update the full U_new - U_new[URHO] = dvode_state.y(1); - for (int n = 0; n < NumSpec; ++n) { - U_new[UFS+n] = dvode_state.y(2+n); - } - U_new[UEINT] = dvode_state.y(NumSpec+2); - auto v2 = 0.0_rt; - for (int n = 0; n < 3; ++n) { - v2 += U_new[UMX+n] * U_new[UMX+n]; - } - U_new[UEDEN] = U_new[UEINT] + 0.5_rt * v2 / U_new[URHO]; - - // keep our temperature guess - U_new[UTEMP] = U_old[UTEMP]; - -#endif -} - AMREX_GPU_HOST_DEVICE AMREX_INLINE void sdc_solve(const Real dt_m, @@ -207,6 +75,7 @@ sdc_solve(const Real dt_m, if (ierr != NEWTON_SUCCESS) { Abort("Newton subcycling failed in sdc_solve"); } +#if 0 } else if (sdc_solver == VODE_SOLVE) { // Use VODE to do the solution sdc_vode_solve(dt_m, U_old, U_new, C, sdc_iteration); @@ -226,6 +95,7 @@ sdc_solve(const Real dt_m, if (ierr != NEWTON_SUCCESS) { Abort("Newton failure in sdc_solve"); } +#endif } } diff --git a/Source/sdc/sdc_newton_solve.H b/Source/sdc/sdc_newton_solve.H index 61feaaae1a..981e0e1218 100644 --- a/Source/sdc/sdc_newton_solve.H +++ b/Source/sdc/sdc_newton_solve.H @@ -111,8 +111,8 @@ f_sdc_jac(const Real dt_m, // Our Jacobian has the form: J = I - dt dR/dw dwdU // (Eq. 38), so now we fix that - for (int n = 1; n <= NumSpec+2; ++n) { - for (int m = 1; m <= NumSpec+2; ++m) { + for (int n = 1; n <= NumSpec+1; ++n) { + for (int m = 1; m <= NumSpec+1; ++m) { Real coeff = (m == n) ? 1.0_rt : 0.0_rt; Jac(n, m) = coeff - dt_m * Jac(n, m); } diff --git a/Source/sdc/vode_rhs_true_sdc.H b/Source/sdc/vode_rhs_true_sdc.H index bd12ae4e2c..94d282d4d0 100644 --- a/Source/sdc/vode_rhs_true_sdc.H +++ b/Source/sdc/vode_rhs_true_sdc.H @@ -176,5 +176,139 @@ void jac (const Real time, burn_t& burn_state, DvodeT& vode_state, MatrixType& p } } + + +AMREX_GPU_HOST_DEVICE AMREX_INLINE +void +sdc_vode_solve(const Real dt_m, + GpuArray const& U_old, + GpuArray& U_new, + GpuArray const& C, + const int sdc_iteration) { + + // The purpose of this function is to solve the system the + // approximate system dU/dt = R + C using the VODE ODE + // integrator. + // The solution we get here will then be used as the + // initial guess to the Newton solve on the real system. + + // We will do the implicit update of only the terms that + // have reactive sources + // + // 0 : rho + // 1:NumSpec : species + // NumSpec+1 : (rho E) or (rho e) + +#if (INTEGRATOR == 0) + + // The tolerance we are solving to may depend on the + // iteration + auto relax_fac = std::pow(sdc_solver_relax_factor, sdc_order - sdc_iteration - 1); + auto tol_dens = sdc_solver_tol_dens * relax_fac; + auto tol_spec = sdc_solver_tol_spec * relax_fac; + auto tol_ener = sdc_solver_tol_ener * relax_fac; + + // Update the momenta for this zone -- they don't react + for (int n = 0; n < 3; ++n) { + U_new[UMX+n] = U_old[UMX+n] + dt_m * C[UMX+n]; + } + + // Now only save the subset that participates in the + // nonlinear solve -- note: we include the old state + // in f_source + + // load rpar + + // If we are solving the system as an ODE, then we are + // solving + // dU/dt = R(U) + C + // so we simply pass in C + GpuArray C_react; + + C_react[0] = C[URHO]; + for (int n = 0; n < NumSpec; ++n) { + C_react[1+n] = C[UFS+n]; + } + C_react[NumSpec+1] = C[UEINT]; + + dvode_t dvode_state; + + burn_t burn_state; + + burn_state.success = true; + + for (int n = 0; n < NumSpec+2; n++) { + burn_state.f_source[n] = C_react[n]; + } + + for (int n = 0; n < 3; n++) { + burn_state.mom[n] = U_new[UMX+n]; + } + + // we are always solving for rhoe with the VODE predict + burn_state.e = U_new[UEDEN]; + + burn_state.E_var = U_old[UEDEN]; + + // temperature will be used as an initial guess in the EOS + burn_state.T = U_old[UTEMP]; + + + // store the subset for the nonlinear solve. We only + // consider (rho e), not (rho E). This is because at + // present we do not have a method of updating the + // velocities during the multistep integration. + + // Also note that the dvode_state is 1-based, but we'll + // access it as 0-based in our implementation of the + // RHS routine + + dvode_state.y(1) = U_old[URHO]; + for (int n = 0; n < NumSpec; ++n) { + dvode_state.y(2+n) = U_old[UFS+n]; + } + dvode_state.y(NumSpec+2) = U_old[UEINT]; + + // // set the maximum number of steps allowed + // dvode_state.MXSTEP = 25000; + + dvode_state.t = 0.0_rt; + dvode_state.tout = dt_m; + dvode_state.HMXI = 1.0_rt / ode_max_dt; + + // relative tolerances + dvode_state.rtol_dens = tol_dens; + dvode_state.rtol_spec = tol_spec; + dvode_state.rtol_enuc = tol_ener; + + // absolute tolerances + dvode_state.atol_dens = sdc_solver_atol * U_old[URHO]; + dvode_state.atol_spec = sdc_solver_atol * U_old[URHO]; // this way, atol is the minimum x + dvode_state.atol_enuc = sdc_solver_atol * U_old[UEINT]; + + int istate = dvode(burn_state, dvode_state); + + if (istate < 0) { + Abort("Vode terminated poorly"); + } + + // update the full U_new + U_new[URHO] = dvode_state.y(1); + for (int n = 0; n < NumSpec; ++n) { + U_new[UFS+n] = dvode_state.y(2+n); + } + U_new[UEINT] = dvode_state.y(NumSpec+2); + auto v2 = 0.0_rt; + for (int n = 0; n < 3; ++n) { + v2 += U_new[UMX+n] * U_new[UMX+n]; + } + U_new[UEDEN] = U_new[UEINT] + 0.5_rt * v2 / U_new[URHO]; + + // keep our temperature guess + U_new[UTEMP] = U_old[UTEMP]; + +#endif +} + #endif From 426b6217fb5923f09395ef410a715397b21397d0 Mon Sep 17 00:00:00 2001 From: Michael Zingale Date: Mon, 18 Sep 2023 17:43:19 -0400 Subject: [PATCH 06/13] fix codespell --- Source/sdc/sdc_newton_solve.H | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Source/sdc/sdc_newton_solve.H b/Source/sdc/sdc_newton_solve.H index 981e0e1218..9f2c559b61 100644 --- a/Source/sdc/sdc_newton_solve.H +++ b/Source/sdc/sdc_newton_solve.H @@ -92,7 +92,7 @@ f_sdc_jac(const Real dt_m, R_react(NumSpec+1) = R_full[UEINT]; // we are solving J dU = -f - // where f is Eq. 36 evaluated with the currect guess for U + // where f is Eq. 36 evaluated with the current guess for U for (int n = 1; n < NumSpec+1; ++n) { f(n) = U(n) - dt_m * R_react(n) - f_source(n); From 2298fce570fdd58df51069e84a2db9f234188ca1 Mon Sep 17 00:00:00 2001 From: Michael Zingale Date: Tue, 19 Sep 2023 10:21:54 -0400 Subject: [PATCH 07/13] some vode work --- Source/sdc/vode_rhs_true_sdc.H | 83 ++++++++++++---------------------- 1 file changed, 28 insertions(+), 55 deletions(-) diff --git a/Source/sdc/vode_rhs_true_sdc.H b/Source/sdc/vode_rhs_true_sdc.H index 94d282d4d0..f749e05dec 100644 --- a/Source/sdc/vode_rhs_true_sdc.H +++ b/Source/sdc/vode_rhs_true_sdc.H @@ -28,24 +28,20 @@ void rhs(const Real time, burn_t& burn_state, DvodeT& vode_state, RArray1D& dUdt GpuArray U_full; GpuArray R_full; - GpuArray R_react; - // evaluate R + // update the density -- we'll need this for the EOS call in the + // react update + + U_full[URHO] = burn_state.rho_orig + time * state.ydot_a[URHO]; + // we are not solving the momentum equations - // create a full state -- we need this for some interfaces - // note: vode_state % y(:) is 1-based + // and never need total energy, so we don't worry about filling those - U_full[URHO] = vode_state.y(1); for (int n = 0; n < NumSpec; n++) { - U_full[UFS+n] = vode_state.y(2+n); + U_full[UFS+n] = vode_state.y(1+n); } - U_full[UEINT] = vode_state.y(NumSpec+2); - U_full[UEDEN] = burn_state.E_var; - - U_full[UMX] = burn_state.mom[0]; - U_full[UMY] = burn_state.mom[1]; - U_full[UMZ] = burn_state.mom[2]; + U_full[UEINT] = vode_state.y(NumSpec+1); // initialize the temperature -- a better value will be found when we do the EOS // call in single_zone_react_source @@ -62,17 +58,13 @@ void rhs(const Real time, burn_t& burn_state, DvodeT& vode_state, RArray1D& dUdt burn_state.T = burn_state_pass.T; - R_react[0] = R_full[URHO]; - for (int n = 0; n < NumSpec; n++) { - R_react[1+n] = R_full[UFS+n]; - } - R_react[NumSpec+1] = R_full[UEINT]; - - // create the RHS -- this is 1-based + // now construct the RHS -- note this is 1-based - for (int n = 1; n <= NumSpec+2; n++) { - dUdt(n) = R_react[n-1] + burn_state.f_source[n-1]; + for (int n = 0; n < NumSpec; ++n) { + dUdt(n-1) = R_react[UFS+n] + burn_state.ydot_a[UFS+n]; } + dUdt(NumSpec+1) = R_react[UEINT] + burn_state.ydot_a[UEINT]; + } @@ -99,11 +91,7 @@ void jac (const Real time, burn_t& burn_state, DvodeT& vode_state, MatrixType& p U_full[UFS+n] = vode_state.y(2+n); } U_full[UEINT] = vode_state.y(NumSpec+2); - U_full[UEDEN] = burn_state.E_var; - U_full[UMX] = burn_state.mom[0]; - U_full[UMY] = burn_state.mom[1]; - U_full[UMZ] = burn_state.mom[2]; // compute the temperature and species derivatives -- // maybe this should be done using the burn_state @@ -209,6 +197,7 @@ sdc_vode_solve(const Real dt_m, auto tol_ener = sdc_solver_tol_ener * relax_fac; // Update the momenta for this zone -- they don't react + for (int n = 0; n < 3; ++n) { U_new[UMX+n] = U_old[UMX+n] + dt_m * C[UMX+n]; } @@ -219,36 +208,23 @@ sdc_vode_solve(const Real dt_m, // load rpar - // If we are solving the system as an ODE, then we are - // solving - // dU/dt = R(U) + C - // so we simply pass in C - GpuArray C_react; - - C_react[0] = C[URHO]; - for (int n = 0; n < NumSpec; ++n) { - C_react[1+n] = C[UFS+n]; - } - C_react[NumSpec+1] = C[UEINT]; - dvode_t dvode_state; + dvode_t dvode_state; burn_t burn_state; - burn_state.success = true; + burn_state.rho_orig = U_old[URHO]; - for (int n = 0; n < NumSpec+2; n++) { - burn_state.f_source[n] = C_react[n]; - } + // If we are solving the system as an ODE, then we are + // solving + // dU/dt = R(U) + C + // so we simply pass in C - for (int n = 0; n < 3; n++) { - burn_state.mom[n] = U_new[UMX+n]; + for (int n = 0; n < NUM_STATE; ++n) { + burn_state.ydot_a[n] = C[n]; } - // we are always solving for rhoe with the VODE predict - burn_state.e = U_new[UEDEN]; - - burn_state.E_var = U_old[UEDEN]; + burn_state.success = true; // temperature will be used as an initial guess in the EOS burn_state.T = U_old[UTEMP]; @@ -263,11 +239,10 @@ sdc_vode_solve(const Real dt_m, // access it as 0-based in our implementation of the // RHS routine - dvode_state.y(1) = U_old[URHO]; for (int n = 0; n < NumSpec; ++n) { - dvode_state.y(2+n) = U_old[UFS+n]; + dvode_state.y(1+n) = U_old[UFS+n]; } - dvode_state.y(NumSpec+2) = U_old[UEINT]; + dvode_state.y(NumSpec+1) = U_old[UEINT]; // // set the maximum number of steps allowed // dvode_state.MXSTEP = 25000; @@ -277,12 +252,10 @@ sdc_vode_solve(const Real dt_m, dvode_state.HMXI = 1.0_rt / ode_max_dt; // relative tolerances - dvode_state.rtol_dens = tol_dens; dvode_state.rtol_spec = tol_spec; dvode_state.rtol_enuc = tol_ener; // absolute tolerances - dvode_state.atol_dens = sdc_solver_atol * U_old[URHO]; dvode_state.atol_spec = sdc_solver_atol * U_old[URHO]; // this way, atol is the minimum x dvode_state.atol_enuc = sdc_solver_atol * U_old[UEINT]; @@ -293,11 +266,11 @@ sdc_vode_solve(const Real dt_m, } // update the full U_new - U_new[URHO] = dvode_state.y(1); for (int n = 0; n < NumSpec; ++n) { - U_new[UFS+n] = dvode_state.y(2+n); + U_new[UFS+n] = dvode_state.y(1+n); } - U_new[UEINT] = dvode_state.y(NumSpec+2); + U_new[UEINT] = dvode_state.y(NumSpec+1); + auto v2 = 0.0_rt; for (int n = 0; n < 3; ++n) { v2 += U_new[UMX+n] * U_new[UMX+n]; From 074590d0899e51942b04fefc940e1ea439879ecb Mon Sep 17 00:00:00 2001 From: Michael Zingale Date: Tue, 19 Sep 2023 10:27:25 -0400 Subject: [PATCH 08/13] some more cleaning --- Source/sdc/sdc_react_util.H | 11 +---------- Source/sdc/vode_rhs_true_sdc.H | 3 --- 2 files changed, 1 insertion(+), 13 deletions(-) diff --git a/Source/sdc/sdc_react_util.H b/Source/sdc/sdc_react_util.H index 5a03210a08..cf878c324a 100644 --- a/Source/sdc/sdc_react_util.H +++ b/Source/sdc/sdc_react_util.H @@ -90,13 +90,8 @@ single_zone_jac(GpuArray const& state, #endif #else - Array1D ydot; - Array1D ydot_pert; - const auto eps = 1.e-8_rt; - actual_rhs(burn_state, ydot); - // Jac has the derivatives with respect to the native // network variables, X, e. Note: the e derivative @@ -153,11 +148,7 @@ single_zone_jac(GpuArray const& state, #endif // at this point, our Jacobian should be entirely in terms of X, - // not Y. Let's now fix the rhs terms themselves to be in terms of - // dX/dt and not dY/dt. - for (int n = 0; n < NumSpec; ++n) { - ydot(n+1) *= aion[n]; - } + // not Y. } diff --git a/Source/sdc/vode_rhs_true_sdc.H b/Source/sdc/vode_rhs_true_sdc.H index f749e05dec..33e98ea724 100644 --- a/Source/sdc/vode_rhs_true_sdc.H +++ b/Source/sdc/vode_rhs_true_sdc.H @@ -76,9 +76,6 @@ void jac (const Real time, burn_t& burn_state, DvodeT& vode_state, MatrixType& p // note the Jacobian is 1-based at the moment - Array2D dRdw = {0.0_rt}; - Array2D dwdU = {0.0_rt}; - GpuArray U_full; GpuArray R_full; GpuArray R_react; From e19d77ba8aec89a66e6a2295bac198bb9c5cb7c1 Mon Sep 17 00:00:00 2001 From: Michael Zingale Date: Tue, 19 Sep 2023 14:04:19 -0400 Subject: [PATCH 09/13] I think this is how it should be now for VODE --- Source/sdc/sdc_react_util.H | 2 + Source/sdc/vode_rhs_true_sdc.H | 76 ++++------------------------------ 2 files changed, 10 insertions(+), 68 deletions(-) diff --git a/Source/sdc/sdc_react_util.H b/Source/sdc/sdc_react_util.H index cf878c324a..47c4734d5c 100644 --- a/Source/sdc/sdc_react_util.H +++ b/Source/sdc/sdc_react_util.H @@ -128,6 +128,8 @@ single_zone_jac(GpuArray const& state, // now correct the species derivatives // this constructs dy/dX_k |_e = dy/dX_k |_T - e_{X_k} |_T dy/dT / c_v + // TODO: I think that this can be accomplished via burn_state directly! + eos_re_extra_t eos_state; eos_state.rho = burn_state.rho; eos_state.T = burn_state.T; diff --git a/Source/sdc/vode_rhs_true_sdc.H b/Source/sdc/vode_rhs_true_sdc.H index 33e98ea724..ee1818d29e 100644 --- a/Source/sdc/vode_rhs_true_sdc.H +++ b/Source/sdc/vode_rhs_true_sdc.H @@ -78,87 +78,27 @@ void jac (const Real time, burn_t& burn_state, DvodeT& vode_state, MatrixType& p GpuArray U_full; GpuArray R_full; - GpuArray R_react; // we are not solving the momentum equations // create a full state -- we need this for some interfaces - U_full[URHO] = vode_state.y(1); - for (int n = 0; n < NumSpec; n++) { - U_full[UFS+n] = vode_state.y(2+n); - } - U_full[UEINT] = vode_state.y(NumSpec+2); - - - // compute the temperature and species derivatives -- - // maybe this should be done using the burn_state - // returned by single_zone_react_source, since it is - // more consistent T from e + U_full[URHO] = burn_state.rho_orig + time * state.ydot_a[URHO]; - eos_extra_t eos_state; - eos_state.rho = U_full[URHO]; - eos_state.T = burn_state.T; // initial guess for (int n = 0; n < NumSpec; n++) { - eos_state.xn[n] = U_full[UFS+n] / U_full[URHO]; - } -#if NAUX_NET > 0 - for (int n = 0; n < NumAux; n++) { - eos_state.aux[n] = U_full[UFX+n] / U_full[URHO]; + U_full[UFS+n] = vode_state.y(1+n); } -#endif - eos_state.e = U_full[UEINT] / U_full[URHO]; //(U_full(UEDEN) - HALF*sum(U_full(UMX:UMZ))/U_full(URHO))/U_full(URHO) - - - eos(eos_input_re, eos_state); + U_full[UEINT] = vode_state.y(NumSpec+1); - U_full[UTEMP] = eos_state.T; + U_full[UTEMP] = burn_state.T; burn_t burn_state_pass; - single_zone_react_source(U_full, R_full, burn_state_pass); - - single_zone_jac(U_full, burn_state_pass, vode_state.H, dRdw); - - // construct dwdU - // the density row + // this will initialize burn_state_pass and fill the temperature + // via the EOS - dwdU(iwrho, 0) = 1.0_rt; - - // the X_k rows - - for (int m = 1; m < NumSpec+1; m++) { - dwdU(iwfs-1+m, 0) = -vode_state.y(m+1) / (vode_state.y(1) * vode_state.y(1)); - dwdU(iwfs-1+m, m) = 1.0_rt / vode_state.y(1); - } - - auto eos_xderivs = composition_derivatives(eos_state); - - // now the e row -- this depends on whether we are evolving (rho E) or (rho e) - - Real denom = 1.0_rt / eos_state.rho; - Real xn_sum = 0.0_rt; - for (int n = 0; n < NumSpec; ++n) { - xn_sum += eos_state.xn[n] * eos_xderivs.dedX[n]; - } - - dwdU(iwe, 0) = denom * (xn_sum - eos_state.e); - - for (int m = 0; m < NumSpec; m++) { - dwdU(iwe, m+1) = -denom * eos_xderivs.dedX[m]; - } - - dwdU(iwe, iwe) = denom; - - // construct the Jacobian -- note: the Jacobian is 1-based + single_zone_react_source(U_full, R_full, burn_state_pass); - for (int n = 0; n <= NumSpec+1; ++n) { - for (int m = 0; m <= NumSpec+1; ++m) { - pd(n+1, m+1) = 0.0_rt; - for (int l = 0; l <= NumSpec+1; ++l) { - pd(n+1, m+1) = dRdw(n,l) * dwdU(l,m); - } - } - } + single_zone_jac(U_full, burn_state_pass, vode_state.H, pd); } From 893c1cb6b13ce77b6a3413afbeecb4d770df5d2d Mon Sep 17 00:00:00 2001 From: Michael Zingale Date: Wed, 20 Sep 2023 21:24:37 -0400 Subject: [PATCH 10/13] fix --- Source/sdc/vode_rhs_true_sdc.H | 32 +++++++++++++++++++++++++------- 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/Source/sdc/vode_rhs_true_sdc.H b/Source/sdc/vode_rhs_true_sdc.H index ee1818d29e..9e61f3a3df 100644 --- a/Source/sdc/vode_rhs_true_sdc.H +++ b/Source/sdc/vode_rhs_true_sdc.H @@ -14,6 +14,8 @@ #include #include +#include + // The f_rhs routine provides the right-hand-side for the DVODE solver. // This is a generic interface that calls the specific RHS routine in the // network you're actually using. @@ -33,7 +35,7 @@ void rhs(const Real time, burn_t& burn_state, DvodeT& vode_state, RArray1D& dUdt // update the density -- we'll need this for the EOS call in the // react update - U_full[URHO] = burn_state.rho_orig + time * state.ydot_a[URHO]; + U_full[URHO] = burn_state.rho_orig + time * burn_state.ydot_a[SRHO]; // we are not solving the momentum equations // and never need total energy, so we don't worry about filling those @@ -61,9 +63,9 @@ void rhs(const Real time, burn_t& burn_state, DvodeT& vode_state, RArray1D& dUdt // now construct the RHS -- note this is 1-based for (int n = 0; n < NumSpec; ++n) { - dUdt(n-1) = R_react[UFS+n] + burn_state.ydot_a[UFS+n]; + dUdt(n+1) = R_full[UFS+n] + burn_state.ydot_a[SFS+n]; } - dUdt(NumSpec+1) = R_react[UEINT] + burn_state.ydot_a[UEINT]; + dUdt(NumSpec+1) = R_full[UEINT] + burn_state.ydot_a[SEINT]; } @@ -82,7 +84,7 @@ void jac (const Real time, burn_t& burn_state, DvodeT& vode_state, MatrixType& p // we are not solving the momentum equations // create a full state -- we need this for some interfaces - U_full[URHO] = burn_state.rho_orig + time * state.ydot_a[URHO]; + U_full[URHO] = burn_state.rho_orig + time * burn_state.ydot_a[URHO]; for (int n = 0; n < NumSpec; n++) { U_full[UFS+n] = vode_state.y(1+n); @@ -129,7 +131,6 @@ sdc_vode_solve(const Real dt_m, // The tolerance we are solving to may depend on the // iteration auto relax_fac = std::pow(sdc_solver_relax_factor, sdc_order - sdc_iteration - 1); - auto tol_dens = sdc_solver_tol_dens * relax_fac; auto tol_spec = sdc_solver_tol_spec * relax_fac; auto tol_ener = sdc_solver_tol_ener * relax_fac; @@ -151,15 +152,30 @@ sdc_vode_solve(const Real dt_m, burn_t burn_state; burn_state.rho_orig = U_old[URHO]; + burn_state.rhoe_orig = U_old[UEINT]; // If we are solving the system as an ODE, then we are // solving // dU/dt = R(U) + C // so we simply pass in C - for (int n = 0; n < NUM_STATE; ++n) { - burn_state.ydot_a[n] = C[n]; + // Note: we need to use the indexing order that is defined in the burn_t + // so we are compatible with the integrator logic + + burn_state.ydot_a[SRHO] = C[URHO]; + burn_state.ydot_a[SMX] = C[UMX]; + burn_state.ydot_a[SMY] = C[UMY]; + burn_state.ydot_a[SMZ] = C[UMZ]; + burn_state.ydot_a[SEDEN] = C[UEDEN]; + burn_state.ydot_a[SEINT] = C[UEINT]; + for (int n = 0; n < NumSpec; n++) { + burn_state.ydot_a[SFS+n] = C[UFS+n]; } +#if NAUX_NET > 0 + for (int n = 0; n < NumAux; n++) { + burn_state.ydot_a[SFX+n] = C[UFX+n]; + } +#endif burn_state.success = true; @@ -188,6 +204,8 @@ sdc_vode_solve(const Real dt_m, dvode_state.tout = dt_m; dvode_state.HMXI = 1.0_rt / ode_max_dt; + dvode_state.jacobian_type = integrator_rp::jacobian; + // relative tolerances dvode_state.rtol_spec = tol_spec; dvode_state.rtol_enuc = tol_ener; From f9be0520e03695209d7a7c3c47869ee66aa96e67 Mon Sep 17 00:00:00 2001 From: Michael Zingale Date: Wed, 20 Sep 2023 21:25:32 -0400 Subject: [PATCH 11/13] fix --- Source/sdc/Castro_sdc_util.H | 2 -- 1 file changed, 2 deletions(-) diff --git a/Source/sdc/Castro_sdc_util.H b/Source/sdc/Castro_sdc_util.H index f79d190573..164daff206 100644 --- a/Source/sdc/Castro_sdc_util.H +++ b/Source/sdc/Castro_sdc_util.H @@ -69,7 +69,6 @@ sdc_solve(const Real dt_m, if (ierr != NEWTON_SUCCESS) { Abort("Newton subcycling failed in sdc_solve"); } -#if 0 } else if (sdc_solver == VODE_SOLVE) { // Use VODE to do the solution sdc_vode_solve(dt_m, U_old, U_new, C, sdc_iteration); @@ -89,7 +88,6 @@ sdc_solve(const Real dt_m, if (ierr != NEWTON_SUCCESS) { Abort("Newton failure in sdc_solve"); } -#endif } } From 8cfb5be460fb7eceb10e2e91c3501aa4cc4ff396 Mon Sep 17 00:00:00 2001 From: Michael Zingale Date: Thu, 21 Sep 2023 09:07:46 -0400 Subject: [PATCH 12/13] update benchmark --- .../react_converge_128_true_sdc.out | 88 +++++++++---------- 1 file changed, 44 insertions(+), 44 deletions(-) diff --git a/Exec/reacting_tests/reacting_convergence/ci-benchmarks/react_converge_128_true_sdc.out b/Exec/reacting_tests/reacting_convergence/ci-benchmarks/react_converge_128_true_sdc.out index b867f0c375..03efe9ecaa 100644 --- a/Exec/reacting_tests/reacting_convergence/ci-benchmarks/react_converge_128_true_sdc.out +++ b/Exec/reacting_tests/reacting_convergence/ci-benchmarks/react_converge_128_true_sdc.out @@ -2,58 +2,58 @@ time = 0.000901 variables minimum value maximum value density 499999.99996 1003267.4037 - xmom -1.64054094e+12 1.64054094e+12 - ymom -1.64054094e+12 1.64054094e+12 + xmom -1.6405409458e+12 1.6405409458e+12 + ymom -1.6405409458e+12 1.6405409458e+12 zmom 0 0 - rho_E 2.263117857e+22 7.0166374851e+22 - rho_e 2.263117857e+22 7.0166358897e+22 - Temp 300032898.05 454530914.06 - rho_He4 499997.75752 1002591.9748 - rho_C12 2.2423384483 675.42858349 - rho_O16 5.0001038346e-05 0.00023067858795 + rho_E 2.263117857e+22 7.0166374758e+22 + rho_e 2.263117857e+22 7.0166358804e+22 + Temp 300032898.05 454530912.93 + rho_He4 499997.75752 1002591.9749 + rho_C12 2.2423384483 675.42842495 + rho_O16 5.0001038346e-05 0.00023067823575 rho_Fe56 4.9999999996e-05 0.00010032674037 - rho_enuc 1.4565745423e+21 4.5249954871e+23 - density_sdc_source -544925.79275 78513.247889 - xmom_sdc_source -1.8266340469e+15 1.8266340469e+15 - ymom_sdc_source -1.8266340469e+15 1.8266340469e+15 + rho_enuc 1.4565745423e+21 4.5249964745e+23 + density_sdc_source -544925.80758 78513.247869 + xmom_sdc_source -1.8266340546e+15 1.8266340546e+15 + ymom_sdc_source -1.8266340546e+15 1.8266340546e+15 zmom_sdc_source 0 0 - rho_E_sdc_source -6.1992456785e+22 7.5781242406e+21 - rho_e_sdc_source -6.1984065959e+22 6.3082607681e+21 + rho_E_sdc_source -6.1992501236e+22 7.5781242663e+21 + rho_e_sdc_source -6.1984110404e+22 6.3082607836e+21 Temp_sdc_source 0 0 - rho_He4_sdc_source -543231.29383 78510.973919 - rho_C12_sdc_source -1694.4890575 10.073635142 - rho_O16_sdc_source -0.0098093776861 7.1506350196e-06 - rho_Fe56_sdc_source -5.4492579275e-05 7.8513247889e-06 - pressure 1.4155320805e+22 4.2608130858e+22 - kineng 6.7398897838e-15 1.6647276226e+18 - soundspeed 212069503.63 257404342.21 + rho_He4_sdc_source -543231.23569 78510.973875 + rho_C12_sdc_source -1694.5620221 10.07378628 + rho_O16_sdc_source -0.0098098270645 7.1506434889e-06 + rho_Fe56_sdc_source -5.4492580758e-05 7.8513247869e-06 + pressure 1.4155320805e+22 4.2608130805e+22 + kineng 2.4519179612e-13 1.6647276308e+18 + soundspeed 212069503.63 257404342.05 Gamma_1 1.5601126452 1.5885713572 - MachNumber 7.7424457878e-19 0.0086982771596 - entropy 348439780.9 349209883.57 - magvort 0 0.00018541051835 - divu -0.1163387912 0.55816306007 - eint_E 4.5262357143e+16 6.9937847678e+16 - eint_e 4.5262357143e+16 6.9937843728e+16 + MachNumber 4.669869489e-18 0.008698277168 + entropy 348439780.9 349209883.38 + magvort 0 0.00018541051834 + divu -0.11633879117 0.55816307483 + eint_E 4.5262357143e+16 6.9937847586e+16 + eint_e 4.5262357143e+16 6.9937843636e+16 logden 5.6989700043 6.0014167022 StateErr_0 499999.99996 1003267.4037 - StateErr_1 300032898.05 454530914.06 - StateErr_2 0.9993267708 0.99999551512 - X(He4) 0.9993267708 0.99999551512 - X(C12) 4.484676897e-06 0.00067322887298 - X(O16) 1.000020767e-10 2.2992732257e-10 + StateErr_1 300032898.05 454530912.93 + StateErr_2 0.99932677096 0.99999551512 + X(He4) 0.99932677096 0.99999551512 + X(C12) 4.484676897e-06 0.00067322871496 + X(O16) 1.000020767e-10 2.2992697152e-10 X(Fe56) 1e-10 1e-10 - abar 4.0000119598 4.0017960842 + abar 4.0000119598 4.0017960838 Ye 0.5 0.5 - x_velocity -2064636.8122 2064636.8122 - y_velocity -2064636.8122 2064636.8122 + x_velocity -2064636.8156 2064636.8156 + y_velocity -2064636.8156 2064636.8156 z_velocity 0 0 - t_sound_t_enuc 0.00023710316663 0.0195732693 - enuc 2.9131490847e+15 4.5102586513e+17 - magvel 1.6419366351e-10 2067446.1363 - radvel -0.00067838574982 2067446.1363 - circvel 0 11.820144682 - magmom 8.209683175e-05 1.6422547233e+12 - angular_momentum_x 0 0 - angular_momentum_y 0 0 - angular_momentum_z -1.2410862732e+14 1.2410862751e+14 + t_sound_t_enuc 0.00023710316663 0.019573273608 + enuc 2.9131490847e+15 4.5102596355e+17 + magvel 9.9033690456e-10 2067446.1392 + radvel -0.00067838913019 2067446.1391 + circvel 0 11.820185992 + magmom 0.00049516845224 1.6422547294e+12 + angular_momentum_x -0 -0 + angular_momentum_y -0 -0 + angular_momentum_z -1.2410867567e+14 1.2410867593e+14 From 3b7d2f039dc75a413c9a114b603c1183604765e2 Mon Sep 17 00:00:00 2001 From: Michael Zingale Date: Thu, 21 Sep 2023 20:08:32 -0400 Subject: [PATCH 13/13] fix loop --- Source/sdc/sdc_newton_solve.H | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Source/sdc/sdc_newton_solve.H b/Source/sdc/sdc_newton_solve.H index 9f2c559b61..a8fbd2d1ec 100644 --- a/Source/sdc/sdc_newton_solve.H +++ b/Source/sdc/sdc_newton_solve.H @@ -273,7 +273,7 @@ sdc_newton_solve(const Real dt_m, // weights are 1/eps_tot auto err_sum = 0.0_rt; - for (int n = 1; n < NumSpec+1; ++n) { + for (int n = 1; n <= NumSpec+1; ++n) { err_sum += dU_react(n) * dU_react(n) / (eps_tot(n) * eps_tot(n)); } err = std::sqrt(err_sum / static_cast(NumSpec+1));