From a1869ad755c212dec92f97ac044c1a42f1a0ccb6 Mon Sep 17 00:00:00 2001 From: Luke Roberts Date: Wed, 4 Sep 2024 19:32:41 -0600 Subject: [PATCH 01/62] Initial crack at CG --- src/CMakeLists.txt | 1 + src/solvers/cg_solver.hpp | 271 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 272 insertions(+) create mode 100644 src/solvers/cg_solver.hpp diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 7f66585a8e8c..dcc582d83ab1 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -227,6 +227,7 @@ add_library(parthenon amr_criteria/refinement_package.hpp solvers/bicgstab_solver.hpp + solvers/cg_solver.hpp solvers/mg_solver.hpp solvers/solver_utils.hpp diff --git a/src/solvers/cg_solver.hpp b/src/solvers/cg_solver.hpp new file mode 100644 index 000000000000..dedf9605d698 --- /dev/null +++ b/src/solvers/cg_solver.hpp @@ -0,0 +1,271 @@ +//======================================================================================== +// (C) (or copyright) 2023-2024. Triad National Security, LLC. All rights reserved. +// +// This program was produced under U.S. Government contract 89233218CNA000001 for Los +// Alamos National Laboratory (LANL), which is operated by Triad National Security, LLC +// for the U.S. Department of Energy/National Nuclear Security Administration. All rights +// in the program are reserved by Triad National Security, LLC, and the U.S. Department +// of Energy/National Nuclear Security Administration. The Government is granted for +// itself and others acting on its behalf a nonexclusive, paid-up, irrevocable worldwide +// license in this material to reproduce, prepare derivative works, distribute copies to +// the public, perform publicly and display publicly, and to permit others to do so. +//======================================================================================== +#ifndef SOLVERS_CG_SOLVER_HPP_ +#define SOLVERS_CG_SOLVER_HPP_ + +#include +#include +#include +#include + +#include "interface/mesh_data.hpp" +#include "interface/meshblock_data.hpp" +#include "interface/state_descriptor.hpp" +#include "kokkos_abstraction.hpp" +#include "solvers/mg_solver.hpp" +#include "solvers/solver_utils.hpp" +#include "tasks/tasks.hpp" +#include "utils/type_list.hpp" + +namespace parthenon { + +namespace solvers { + +struct CGParams { + MGParams mg_params; + int max_iters = 1000; + std::shared_ptr residual_tolerance = std::make_shared(1.e-12); + bool precondition = true; + bool print_per_step = false; + bool relative_residual = false; + CGParams() = default; + CGParams(ParameterInput *pin, const std::string &input_block) { + max_iters = pin->GetOrAddInteger(input_block, "max_iterations", max_iters); + *residual_tolerance = + pin->GetOrAddReal(input_block, "residual_tolerance", *residual_tolerance); + precondition = pin->GetOrAddBoolean(input_block, "precondition", precondition); + print_per_step = pin->GetOrAddBoolean(input_block, "print_per_step", print_per_step); + mg_params = MGParams(pin, input_block); + relative_residual = + pin->GetOrAddBoolean(input_block, "relative_residual", relative_residual); + } +}; + +// The equations class must include a template method +// +// template +// TaskID Ax(TL_t &tl, TaskID depends_on, std::shared_ptr> &md) +// +// that takes a field associated with x_t and applies +// the matrix A to it and stores the result in y_t. 
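+//
+// For reference, a minimal equations class satisfying this contract might look
+// like the following (hypothetical sketch, not part of this patch; it assumes
+// only the TF macro and the solver_utils CopyData task already used elsewhere
+// in this file). Here A is taken to be the identity, so applying it is a copy:
+//
+// struct IdentityEquations {
+//   template <class x_t, class y_t, class TL_t>
+//   TaskID Ax(TL_t &tl, TaskID depends_on, std::shared_ptr<MeshData<Real>> &md) {
+//     // y <- A x = x for A = I
+//     return tl.AddTask(depends_on, TF(CopyData<x_t, y_t>), md);
+//   }
+// };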
+template +class CGSolver { + public: + PARTHENON_INTERNALSOLVERVARIABLE(u, x); + PARTHENON_INTERNALSOLVERVARIABLE(u, r); + PARTHENON_INTERNALSOLVERVARIABLE(u, v); + PARTHENON_INTERNALSOLVERVARIABLE(u, p); + + using internal_types_tl = TypeList; + using preconditioner_t = MGSolver; + using all_internal_types_tl = + concatenate_type_lists_t; + + std::vector GetInternalVariableNames() const { + std::vector names; + if (params_.precondition) { + all_internal_types_tl::IterateTypes( + [&names](auto t) { names.push_back(decltype(t)::name()); }); + } else { + internal_types_tl::IterateTypes( + [&names](auto t) { names.push_back(decltype(t)::name()); }); + } + return names; + } + + CGSolver(StateDescriptor *pkg, CGParams params_in, + equations eq_in = equations(), std::vector shape = {}, + const std::string &container = "base") + : preconditioner(pkg, params_in.mg_params, eq_in, shape, container), + params_(params_in), iter_counter(0), eqs_(eq_in), container_(container) { + using namespace refinement_ops; + auto m_no_ghost = + Metadata({Metadata::Cell, Metadata::Derived, Metadata::OneCopy}, shape); + pkg->AddField(x::name(), m_no_ghost); + pkg->AddField(r::name(), m_no_ghost); + pkg->AddField(v::name(), m_no_ghost); + pkg->AddField(p::name(), m_no_ghost); + } + + template + TaskID AddSetupTasks(TL_t &tl, TaskID dependence, int partition, Mesh *pmesh) { + return preconditioner.AddSetupTasks(tl, dependence, partition, pmesh); + } + + TaskID AddTasks(TaskList &tl, TaskID dependence, Mesh *pmesh, const int partition) { + using namespace utils; + TaskID none; + auto &md = pmesh->mesh_data.GetOrAdd(container_, partition); + std::string label = container_ + "cg_comm_" + std::to_string(partition); + auto &md_comm = + pmesh->mesh_data.AddShallow(label, md, std::vector{u::name()}); + iter_counter = 0; + bool multilevel = pmesh->multilevel; + + // Initialization: u <- 0, r <- rhs, p <- 0, rz <- 1 + auto zero_x = tl.AddTask(dependence, TF(SetToZero), md); + auto zero_u = tl.AddTask(dependence, TF(SetToZero), md); + auto zero_p = tl.AddTask(dependence, TF(SetToZero
<p>
), md); + auto copy_r = tl.AddTask(dependence, TF(CopyData), md); + if (params_.relative_residual) + get_rhs2 = DotProduct(dependence, tl, &rhs2, md); + auto initialize = tl.AddTask( + TaskQualifier::once_per_region | TaskQualifier::local_sync, + zero_x | zero_u | zero_p | copy_r | get_rhs2, + "zero factors", + [](CGSolver *solver) { + solver->iter_counter = -1; + solver->rz.val = 1.0; + return TaskStatus::complete; + }, + this); + auto tol = tl.AddTask( + TaskQualifier::once_per_region, initialize, "print to screen", + [&](CGSolver *solver, std::shared_ptr res_tol, + bool relative_residual) { + if (Globals::my_rank == 0 && params_.print_per_step) { + Real tol = + relative_residual + ? *res_tol * std::sqrt(solver->rhs2.val / pmesh->GetTotalCells()) + : *res_tol; + printf("# [0] v-cycle\n# [1] rms-residual (tol = %e) \n# [2] rms-error\n", + tol); + } + return TaskStatus::complete; + }, + this, params_.residual_tolerance, params_.relative_residual); + + // BEGIN ITERATIVE TASKS + auto [itl, solver_id] = tl.AddSublist(tol, {1, params_.max_iters}); + + auto sync = itl.AddTask(TaskQualifier::local_sync, none, + []() { return TaskStatus::complete; }); + auto reset = itl.AddTask( + TaskQualifier::once_per_region, sync, "update values", + [](CGSolver *solver) { + solver->rz_old = solver->rz.val; + solver->iter_counter++; + return TaskStatus::complete; + }, + this); + + // 1. u <- M r + auto precon = reset; + if (params_.precondition) { + auto set_rhs = itl.AddTask(precon, TF(CopyData), md); + auto zero_u = itl.AddTask(precon, TF(SetToZero), md); + precon = + preconditioner.AddLinearOperatorTasks(itl, set_rhs | zero_u, partition, pmesh); + } else { + precon = itl.AddTask(none, TF(CopyData), md); + } + + // 2. beta <- r dot u / r dot u {old} + auto get_ru = DotProduct(precon, itl, &ru, md); + + // 3. p <- u + beta p + auto correct_p = itl.AddTask( + get_ru, "p <- u + beta p", + [](CGSolver *solver, std::shared_ptr> &md) { + Real beta = solver->ru.val / solver->ru_old; + return AddFieldsAndStore(md, 1.0, beta); + }, + this, md); + + // 4. v <- A p + auto comm = + AddBoundaryExchangeTasks(correct_p, itl, md_comm, multilevel); + auto get_v = eqs_.template Ax(itl, comm, md); + + // 5. alpha <- rz / p A p (calculate denominator) + auto get_pAp = DotProduct(get_v, itl, &pAp, md); + + // 6. x <- x + alpha p + auto correct_x = itl.AddTask( + get_pAp, "x <- x + alpha p", + [](CGSolver *solver, std::shared_ptr> &md) { + Real alpha = solver->ru.val / solver->pAp.val; + return AddFieldsAndStore(md, 1.0, alpha); + }, + this, md); + + // 6. r <- r - alpha A p + auto correct_r = itl.AddTask( + get_pAp, "r <- r - alpha A p", + [](CGSolver *solver, std::shared_ptr> &md) { + Real alpha = solver->ru.val / solver->pAp.val; + return AddFieldsAndStore(md, 1.0, -alpha); + }, + this, md); + + // 7. 
Check and print out residual + auto get_res = DotProduct(correct_r, itl, &residual, md); + + auto print = itl.AddTask( + TaskQualifier::once_per_region, get_res, + [&](CGSolver *solver, Mesh *pmesh) { + Real rms_res = std::sqrt(solver->residual.val / pmesh->GetTotalCells()); + if (Globals::my_rank == 0 && solver->params_.print_per_step) + printf("%i %e\n", solver->iter_counter * 2 + 1, rms_res); + return TaskStatus::complete; + }, + this, pmesh); + + auto check = itl.AddTask( + TaskQualifier::completion, get_res | correct_x, "completion", + [partition](CGSolver *solver, Mesh *pmesh, int max_iter, + std::shared_ptr res_tol, bool relative_residual) { + Real rms_res = std::sqrt(solver->residual.val / pmesh->GetTotalCells()); + solver->final_residual = rms_res; + solver->final_iteration = solver->iter_counter; + Real tol = relative_residual + ? *res_tol * std::sqrt(solver->rhs2.val / pmesh->GetTotalCells()) + : *res_tol; + if (rms_res < tol || solver->iter_counter >= max_iter) { + solver->final_residual = rms_res; + solver->final_iteration = solver->iter_counter; + return TaskStatus::complete; + } + return TaskStatus::iterate; + }, + this, pmesh, params_.max_iters, params_.residual_tolerance, + params_.relative_residual); + + return tl.AddTask(solver_id, TF(CopyData), md); + } + + Real GetSquaredResidualSum() const { return residual.val; } + int GetCurrentIterations() const { return iter_counter; } + + Real GetFinalResidual() const { return final_residual; } + int GetFinalIterations() const { return final_iteration; } + + CGParams &GetParams() { return params_; } + + protected: + preconditioner_t preconditioner; + CGParams params_; + int iter_counter; + AllReduce ru, pAp; + Real ru_old; + equations eqs_; + Real final_residual; + int final_iteration; + std::string container_; +}; + +} // namespace solvers +} // namespace parthenon + +#endif // SOLVERS_CG_SOLVER_HPP_ From fe6a9033fe0be58522298f93427cecdb7aedaec8 Mon Sep 17 00:00:00 2001 From: Luke Roberts Date: Thu, 12 Sep 2024 09:57:55 -0600 Subject: [PATCH 02/62] Messing around with CG, not working well --- src/solvers/cg_solver.hpp | 48 +++++++++++++++++++++------------------ 1 file changed, 26 insertions(+), 22 deletions(-) diff --git a/src/solvers/cg_solver.hpp b/src/solvers/cg_solver.hpp index dedf9605d698..1f591bf1b4d1 100644 --- a/src/solvers/cg_solver.hpp +++ b/src/solvers/cg_solver.hpp @@ -113,48 +113,51 @@ class CGSolver { iter_counter = 0; bool multilevel = pmesh->multilevel; - // Initialization: u <- 0, r <- rhs, p <- 0, rz <- 1 - auto zero_x = tl.AddTask(dependence, TF(SetToZero), md); + // Initialization: u <- 0, r <- rhs, p <- 0, ru <- 1 auto zero_u = tl.AddTask(dependence, TF(SetToZero), md); + auto zero_v = tl.AddTask(dependence, TF(SetToZero), md); + auto zero_x = tl.AddTask(dependence, TF(SetToZero), md); auto zero_p = tl.AddTask(dependence, TF(SetToZero
<p>
), md); auto copy_r = tl.AddTask(dependence, TF(CopyData), md); + auto get_rhs2 = none; if (params_.relative_residual) get_rhs2 = DotProduct(dependence, tl, &rhs2, md); auto initialize = tl.AddTask( TaskQualifier::once_per_region | TaskQualifier::local_sync, - zero_x | zero_u | zero_p | copy_r | get_rhs2, + zero_u | zero_v | zero_x | zero_p | copy_r | get_rhs2, "zero factors", [](CGSolver *solver) { solver->iter_counter = -1; - solver->rz.val = 1.0; + solver->ru.val = std::numeric_limits::max(); return TaskStatus::complete; }, this); - auto tol = tl.AddTask( - TaskQualifier::once_per_region, initialize, "print to screen", - [&](CGSolver *solver, std::shared_ptr res_tol, - bool relative_residual) { - if (Globals::my_rank == 0 && params_.print_per_step) { + + if (params_.print_per_step && Globals::my_rank == 0) { + initialize = tl.AddTask( + TaskQualifier::once_per_region, initialize, "print to screen", + [&](CGSolver *solver, std::shared_ptr res_tol, + bool relative_residual) { Real tol = relative_residual ? *res_tol * std::sqrt(solver->rhs2.val / pmesh->GetTotalCells()) : *res_tol; printf("# [0] v-cycle\n# [1] rms-residual (tol = %e) \n# [2] rms-error\n", tol); - } - return TaskStatus::complete; - }, - this, params_.residual_tolerance, params_.relative_residual); + return TaskStatus::complete; + }, + this, params_.residual_tolerance, params_.relative_residual); + } // BEGIN ITERATIVE TASKS - auto [itl, solver_id] = tl.AddSublist(tol, {1, params_.max_iters}); + auto [itl, solver_id] = tl.AddSublist(initialize, {1, params_.max_iters}); auto sync = itl.AddTask(TaskQualifier::local_sync, none, []() { return TaskStatus::complete; }); auto reset = itl.AddTask( TaskQualifier::once_per_region, sync, "update values", [](CGSolver *solver) { - solver->rz_old = solver->rz.val; + solver->ru_old = solver->ru.val; solver->iter_counter++; return TaskStatus::complete; }, @@ -168,7 +171,7 @@ class CGSolver { precon = preconditioner.AddLinearOperatorTasks(itl, set_rhs | zero_u, partition, pmesh); } else { - precon = itl.AddTask(none, TF(CopyData), md); + precon = itl.AddTask(precon, TF(CopyData), md); } // 2. beta <- r dot u / r dot u {old} @@ -184,11 +187,12 @@ class CGSolver { this, md); // 4. v <- A p + auto copy_u = itl.AddTask(correct_p, TF(CopyData), md); auto comm = - AddBoundaryExchangeTasks(correct_p, itl, md_comm, multilevel); - auto get_v = eqs_.template Ax(itl, comm, md); + AddBoundaryExchangeTasks(copy_u, itl, md_comm, multilevel); + auto get_v = eqs_.template Ax(itl, comm, md); - // 5. alpha <- rz / p A p (calculate denominator) + // 5. alpha <- r dot u / p dot v (calculate denominator) auto get_pAp = DotProduct(get_v, itl, &pAp, md); // 6. 
x <- x + alpha p @@ -217,14 +221,14 @@ class CGSolver { [&](CGSolver *solver, Mesh *pmesh) { Real rms_res = std::sqrt(solver->residual.val / pmesh->GetTotalCells()); if (Globals::my_rank == 0 && solver->params_.print_per_step) - printf("%i %e\n", solver->iter_counter * 2 + 1, rms_res); + printf("%i %e\n", solver->iter_counter, rms_res); return TaskStatus::complete; }, this, pmesh); auto check = itl.AddTask( TaskQualifier::completion, get_res | correct_x, "completion", - [partition](CGSolver *solver, Mesh *pmesh, int max_iter, + [](CGSolver *solver, Mesh *pmesh, int max_iter, std::shared_ptr res_tol, bool relative_residual) { Real rms_res = std::sqrt(solver->residual.val / pmesh->GetTotalCells()); solver->final_residual = rms_res; @@ -257,7 +261,7 @@ class CGSolver { preconditioner_t preconditioner; CGParams params_; int iter_counter; - AllReduce ru, pAp; + AllReduce ru, pAp, residual, rhs2; Real ru_old; equations eqs_; Real final_residual; From 507cbd371d7b9b2b07b5b545e7bbd6287f0efca8 Mon Sep 17 00:00:00 2001 From: Luke Roberts Date: Thu, 12 Sep 2024 09:58:05 -0600 Subject: [PATCH 03/62] Add CG option to poisson --- example/poisson_gmg/poisson_driver.cpp | 12 ++++++++++++ example/poisson_gmg/poisson_package.cpp | 7 +++++++ 2 files changed, 19 insertions(+) diff --git a/example/poisson_gmg/poisson_driver.cpp b/example/poisson_gmg/poisson_driver.cpp index b995613da077..179a615a1176 100644 --- a/example/poisson_gmg/poisson_driver.cpp +++ b/example/poisson_gmg/poisson_driver.cpp @@ -28,6 +28,7 @@ #include "poisson_package.hpp" #include "prolong_restrict/prolong_restrict.hpp" #include "solvers/bicgstab_solver.hpp" +#include "solvers/cg_solver.hpp" #include "solvers/mg_solver.hpp" using namespace parthenon::driver::prelude; @@ -50,6 +51,11 @@ parthenon::DriverStatus PoissonDriver::Execute() { pkg->MutableParam>( "MGBiCGSTABsolver"); final_rms_residual = bicgstab_solver->GetFinalResidual(); + } else if (solver == "CG") { + auto *cg_solver = + pkg->MutableParam>( + "MGCGsolver"); + final_rms_residual = cg_solver->GetFinalResidual(); } else if (solver == "MG") { auto *mg_solver = pkg->MutableParam>( @@ -76,6 +82,9 @@ TaskCollection PoissonDriver::MakeTaskCollection(BlockList_t &blocks) { auto *bicgstab_solver = pkg->MutableParam>( "MGBiCGSTABsolver"); + auto *cg_solver = + pkg->MutableParam>( + "MGCGsolver"); auto partitions = pmesh->GetDefaultBlockPartitions(); const int num_partitions = partitions.size(); @@ -102,6 +111,9 @@ TaskCollection PoissonDriver::MakeTaskCollection(BlockList_t &blocks) { if (solver == "BiCGSTAB") { auto setup = bicgstab_solver->AddSetupTasks(tl, zero_u, i, pmesh); solve = bicgstab_solver->AddTasks(tl, setup, pmesh, i); + } else if (solver == "CG") { + auto setup = cg_solver->AddSetupTasks(tl, zero_u, i, pmesh); + solve = cg_solver->AddTasks(tl, setup, pmesh, i); } else if (solver == "MG") { auto setup = mg_solver->AddSetupTasks(tl, zero_u, i, pmesh); solve = mg_solver->AddTasks(tl, setup, pmesh, i); diff --git a/example/poisson_gmg/poisson_package.cpp b/example/poisson_gmg/poisson_package.cpp index 1826bda428af..62bbf222aafe 100644 --- a/example/poisson_gmg/poisson_package.cpp +++ b/example/poisson_gmg/poisson_package.cpp @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -107,6 +108,12 @@ std::shared_ptr Initialize(ParameterInput *pin) { pkg.get(), bicgstab_params, eq); pkg->AddParam<>("MGBiCGSTABsolver", bicg_solver, parthenon::Params::Mutability::Mutable); + + parthenon::solvers::CGParams cg_params(pin, "poisson/solver_params"); + 
parthenon::solvers::CGSolver cg_solver( + pkg.get(), cg_params, eq); + pkg->AddParam<>("MGCGsolver", cg_solver, + parthenon::Params::Mutability::Mutable); using namespace parthenon::refinement_ops; auto mD = Metadata( From 38543b079029c071cd1d8f088ad7a611251601bf Mon Sep 17 00:00:00 2001 From: Luke Roberts Date: Wed, 18 Sep 2024 09:11:30 -0600 Subject: [PATCH 04/62] explicitly set beta = 0 on first iter --- src/solvers/cg_solver.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/solvers/cg_solver.hpp b/src/solvers/cg_solver.hpp index 1f591bf1b4d1..ee20b422649f 100644 --- a/src/solvers/cg_solver.hpp +++ b/src/solvers/cg_solver.hpp @@ -181,7 +181,7 @@ class CGSolver { auto correct_p = itl.AddTask( get_ru, "p <- u + beta p", [](CGSolver *solver, std::shared_ptr> &md) { - Real beta = solver->ru.val / solver->ru_old; + Real beta = solver->iter_counter > 0 ? solver->ru.val / solver->ru_old : 0.0; return AddFieldsAndStore(md, 1.0, beta); }, this, md); From 162a0a6e21503ec53d5d6c17befbb5ac25db617d Mon Sep 17 00:00:00 2001 From: Luke Roberts Date: Mon, 23 Sep 2024 18:50:49 -0600 Subject: [PATCH 05/62] Messing around with different prolongation operators --- src/prolong_restrict/pr_ops.hpp | 93 +++++++++++++++++++++++++++++++++ src/solvers/mg_solver.hpp | 20 ++++++- 2 files changed, 111 insertions(+), 2 deletions(-) diff --git a/src/prolong_restrict/pr_ops.hpp b/src/prolong_restrict/pr_ops.hpp index f074f6a5d02b..dfcbad5fea44 100644 --- a/src/prolong_restrict/pr_ops.hpp +++ b/src/prolong_restrict/pr_ops.hpp @@ -288,6 +288,99 @@ using ProlongateSharedMinMod = ProlongateSharedGeneral; using ProlongateSharedLinear = ProlongateSharedGeneral; using ProlongatePiecewiseConstant = ProlongateSharedGeneral; +enum class MGProlongationType {Constant, Linear, Quadratic, Kwak}; + +template +struct ProlongateSharedMG { + static constexpr bool OperationRequired(TopologicalElement fel, + TopologicalElement cel) { + if (fel != TopologicalElement::CC) return false; + return fel == cel; + } + + KOKKOS_FORCEINLINE_FUNCTION + static Real QuadraticFactor(int d) { + if (d == 0) return 1.0; // Indicates this dimension is not included + if (d == 1 || d == -1) return 30.0 / 32.0; + if (d == 3 || d == -3) return 5.0 / 32.0; + return -3.0 / 32.0; + } + + KOKKOS_FORCEINLINE_FUNCTION + static Real LinearFactor(int d, bool up_bound, bool lo_bound) { + if (d == 0) return 1.0; // Indicates this dimension is not included + if (d == 1) return (2.0 + !up_bound) / 4.0; + if (d == -1) return (2.0 + !lo_bound) / 4.0; + if (d == 3) return !up_bound / 4.0; + if (d == -3) return !lo_bound / 4.0; + return 0.0; + } + + KOKKOS_FORCEINLINE_FUNCTION + static Real ConstantFactor(int d) { + if (d == 0) return 1.0; // Indicates this dimension is not included + if (d == 1) return 1.0; + return 0.0; + } + + template + KOKKOS_FORCEINLINE_FUNCTION static void + Do(const int l, const int m, const int n, const int k, const int j, const int i, + const IndexRange &ckb, const IndexRange &cjb, const IndexRange &cib, + const IndexRange &kb, const IndexRange &jb, const IndexRange &ib, + const Coordinates_t &coords, const Coordinates_t &coarse_coords, + const ParArrayND *pcoarse, + const ParArrayND *pfine) { + using namespace util; + auto &coarse = *pcoarse; + auto &fine = *pfine; + + constexpr int element_idx = static_cast(el) % 3; + + const int fi = (DIM > 0) ? (i - cib.s) * 2 + ib.s : ib.s; + const int fj = (DIM > 1) ? (j - cjb.s) * 2 + jb.s : jb.s; + const int fk = (DIM > 2) ? 
(k - ckb.s) * 2 + kb.s : kb.s; + + for (int fok = 0; fok < 1 + (DIM > 2); ++fok) { + for (int foj = 0; foj < 1 + (DIM > 1); ++foj) { + for (int foi = 0; foi < 1 + (DIM > 0); ++foi) { + auto &f = fine(element_idx, l, m, n, fk + fok, fj + foj, fi + foi); + f = 0.0; + const bool lo_bound_x = (fi == ib.s); + const bool up_bound_x = ((fi + foi) == ib.e); + const bool lo_bound_y = (fj == jb.s); + const bool up_bound_y = ((fj + foj) == jb.e); + const bool lo_bound_z = (fk == kb.s); + const bool up_bound_z = ((fk + fok) == kb.e); + for (int ok = -(DIM > 2); ok < 1 + (DIM > 2); ++ok) { + for (int oj = -(DIM > 1); oj < 1 + (DIM > 1); ++oj) { + for (int oi = -(DIM > 0); oi < 1 + (DIM > 0); ++oi) { + const int dx = 4 * oi - foi + 1; + const int dy = (DIM > 1) ? 4 * oj - foj + 1 : 0; + const int dz = (DIM > 2) ? 4 * ok - fok + 1 : 0; + if constexpr (MGProlongationType::Linear == type) { + f += LinearFactor(dx, lo_bound_x, up_bound_x) + * LinearFactor(dy, lo_bound_y, up_bound_y) + * LinearFactor(dz, lo_bound_z, up_bound_z) + * coarse(element_idx, l, m, n, k + ok, j + oj, i + oi); + } else if constexpr (MGProlongationType::Kwak == type) { + const Real fac = ((dx <= 1) + (dy <= 1 && DIM > 1) + (dz <=1 && DIM > 2)) / (2.0 * DIM); + f += fac * coarse(element_idx, l, m, n, k + ok, j + oj, i + oi); + } else if constexpr(MGProlongationType::Quadratic == type) { + f += QuadraticFactor(dx) * QuadraticFactor(dy) * QuadraticFactor(dz) * coarse(element_idx, l, m, n, k + ok, j + oj, i + oi); + } else { + f += ConstantFactor(dx) * ConstantFactor(dy) * ConstantFactor(dz) * coarse(element_idx, l, m, n, k + ok, j + oj, i + oi); + } + } + } + } + } + } + } + } +}; + struct ProlongateInternalAverage { static constexpr bool OperationRequired(TopologicalElement fel, TopologicalElement cel) { diff --git a/src/solvers/mg_solver.hpp b/src/solvers/mg_solver.hpp index ee8cfff177ab..665c91a6e663 100644 --- a/src/solvers/mg_solver.hpp +++ b/src/solvers/mg_solver.hpp @@ -40,6 +40,7 @@ struct MGParams { std::string smoother = "SRJ2"; bool two_by_two_diagonal = false; int max_coarsenings = std::numeric_limits::max(); + std::string prolongation = "Linear"; MGParams() = default; MGParams(ParameterInput *pin, const std::string &input_block) { @@ -48,6 +49,7 @@ struct MGParams { pin->GetOrAddReal(input_block, "residual_tolerance", residual_tolerance); do_FAS = pin->GetOrAddBoolean(input_block, "do_FAS", do_FAS); smoother = pin->GetOrAddString(input_block, "smoother", smoother); + prolongation = pin->GetOrAddString(input_block, "prolongation", prolongation); two_by_two_diagonal = pin->GetOrAddBoolean(input_block, "two_by_two_diagonal", two_by_two_diagonal); max_coarsenings = @@ -97,7 +99,21 @@ class MGSolver { Metadata({Metadata::Cell, Metadata::Independent, Metadata::GMGRestrict, Metadata::GMGProlongate, Metadata::OneCopy}, shape); - mres_err.RegisterRefinementOps(); + + if (params_.prolongation == "Linear") { + mres_err.RegisterRefinementOps, RestrictAverage>(); + } else if (params_.prolongation == "Kwak") { + mres_err.RegisterRefinementOps, RestrictAverage>(); + } else if (params_.prolongation == "Quadratic") { + mres_err.RegisterRefinementOps, RestrictAverage>(); + } else if (params_.prolongation == "Consatnts") { + mres_err.RegisterRefinementOps, RestrictAverage>(); + } else if (params_.prolongation == "OldLinear") { + mres_err.RegisterRefinementOps(); + } else { + printf("Requested prolongation type: %s\n", params_.prolongation.c_str()); + PARTHENON_FAIL("Unknown multi-grid prolongation type."); + } pkg->AddField(res_err::name(), 
mres_err); auto mtemp = @@ -391,7 +407,7 @@ class MGSolver { pre_stages = 3; post_stages = 3; } else { - PARTHENON_FAIL("Unknown solver type."); + PARTHENON_FAIL("Unknown smoother type."); } // auto decorate_task_name = [partition, level](const std::string &in, auto b) { From 216f37003cfcb70c78979340d1d3c0965dfafd51 Mon Sep 17 00:00:00 2001 From: Luke Roberts Date: Mon, 23 Sep 2024 19:09:35 -0600 Subject: [PATCH 06/62] Fix bugs --- src/prolong_restrict/pr_ops.hpp | 2 +- src/solvers/mg_solver.hpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/prolong_restrict/pr_ops.hpp b/src/prolong_restrict/pr_ops.hpp index dfcbad5fea44..1f4c6960a230 100644 --- a/src/prolong_restrict/pr_ops.hpp +++ b/src/prolong_restrict/pr_ops.hpp @@ -319,7 +319,7 @@ struct ProlongateSharedMG { KOKKOS_FORCEINLINE_FUNCTION static Real ConstantFactor(int d) { if (d == 0) return 1.0; // Indicates this dimension is not included - if (d == 1) return 1.0; + if (d == 1 || d == -1) return 1.0; return 0.0; } diff --git a/src/solvers/mg_solver.hpp b/src/solvers/mg_solver.hpp index 665c91a6e663..5867677073b7 100644 --- a/src/solvers/mg_solver.hpp +++ b/src/solvers/mg_solver.hpp @@ -106,7 +106,7 @@ class MGSolver { mres_err.RegisterRefinementOps, RestrictAverage>(); } else if (params_.prolongation == "Quadratic") { mres_err.RegisterRefinementOps, RestrictAverage>(); - } else if (params_.prolongation == "Consatnts") { + } else if (params_.prolongation == "Constant") { mres_err.RegisterRefinementOps, RestrictAverage>(); } else if (params_.prolongation == "OldLinear") { mres_err.RegisterRefinementOps(); From 3c81e681039e77336d1816f5dc66cb2afb2572bf Mon Sep 17 00:00:00 2001 From: Luke Roberts Date: Mon, 23 Sep 2024 19:26:02 -0600 Subject: [PATCH 07/62] format and lint --- example/poisson_gmg/poisson_package.cpp | 9 ++--- src/prolong_restrict/pr_ops.hpp | 50 +++++++++++++------------ src/solvers/cg_solver.hpp | 35 +++++++++-------- src/solvers/mg_solver.hpp | 14 ++++--- 4 files changed, 57 insertions(+), 51 deletions(-) diff --git a/example/poisson_gmg/poisson_package.cpp b/example/poisson_gmg/poisson_package.cpp index b95e0cab81ba..4c7dff37ce01 100644 --- a/example/poisson_gmg/poisson_package.cpp +++ b/example/poisson_gmg/poisson_package.cpp @@ -109,12 +109,11 @@ std::shared_ptr Initialize(ParameterInput *pin) { pkg.get(), bicgstab_params, eq); pkg->AddParam<>("MGBiCGSTABsolver", bicg_solver, parthenon::Params::Mutability::Mutable); - + parthenon::solvers::CGParams cg_params(pin, "poisson/solver_params"); - parthenon::solvers::CGSolver cg_solver( - pkg.get(), cg_params, eq); - pkg->AddParam<>("MGCGsolver", cg_solver, - parthenon::Params::Mutability::Mutable); + parthenon::solvers::CGSolver cg_solver(pkg.get(), cg_params, + eq); + pkg->AddParam<>("MGCGsolver", cg_solver, parthenon::Params::Mutability::Mutable); using namespace parthenon::refinement_ops; auto mD = Metadata( diff --git a/src/prolong_restrict/pr_ops.hpp b/src/prolong_restrict/pr_ops.hpp index 1f4c6960a230..5e6bf223b31b 100644 --- a/src/prolong_restrict/pr_ops.hpp +++ b/src/prolong_restrict/pr_ops.hpp @@ -288,7 +288,7 @@ using ProlongateSharedMinMod = ProlongateSharedGeneral; using ProlongateSharedLinear = ProlongateSharedGeneral; using ProlongatePiecewiseConstant = ProlongateSharedGeneral; -enum class MGProlongationType {Constant, Linear, Quadratic, Kwak}; +enum class MGProlongationType { Constant, Linear, Quadratic, Kwak }; template struct ProlongateSharedMG { @@ -299,7 +299,7 @@ struct ProlongateSharedMG { } KOKKOS_FORCEINLINE_FUNCTION - static 
Real QuadraticFactor(int d) { + static Real QuadraticFactor(int d) { if (d == 0) return 1.0; // Indicates this dimension is not included if (d == 1 || d == -1) return 30.0 / 32.0; if (d == 3 || d == -3) return 5.0 / 32.0; @@ -307,7 +307,7 @@ struct ProlongateSharedMG { } KOKKOS_FORCEINLINE_FUNCTION - static Real LinearFactor(int d, bool up_bound, bool lo_bound) { + static Real LinearFactor(int d, bool up_bound, bool lo_bound) { if (d == 0) return 1.0; // Indicates this dimension is not included if (d == 1) return (2.0 + !up_bound) / 4.0; if (d == -1) return (2.0 + !lo_bound) / 4.0; @@ -317,12 +317,12 @@ struct ProlongateSharedMG { } KOKKOS_FORCEINLINE_FUNCTION - static Real ConstantFactor(int d) { + static Real ConstantFactor(int d) { if (d == 0) return 1.0; // Indicates this dimension is not included if (d == 1 || d == -1) return 1.0; return 0.0; } - + template KOKKOS_FORCEINLINE_FUNCTION static void @@ -341,10 +341,10 @@ struct ProlongateSharedMG { const int fi = (DIM > 0) ? (i - cib.s) * 2 + ib.s : ib.s; const int fj = (DIM > 1) ? (j - cjb.s) * 2 + jb.s : jb.s; const int fk = (DIM > 2) ? (k - ckb.s) * 2 + kb.s : kb.s; - - for (int fok = 0; fok < 1 + (DIM > 2); ++fok) { - for (int foj = 0; foj < 1 + (DIM > 1); ++foj) { - for (int foi = 0; foi < 1 + (DIM > 0); ++foi) { + + for (int fok = 0; fok < 1 + (DIM > 2); ++fok) { + for (int foj = 0; foj < 1 + (DIM > 1); ++foj) { + for (int foi = 0; foi < 1 + (DIM > 0); ++foi) { auto &f = fine(element_idx, l, m, n, fk + fok, fj + foj, fi + foi); f = 0.0; const bool lo_bound_x = (fi == ib.s); @@ -356,23 +356,27 @@ struct ProlongateSharedMG { for (int ok = -(DIM > 2); ok < 1 + (DIM > 2); ++ok) { for (int oj = -(DIM > 1); oj < 1 + (DIM > 1); ++oj) { for (int oi = -(DIM > 0); oi < 1 + (DIM > 0); ++oi) { - const int dx = 4 * oi - foi + 1; - const int dy = (DIM > 1) ? 4 * oj - foj + 1 : 0; - const int dz = (DIM > 2) ? 4 * ok - fok + 1 : 0; + const int dx = 4 * oi - foi + 1; + const int dy = (DIM > 1) ? 4 * oj - foj + 1 : 0; + const int dz = (DIM > 2) ? 
4 * ok - fok + 1 : 0; if constexpr (MGProlongationType::Linear == type) { - f += LinearFactor(dx, lo_bound_x, up_bound_x) - * LinearFactor(dy, lo_bound_y, up_bound_y) - * LinearFactor(dz, lo_bound_z, up_bound_z) - * coarse(element_idx, l, m, n, k + ok, j + oj, i + oi); + f += LinearFactor(dx, lo_bound_x, up_bound_x) * + LinearFactor(dy, lo_bound_y, up_bound_y) * + LinearFactor(dz, lo_bound_z, up_bound_z) * + coarse(element_idx, l, m, n, k + ok, j + oj, i + oi); } else if constexpr (MGProlongationType::Kwak == type) { - const Real fac = ((dx <= 1) + (dy <= 1 && DIM > 1) + (dz <=1 && DIM > 2)) / (2.0 * DIM); - f += fac * coarse(element_idx, l, m, n, k + ok, j + oj, i + oi); - } else if constexpr(MGProlongationType::Quadratic == type) { - f += QuadraticFactor(dx) * QuadraticFactor(dy) * QuadraticFactor(dz) * coarse(element_idx, l, m, n, k + ok, j + oj, i + oi); - } else { - f += ConstantFactor(dx) * ConstantFactor(dy) * ConstantFactor(dz) * coarse(element_idx, l, m, n, k + ok, j + oj, i + oi); + const Real fac = + ((dx <= 1) + (dy <= 1 && DIM > 1) + (dz <= 1 && DIM > 2)) / + (2.0 * DIM); + f += fac * coarse(element_idx, l, m, n, k + ok, j + oj, i + oi); + } else if constexpr (MGProlongationType::Quadratic == type) { + f += QuadraticFactor(dx) * QuadraticFactor(dy) * QuadraticFactor(dz) * + coarse(element_idx, l, m, n, k + ok, j + oj, i + oi); + } else { + f += ConstantFactor(dx) * ConstantFactor(dy) * ConstantFactor(dz) * + coarse(element_idx, l, m, n, k + ok, j + oj, i + oi); } - } + } } } } diff --git a/src/solvers/cg_solver.hpp b/src/solvers/cg_solver.hpp index ee20b422649f..cef695bd56b8 100644 --- a/src/solvers/cg_solver.hpp +++ b/src/solvers/cg_solver.hpp @@ -13,6 +13,8 @@ #ifndef SOLVERS_CG_SOLVER_HPP_ #define SOLVERS_CG_SOLVER_HPP_ +#include +#include #include #include #include @@ -84,9 +86,8 @@ class CGSolver { return names; } - CGSolver(StateDescriptor *pkg, CGParams params_in, - equations eq_in = equations(), std::vector shape = {}, - const std::string &container = "base") + CGSolver(StateDescriptor *pkg, CGParams params_in, equations eq_in = equations(), + std::vector shape = {}, const std::string &container = "base") : preconditioner(pkg, params_in.mg_params, eq_in, shape, container), params_(params_in), iter_counter(0), eqs_(eq_in), container_(container) { using namespace refinement_ops; @@ -124,8 +125,7 @@ class CGSolver { get_rhs2 = DotProduct(dependence, tl, &rhs2, md); auto initialize = tl.AddTask( TaskQualifier::once_per_region | TaskQualifier::local_sync, - zero_u | zero_v | zero_x | zero_p | copy_r | get_rhs2, - "zero factors", + zero_u | zero_v | zero_x | zero_p | copy_r | get_rhs2, "zero factors", [](CGSolver *solver) { solver->iter_counter = -1; solver->ru.val = std::numeric_limits::max(); @@ -136,8 +136,7 @@ class CGSolver { if (params_.print_per_step && Globals::my_rank == 0) { initialize = tl.AddTask( TaskQualifier::once_per_region, initialize, "print to screen", - [&](CGSolver *solver, std::shared_ptr res_tol, - bool relative_residual) { + [&](CGSolver *solver, std::shared_ptr res_tol, bool relative_residual) { Real tol = relative_residual ? *res_tol * std::sqrt(solver->rhs2.val / pmesh->GetTotalCells()) @@ -175,8 +174,8 @@ class CGSolver { } // 2. beta <- r dot u / r dot u {old} - auto get_ru = DotProduct(precon, itl, &ru, md); - + auto get_ru = DotProduct(precon, itl, &ru, md); + // 3. p <- u + beta p auto correct_p = itl.AddTask( get_ru, "p <- u + beta p", @@ -185,26 +184,26 @@ class CGSolver { return AddFieldsAndStore(md, 1.0, beta); }, this, md); - + // 4. 
v <- A p auto copy_u = itl.AddTask(correct_p, TF(CopyData), md); auto comm = AddBoundaryExchangeTasks(copy_u, itl, md_comm, multilevel); auto get_v = eqs_.template Ax(itl, comm, md); - // 5. alpha <- r dot u / p dot v (calculate denominator) + // 5. alpha <- r dot u / p dot v (calculate denominator) auto get_pAp = DotProduct(get_v, itl, &pAp, md); - // 6. x <- x + alpha p + // 6. x <- x + alpha p auto correct_x = itl.AddTask( get_pAp, "x <- x + alpha p", [](CGSolver *solver, std::shared_ptr> &md) { Real alpha = solver->ru.val / solver->pAp.val; return AddFieldsAndStore(md, 1.0, alpha); }, - this, md); - - // 6. r <- r - alpha A p + this, md); + + // 6. r <- r - alpha A p auto correct_r = itl.AddTask( get_pAp, "r <- r - alpha A p", [](CGSolver *solver, std::shared_ptr> &md) { @@ -225,11 +224,11 @@ class CGSolver { return TaskStatus::complete; }, this, pmesh); - + auto check = itl.AddTask( TaskQualifier::completion, get_res | correct_x, "completion", - [](CGSolver *solver, Mesh *pmesh, int max_iter, - std::shared_ptr res_tol, bool relative_residual) { + [](CGSolver *solver, Mesh *pmesh, int max_iter, std::shared_ptr res_tol, + bool relative_residual) { Real rms_res = std::sqrt(solver->residual.val / pmesh->GetTotalCells()); solver->final_residual = rms_res; solver->final_iteration = solver->iter_counter; diff --git a/src/solvers/mg_solver.hpp b/src/solvers/mg_solver.hpp index 723f7cd93596..6f253d125207 100644 --- a/src/solvers/mg_solver.hpp +++ b/src/solvers/mg_solver.hpp @@ -100,15 +100,19 @@ class MGSolver { Metadata({Metadata::Cell, Metadata::Independent, Metadata::GMGRestrict, Metadata::GMGProlongate, Metadata::OneCopy}, shape); - + if (params_.prolongation == "Linear") { - mres_err.RegisterRefinementOps, RestrictAverage>(); + mres_err.RegisterRefinementOps, + RestrictAverage>(); } else if (params_.prolongation == "Kwak") { - mres_err.RegisterRefinementOps, RestrictAverage>(); + mres_err.RegisterRefinementOps, + RestrictAverage>(); } else if (params_.prolongation == "Quadratic") { - mres_err.RegisterRefinementOps, RestrictAverage>(); + mres_err.RegisterRefinementOps, + RestrictAverage>(); } else if (params_.prolongation == "Constant") { - mres_err.RegisterRefinementOps, RestrictAverage>(); + mres_err.RegisterRefinementOps, + RestrictAverage>(); } else if (params_.prolongation == "OldLinear") { mres_err.RegisterRefinementOps(); } else { From a1bd975e96b8cfa62710ac475875870e1d738ac6 Mon Sep 17 00:00:00 2001 From: Luke Roberts Date: Tue, 24 Sep 2024 17:33:15 -0600 Subject: [PATCH 08/62] allow for index flattening --- src/basic_types.hpp | 2 ++ src/utils/indexer.hpp | 18 ++++++++++++++++++ 2 files changed, 20 insertions(+) diff --git a/src/basic_types.hpp b/src/basic_types.hpp index f1f07878a533..4db850021984 100644 --- a/src/basic_types.hpp +++ b/src/basic_types.hpp @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -41,6 +42,7 @@ using Real = double; struct IndexRange { int s = 0; /// Starting Index (inclusive) int e = 0; /// Ending Index (inclusive) + operator std::pair() const { return {s, e}; } }; // Enum speficying whether or not you requested a flux variable in diff --git a/src/utils/indexer.hpp b/src/utils/indexer.hpp index 43e4b613087c..f996b9c89281 100644 --- a/src/utils/indexer.hpp +++ b/src/utils/indexer.hpp @@ -95,6 +95,11 @@ struct Indexer { return GetIndicesImpl(idx, std::make_index_sequence()); } + KOKKOS_FORCEINLINE_FUNCTION + std::size_t GetFlatIdx(Ts... 
ts) const { + return GetFlatIndexImpl(ts..., std::make_index_sequence()); + } + KOKKOS_FORCEINLINE_FUNCTION auto GetIdxArray(int idx) const { return get_array_from_tuple( @@ -128,6 +133,19 @@ struct Indexer { return idxs; } + template + KOKKOS_FORCEINLINE_FUNCTION std::size_t + GetFlatIndexImpl(Ts... idxs, std::index_sequence) const { + std::size_t out{0}; + ( + [&] { + idxs -= start[Is]; + out += idxs * N[Is]; + }(), + ...); + return out; + } + template KOKKOS_FORCEINLINE_FUNCTION static std::array GetFactors(std::tuple Nt, std::index_sequence) { From 627efe7848e6db8e512b3946e757302c9924037f Mon Sep 17 00:00:00 2001 From: Luke Roberts Date: Tue, 24 Sep 2024 17:33:51 -0600 Subject: [PATCH 09/62] Add diagonal preconditioning --- src/solvers/bicgstab_solver.hpp | 38 +++++++++++++++++++++++++++------ src/solvers/solver_utils.hpp | 21 ++++++++++++++++++ 2 files changed, 52 insertions(+), 7 deletions(-) diff --git a/src/solvers/bicgstab_solver.hpp b/src/solvers/bicgstab_solver.hpp index 8632d0a68a67..ba1793047370 100644 --- a/src/solvers/bicgstab_solver.hpp +++ b/src/solvers/bicgstab_solver.hpp @@ -32,11 +32,12 @@ namespace parthenon { namespace solvers { +enum class Preconditioner { None, Diagonal, Multigrid }; struct BiCGSTABParams { MGParams mg_params; int max_iters = 1000; std::shared_ptr residual_tolerance = std::make_shared(1.e-12); - bool precondition = true; + Preconditioner precondition_type = Preconditioner::Multigrid; bool print_per_step = false; bool relative_residual = false; BiCGSTABParams() = default; @@ -44,7 +45,16 @@ struct BiCGSTABParams { max_iters = pin->GetOrAddInteger(input_block, "max_iterations", max_iters); *residual_tolerance = pin->GetOrAddReal(input_block, "residual_tolerance", *residual_tolerance); - precondition = pin->GetOrAddBoolean(input_block, "precondition", precondition); + bool precondition = pin->GetOrAddBoolean(input_block, "precondition", true); + std::string precondition_str = + pin->GetOrAddString(input_block, "preconditioner", "Multigrid"); + if (precondition && precondition_str == "Multigrid") { + precondition_type = Preconditioner::Multigrid; + } else if (precondition && precondition_str == "Diagonal") { + precondition_type = Preconditioner::Diagonal; + } else { + precondition_type = Preconditioner::None; + } print_per_step = pin->GetOrAddBoolean(input_block, "print_per_step", print_per_step); mg_params = MGParams(pin, input_block); relative_residual = @@ -70,6 +80,7 @@ class BiCGSTABSolver { PARTHENON_INTERNALSOLVERVARIABLE(u, r); PARTHENON_INTERNALSOLVERVARIABLE(u, p); PARTHENON_INTERNALSOLVERVARIABLE(u, x); + PARTHENON_INTERNALSOLVERVARIABLE(u, diag); using internal_types_tl = TypeList; using preconditioner_t = MGSolver; @@ -79,7 +90,7 @@ class BiCGSTABSolver { std::vector GetInternalVariableNames() const { std::vector names; - if (params_.precondition) { + if (params_.precondition_type == Preconditioner::Multigrid) { all_internal_types_tl::IterateTypes( [&names](auto t) { names.push_back(decltype(t)::name()); }); } else { @@ -105,11 +116,20 @@ class BiCGSTABSolver { pkg->AddField(r::name(), m_no_ghost); pkg->AddField(p::name(), m_no_ghost); pkg->AddField(x::name(), m_no_ghost); + pkg->AddField(diag::name(), m_no_ghost); } template TaskID AddSetupTasks(TL_t &tl, TaskID dependence, int partition, Mesh *pmesh) { - return preconditioner.AddSetupTasks(tl, dependence, partition, pmesh); + if (params_.precondition_type == Preconditioner::Multigrid) { + return preconditioner.AddSetupTasks(tl, dependence, partition, pmesh); + } else if 
(params_.precondition_type == Preconditioner::Diagonal) { + auto partitions = pmesh->GetDefaultBlockPartitions(); + auto &md = pmesh->mesh_data.Add(container_, partitions[partition]); + return tl.AddTask(dependence, &equations::template SetDiagonal, &eqs_, md); + } else { + return dependence; + } } TaskID AddTasks(TaskList &tl, TaskID dependence, Mesh *pmesh, const int partition) { @@ -174,13 +194,15 @@ class BiCGSTABSolver { // 1. u <- M p auto precon1 = reset; - if (params_.precondition) { + if (params_.precondition_type == Preconditioner::Multigrid) { auto set_rhs = itl.AddTask(precon1, TF(CopyData), md); auto zero_u = itl.AddTask(precon1, TF(SetToZero), md); precon1 = preconditioner.AddLinearOperatorTasks(itl, set_rhs | zero_u, partition, pmesh); + } else if (params_.precondition_type == Preconditioner::Diagonal) { + precon1 = itl.AddTask(precon1, TF(ADividedByB), md); } else { - precon1 = itl.AddTask(none, TF(CopyData), md); + precon1 = itl.AddTask(precon1, TF(CopyData), md); } // 2. v <- A u @@ -224,11 +246,13 @@ class BiCGSTABSolver { // 6. u <- M s auto precon2 = correct_s; - if (params_.precondition) { + if (params_.precondition_type == Preconditioner::Multigrid) { auto set_rhs = itl.AddTask(precon2, TF(CopyData), md); auto zero_u = itl.AddTask(precon2, TF(SetToZero), md); precon2 = preconditioner.AddLinearOperatorTasks(itl, set_rhs | zero_u, partition, pmesh); + } else if (params_.precondition_type == Preconditioner::Diagonal) { + precon2 = itl.AddTask(precon2, TF(ADividedByB), md); } else { precon2 = itl.AddTask(precon2, TF(CopyData), md); } diff --git a/src/solvers/solver_utils.hpp b/src/solvers/solver_utils.hpp index 871462d11f31..6f203e4aa97e 100644 --- a/src/solvers/solver_utils.hpp +++ b/src/solvers/solver_utils.hpp @@ -256,6 +256,27 @@ TaskStatus SetToZero(const std::shared_ptr> &md) { return TaskStatus::complete; } +template +TaskStatus ADividedByB(const std::shared_ptr> &md) { + IndexRange ib = md->GetBoundsI(IndexDomain::interior); + IndexRange jb = md->GetBoundsJ(IndexDomain::interior); + IndexRange kb = md->GetBoundsK(IndexDomain::interior); + + static auto desc = parthenon::MakePackDescriptor(md.get()); + auto pack = desc.GetPack(md.get()); + parthenon::par_for( + DEFAULT_LOOP_PATTERN, "DotProduct", DevExecSpace(), 0, pack.GetNBlocks() - 1, kb.s, + kb.e, jb.s, jb.e, ib.s, ib.e, + KOKKOS_LAMBDA(const int b, const int k, const int j, const int i) { + // printf("(%i, %i) diag_elem = %e\n", j, i, pack(b, b_t(), k, j, i)); + const int nvars = pack.GetUpperBound(b, a_t()) - pack.GetLowerBound(b, a_t()) + 1; + for (int c = 0; c < nvars; ++c) + pack(b, out_t(c), k, j, i) = + pack(b, a_t(c), k, j, i) / pack(b, b_t(c), k, j, i); + }); + return TaskStatus::complete; +} + template TaskStatus DotProductLocal(const std::shared_ptr> &md, AllReduce *adotb) { From 81409387b44301c75d684384dcbdecc9d3e8b644 Mon Sep 17 00:00:00 2001 From: Luke Roberts Date: Tue, 24 Sep 2024 17:34:34 -0600 Subject: [PATCH 10/62] Include boundary info in sparse pack --- src/interface/sparse_pack.hpp | 9 +++-- src/interface/sparse_pack_base.cpp | 53 +++++++++++++++++++++++------- src/interface/sparse_pack_base.hpp | 2 ++ src/mesh/meshblock.hpp | 4 +++ 4 files changed, 54 insertions(+), 14 deletions(-) diff --git a/src/interface/sparse_pack.hpp b/src/interface/sparse_pack.hpp index 8b9803dd0b79..bdd48792b66b 100644 --- a/src/interface/sparse_pack.hpp +++ b/src/interface/sparse_pack.hpp @@ -191,13 +191,18 @@ class SparsePack : public SparsePackBase { KOKKOS_INLINE_FUNCTION int GetLevel(const int b, const int 
off3, const int off2, const int off1) const { - return block_props_(b, (off3 + 1) + 3 * ((off2 + 1) + 3 * (off1 + 1))); + return block_props_(b, (off1 + 1) + 3 * ((off2 + 1) + 3 * (off3 + 1))); + } + + KOKKOS_INLINE_FUNCTION bool IsPhysicalBoundary(const int b, const int off3, + const int off2, const int off1) const { + return block_props_(b, (off1 + 1) + 3 * ((off2 + 1) + 3 * (off3 + 1))) == bnd_flag; } KOKKOS_INLINE_FUNCTION int GetGID(const int b) const { return block_props_(b, 27); } int GetLevelHost(const int b, const int off3, const int off2, const int off1) const { - return block_props_h_(b, (off3 + 1) + 3 * ((off2 + 1) + 3 * (off1 + 1))); + return block_props_h_(b, (off1 + 1) + 3 * ((off2 + 1) + 3 * (off3 + 1))); } int GetGIDHost(const int b) const { return block_props_h_(b, 27); } diff --git a/src/interface/sparse_pack_base.cpp b/src/interface/sparse_pack_base.cpp index 2a7a5b70c41c..885a8bdf7cfb 100644 --- a/src/interface/sparse_pack_base.cpp +++ b/src/interface/sparse_pack_base.cpp @@ -164,7 +164,8 @@ SparsePackBase SparsePackBase::Build(T *pmd, const PackDescriptor &desc, pack.bounds_h_ = Kokkos::create_mirror_view(pack.bounds_); // This array stores refinement levels of current block and all neighboring blocks. - pack.block_props_ = block_props_t("block_props", nblocks, 27 + 1); + const Indexer3D bp_idxer({-1, 1}, {-1, 1}, {-1, 1}); + pack.block_props_ = block_props_t("block_props", nblocks, bp_idxer.size() + 1); pack.block_props_h_ = Kokkos::create_mirror_view(pack.block_props_); pack.coords_ = coords_t("coords", desc.flat ? max_size : nblocks); @@ -176,33 +177,61 @@ SparsePackBase SparsePackBase::Build(T *pmd, const PackDescriptor &desc, ForEachBlock(pmd, include_block, [&](int block, mbd_t *pmbd) { int b = 0; const auto &uid_map = pmbd->GetUidMap(); + const auto &pmb = pmbd->GetBlockPointer(); if (!desc.flat) { idx = 0; b = blidx; // JMM: This line could be unified with the coords_h line below, // but it would imply unnecessary copies in the case of non-flat // packs. - coords_h(b) = pmbd->GetBlockPointer()->coords_device; + coords_h(b) = pmb->coords_device; } // Initialize block refinement levels to current block level to provide default if // neighbors not present - for (int n = 0; n < 27; n++) { - pack.block_props_h_(blidx, (1 + 3 * (1 + 3 * 1))) = - pmbd->GetBlockPointer()->loc.level(); + for (int n = 0; n < bp_idxer.size(); n++) { + pack.block_props_h_(blidx, n) = pmb->loc.level(); } - // This block's gid stored in central (1, 1, 1, 1) element - pack.block_props_h_(blidx, 27) = pmbd->GetBlockPointer()->gid; - for (auto &neighbor : pmbd->GetBlockPointer()->neighbors) { + // This block's gid stored at the end of the flattened array + pack.block_props_h_(blidx, bp_idxer.size()) = pmb->gid; + auto *neighbors = &(pmb->neighbors); + if constexpr (!std::is_same_v) { + if (pmd->grid.type == GridType::two_level_composite) { + if (pmb->loc.level() == pmd->grid.logical_level) { + neighbors = &(pmb->gmg_same_neighbors); + } else { + neighbors = &(pmb->gmg_composite_finer_neighbors); + } + } + } + for (auto &neighbor : *neighbors) { // Multiple refined neighbors may write to the same index but they will always have // the same refinement level. 
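+      // (Indexing note: with bp_idxer = Indexer3D({-1, 1}, {-1, 1}, {-1, 1}), an
+      // offset triple (off3, off2, off1) flattens to
+      // (off1 + 1) + 3 * ((off2 + 1) + 3 * (off3 + 1)), which is the layout
+      // GetLevel() and IsPhysicalBoundary() read back on the device side.)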
- pack.block_props_h_( - blidx, (neighbor.offsets[2] + 1) + - 3 * ((neighbor.offsets[1] + 1) + 3 * (neighbor.offsets[0] + 1))) = - neighbor.loc.level(); + + // !!Warning: This reverses the indexing from what brryan had previously, but the + // associated routines for getting the block properties have also had their + // values switched + pack.block_props_h_(blidx, neighbor.offsets.GetIdx()) = neighbor.loc.level(); // Currently not storing neighbor gids } + for (int oxb = -1; oxb <= 1; ++oxb) { + for (int oxa = -1; oxa <= 1; ++oxa) { + if (pmb->IsPhysicalBoundary(inner_x1)) + pack.block_props_h_(blidx, bp_idxer.GetFlatIdx(oxb, oxa, -1)) = bnd_flag; + if (pmb->IsPhysicalBoundary(outer_x1)) + pack.block_props_h_(blidx, bp_idxer.GetFlatIdx(oxb, oxa, 1)) = bnd_flag; + if (pmb->IsPhysicalBoundary(inner_x2)) + pack.block_props_h_(blidx, bp_idxer.GetFlatIdx(oxb, -1, oxa)) = bnd_flag; + if (pmb->IsPhysicalBoundary(outer_x2)) + pack.block_props_h_(blidx, bp_idxer.GetFlatIdx(oxb, 1, oxa)) = bnd_flag; + if (pmb->IsPhysicalBoundary(inner_x3)) + pack.block_props_h_(blidx, bp_idxer.GetFlatIdx(-1, oxb, oxa)) = bnd_flag; + if (pmb->IsPhysicalBoundary(outer_x3)) + pack.block_props_h_(blidx, bp_idxer.GetFlatIdx(1, oxb, oxa)) = bnd_flag; + } + } + for (int i = 0; i < nvar; ++i) { pack.bounds_h_(0, blidx, i) = idx; for (const auto &[var_name, uid] : desc.var_groups[i]) { diff --git a/src/interface/sparse_pack_base.hpp b/src/interface/sparse_pack_base.hpp index 0deca487a20a..6a632e60be7f 100644 --- a/src/interface/sparse_pack_base.hpp +++ b/src/interface/sparse_pack_base.hpp @@ -63,6 +63,8 @@ class SparsePackBase { using block_props_h_t = typename block_props_t::HostMirror; using coords_t = ParArray1D>; + static constexpr int bnd_flag = -2000; + // Returns a SparsePackBase object that is either newly created or taken // from the cache in pmd. 
The cache itself handles the all of this logic template diff --git a/src/mesh/meshblock.hpp b/src/mesh/meshblock.hpp index 60ce4d86282d..6fd4cb6f6cb0 100644 --- a/src/mesh/meshblock.hpp +++ b/src/mesh/meshblock.hpp @@ -186,6 +186,10 @@ class MeshBlock : public std::enable_shared_from_this { BoundaryFlag boundary_flag[6]; + bool IsPhysicalBoundary(BoundaryFace bf) const { + return boundary_flag[bf] != BoundaryFlag::block; + } + // functions // Load balancing void SetCostForLoadBalancing(double cost); From 879dcb543bb797a70014db6e53eb48ecbda2b1e4 Mon Sep 17 00:00:00 2001 From: Luke Roberts Date: Tue, 24 Sep 2024 17:34:51 -0600 Subject: [PATCH 11/62] small --- src/prolong_restrict/pr_ops.hpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/prolong_restrict/pr_ops.hpp b/src/prolong_restrict/pr_ops.hpp index 5e6bf223b31b..3fe53bd908ae 100644 --- a/src/prolong_restrict/pr_ops.hpp +++ b/src/prolong_restrict/pr_ops.hpp @@ -303,7 +303,8 @@ struct ProlongateSharedMG { if (d == 0) return 1.0; // Indicates this dimension is not included if (d == 1 || d == -1) return 30.0 / 32.0; if (d == 3 || d == -3) return 5.0 / 32.0; - return -3.0 / 32.0; + if (d == 5 || d == -5) return -3.0 / 32.0; + return 0.0; } KOKKOS_FORCEINLINE_FUNCTION @@ -347,11 +348,11 @@ struct ProlongateSharedMG { for (int foi = 0; foi < 1 + (DIM > 0); ++foi) { auto &f = fine(element_idx, l, m, n, fk + fok, fj + foj, fi + foi); f = 0.0; - const bool lo_bound_x = (fi == ib.s); + const bool lo_bound_x = ((fi + foi) == ib.s); const bool up_bound_x = ((fi + foi) == ib.e); - const bool lo_bound_y = (fj == jb.s); + const bool lo_bound_y = ((fj + foj) == jb.s); const bool up_bound_y = ((fj + foj) == jb.e); - const bool lo_bound_z = (fk == kb.s); + const bool lo_bound_z = ((fk + fok) == kb.s); const bool up_bound_z = ((fk + fok) == kb.e); for (int ok = -(DIM > 2); ok < 1 + (DIM > 2); ++ok) { for (int oj = -(DIM > 1); oj < 1 + (DIM > 1); ++oj) { From 63e58e02266ebbed04ed5e59421939f446ef999f Mon Sep 17 00:00:00 2001 From: Luke Roberts Date: Tue, 24 Sep 2024 17:35:13 -0600 Subject: [PATCH 12/62] Only smooth on active blocks --- src/solvers/mg_solver.hpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/solvers/mg_solver.hpp b/src/solvers/mg_solver.hpp index 6f253d125207..8a648fbc51c3 100644 --- a/src/solvers/mg_solver.hpp +++ b/src/solvers/mg_solver.hpp @@ -251,7 +251,13 @@ class MGSolver { int nblocks = md->NumBlocks(); std::vector include_block(nblocks, true); - + if (md->grid.type == GridType::two_level_composite) { + int current_level = md->grid.logical_level; + for (int b = 0; b < nblocks; ++b) { + include_block[b] = + md->GetBlockData(b)->GetBlockPointer()->loc.level() == current_level; + } + } static auto desc = parthenon::MakePackDescriptor(md.get()); auto pack = desc.GetPack(md.get(), include_block); From caa959a9f7922867675ce8b8be09aff9ee12997e Mon Sep 17 00:00:00 2001 From: Luke Roberts Date: Tue, 24 Sep 2024 17:35:38 -0600 Subject: [PATCH 13/62] Allow switching boundary prolongation method --- example/poisson_gmg/poisson_package.cpp | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/example/poisson_gmg/poisson_package.cpp b/example/poisson_gmg/poisson_package.cpp index 4c7dff37ce01..611915097071 100644 --- a/example/poisson_gmg/poisson_package.cpp +++ b/example/poisson_gmg/poisson_package.cpp @@ -119,13 +119,21 @@ std::shared_ptr Initialize(ParameterInput *pin) { auto mD = Metadata( {Metadata::Independent, Metadata::OneCopy, Metadata::Face, 
Metadata::GMGRestrict}); mD.RegisterRefinementOps(); + // Holds the discretized version of D in \nabla \cdot D(\vec{x}) \nabla u = rhs. D = 1 // for the standard Poisson equation. pkg->AddField(D::name(), mD); auto mflux_comm = Metadata({Metadata::Cell, Metadata::Independent, Metadata::FillGhost, Metadata::WithFluxes, Metadata::GMGRestrict}); - mflux_comm.RegisterRefinementOps(); + std::string prolong = pin->GetOrAddString("poisson", "prolongation", "Linear"); + if (prolong == "Linear") { + mflux_comm.RegisterRefinementOps(); + } else if (prolong == "Constant") { + mflux_comm.RegisterRefinementOps(); + } else { + PARTHENON_FAIL("Unknown prolongation method for Poisson boundaries."); + } // u is the solution vector that starts with an initial guess and then gets updated // by the solver pkg->AddField(u::name(), mflux_comm); From 0da700f46dd1ee7d8ec17c9c3739146e106c9864 Mon Sep 17 00:00:00 2001 From: Luke Roberts Date: Tue, 24 Sep 2024 17:35:50 -0600 Subject: [PATCH 14/62] explicitly impose boundary conditions on fluxes --- example/poisson_gmg/poisson_equation.hpp | 77 ++++++++++++++++++++++++ 1 file changed, 77 insertions(+) diff --git a/example/poisson_gmg/poisson_equation.hpp b/example/poisson_gmg/poisson_equation.hpp index d246b83a757a..e8f10d63aae0 100644 --- a/example/poisson_gmg/poisson_equation.hpp +++ b/example/poisson_gmg/poisson_equation.hpp @@ -42,6 +42,7 @@ class PoissonEquation { parthenon::TaskID Ax(TL_t &tl, parthenon::TaskID depends_on, std::shared_ptr> &md) { auto flux_res = tl.AddTask(depends_on, CalculateFluxes, md); + flux_res = tl.AddTask(flux_res, SetFluxBoundaries, md); if (do_flux_cor && !(md->grid.type == parthenon::GridType::two_level_composite)) { auto start_flxcor = tl.AddTask(flux_res, parthenon::StartReceiveFluxCorrections, md); @@ -160,6 +161,82 @@ class PoissonEquation { return TaskStatus::complete; } + template + static parthenon::TaskStatus + SetFluxBoundaries(std::shared_ptr> &md) { + using namespace parthenon; + const int ndim = md->GetMeshPointer()->ndim; + IndexRange ib = md->GetBoundsI(IndexDomain::interior); + IndexRange jb = md->GetBoundsJ(IndexDomain::interior); + IndexRange kb = md->GetBoundsK(IndexDomain::interior); + + using TE = parthenon::TopologicalElement; + + int nblocks = md->NumBlocks(); + std::vector include_block(nblocks, true); + + auto desc = + parthenon::MakePackDescriptor(md.get(), {}, {PDOpt::WithFluxes}); + auto pack = desc.GetPack(md.get(), include_block); + const std::size_t scratch_size_in_bytes = 0; + const std::size_t scratch_level = 1; + + const parthenon::Indexer3D idxers[6]{ + parthenon::Indexer3D(kb, jb, {ib.s, ib.s}), + parthenon::Indexer3D(kb, jb, {ib.e + 1, ib.e + 1}), + parthenon::Indexer3D(kb, {jb.s, jb.s}, ib), + parthenon::Indexer3D(kb, {jb.e + 1, jb.e + 1}, ib), + parthenon::Indexer3D({kb.s, kb.s}, jb, ib), + parthenon::Indexer3D({kb.e + 1, kb.e + 1}, jb, ib)}; + constexpr int x1off[6]{-1, 1, 0, 0, 0, 0}; + constexpr int x2off[6]{0, 0, -1, 1, 0, 0}; + constexpr int x3off[6]{0, 0, 0, 0, -1, 1}; + constexpr TE tes[6]{TE::F1, TE::F1, TE::F2, TE::F2, TE::F3, TE::F3}; + constexpr int dirs[6]{X1DIR, X1DIR, X2DIR, X2DIR, X3DIR, X3DIR}; + + parthenon::par_for_outer( + DEFAULT_OUTER_LOOP_PATTERN, "InitializeRadiationQuantities", DevExecSpace(), + scratch_size_in_bytes, scratch_level, 0, pack.GetNBlocks() - 1, + KOKKOS_LAMBDA(parthenon::team_mbr_t member, const int b) { + const auto &coords = pack.GetCoordinates(b); + const int gid = pack.GetGID(b); + const int level = pack.GetLevel(b, 0, 0, 0); + const Real 
dxs[3]{coords.template Dxc(), coords.template Dxc(), + coords.template Dxc()}; + for (int face = 0; face < ndim * 2; ++face) { + const Real dx = dxs[dirs[face] - 1]; + const auto &idxer = idxers[face]; + const auto dir = dirs[face]; + const auto te = tes[face]; + // Impose the zero Dirichlet boundary condition at the actual boundary + if (pack.IsPhysicalBoundary(b, x3off[face], x2off[face], x1off[face])) { + const int koff = x3off[face] > 0 ? -1 : 0; + const int joff = x2off[face] > 0 ? -1 : 0; + const int ioff = x1off[face] > 0 ? -1 : 0; + const int sign = x1off[face] + x2off[face] + x3off[face]; + parthenon::par_for_inner( + DEFAULT_INNER_LOOP_PATTERN, member, 0, idxer.size() - 1, + [&](const int idx) { + const auto [k, j, i] = idxer(idx); + pack.flux(b, dir, var_t(), k, j, i) = + sign * pack(b, te, D(), k, j, i) * + pack(b, var_t(), k + koff, j + joff, i + ioff) / (0.5 * dx); + }); + } + // Correct for size of neighboring zone at fine-coarse boundary when using + // constant prolongation + if (pack.GetLevel(b, x3off[face], x2off[face], x1off[face]) == level - 1) { + parthenon::par_for_inner(DEFAULT_INNER_LOOP_PATTERN, member, 0, + idxer.size() - 1, [&](const int idx) { + const auto [k, j, i] = idxer(idx); + pack.flux(b, dir, var_t(), k, j, i) /= 1.5; + }); + } + } + }); + return TaskStatus::complete; + } + // Calculate A in_t = out_t (in the region covered by md) for a given set of fluxes // calculated with in_t (which have possibly been corrected at coarse fine boundaries) template From f8413a43d3fcb1154e6075a1ba4206e467aa7885 Mon Sep 17 00:00:00 2001 From: Luke Roberts Date: Tue, 24 Sep 2024 17:37:37 -0600 Subject: [PATCH 15/62] small --- example/poisson_gmg/poisson_equation.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/example/poisson_gmg/poisson_equation.hpp b/example/poisson_gmg/poisson_equation.hpp index e8f10d63aae0..6c94c530e0dd 100644 --- a/example/poisson_gmg/poisson_equation.hpp +++ b/example/poisson_gmg/poisson_equation.hpp @@ -195,7 +195,7 @@ class PoissonEquation { constexpr int dirs[6]{X1DIR, X1DIR, X2DIR, X2DIR, X3DIR, X3DIR}; parthenon::par_for_outer( - DEFAULT_OUTER_LOOP_PATTERN, "InitializeRadiationQuantities", DevExecSpace(), + DEFAULT_OUTER_LOOP_PATTERN, "SetFluxBoundaries", DevExecSpace(), scratch_size_in_bytes, scratch_level, 0, pack.GetNBlocks() - 1, KOKKOS_LAMBDA(parthenon::team_mbr_t member, const int b) { const auto &coords = pack.GetCoordinates(b); From 843a26476e5527025c99f6070222c0449c2ea919 Mon Sep 17 00:00:00 2001 From: Luke Roberts Date: Tue, 24 Sep 2024 18:14:57 -0600 Subject: [PATCH 16/62] make defaults consistent with older versions --- example/poisson_gmg/poisson_equation.hpp | 5 ++++- example/poisson_gmg/poisson_package.cpp | 6 ++---- src/solvers/mg_solver.hpp | 2 +- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/example/poisson_gmg/poisson_equation.hpp b/example/poisson_gmg/poisson_equation.hpp index 6c94c530e0dd..9f85bb604831 100644 --- a/example/poisson_gmg/poisson_equation.hpp +++ b/example/poisson_gmg/poisson_equation.hpp @@ -35,6 +35,7 @@ namespace poisson_package { class PoissonEquation { public: bool do_flux_cor = false; + bool set_flux_boundary = false; // Add tasks to calculate the result of the matrix A (which is implicitly defined by // this class) being applied to x_t and store it in field out_t @@ -42,7 +43,9 @@ class PoissonEquation { parthenon::TaskID Ax(TL_t &tl, parthenon::TaskID depends_on, std::shared_ptr> &md) { auto flux_res = tl.AddTask(depends_on, CalculateFluxes, md); - flux_res = 
tl.AddTask(flux_res, SetFluxBoundaries, md); + if (set_flux_boundary) { + flux_res = tl.AddTask(flux_res, SetFluxBoundaries, md); + } if (do_flux_cor && !(md->grid.type == parthenon::GridType::two_level_composite)) { auto start_flxcor = tl.AddTask(flux_res, parthenon::StartReceiveFluxCorrections, md); diff --git a/example/poisson_gmg/poisson_package.cpp b/example/poisson_gmg/poisson_package.cpp index 611915097071..02c74c68ed16 100644 --- a/example/poisson_gmg/poisson_package.cpp +++ b/example/poisson_gmg/poisson_package.cpp @@ -87,9 +87,6 @@ std::shared_ptr Initialize(ParameterInput *pin) { std::string solver = pin->GetOrAddString("poisson", "solver", "MG"); pkg->AddParam<>("solver", solver); - bool flux_correct = pin->GetOrAddBoolean("poisson", "flux_correct", false); - pkg->AddParam<>("flux_correct", flux_correct); - Real err_tol = pin->GetOrAddReal("poisson", "error_tolerance", 1.e-8); pkg->AddParam<>("error_tolerance", err_tol); @@ -97,7 +94,8 @@ std::shared_ptr Initialize(ParameterInput *pin) { pkg->AddParam<>("use_exact_rhs", use_exact_rhs); PoissonEquation eq; - eq.do_flux_cor = flux_correct; + eq.do_flux_cor = pin->GetOrAddBoolean("poisson", "flux_correct", false); + eq.set_flux_boundary = pin->GetOrAddBoolean("poisson", "set_flux_boundary", false); parthenon::solvers::MGParams mg_params(pin, "poisson/solver_params"); parthenon::solvers::MGSolver mg_solver(pkg.get(), mg_params, diff --git a/src/solvers/mg_solver.hpp b/src/solvers/mg_solver.hpp index 8a648fbc51c3..3ea25eae441f 100644 --- a/src/solvers/mg_solver.hpp +++ b/src/solvers/mg_solver.hpp @@ -41,7 +41,7 @@ struct MGParams { std::string smoother = "SRJ2"; bool two_by_two_diagonal = false; int max_coarsenings = std::numeric_limits::max(); - std::string prolongation = "Linear"; + std::string prolongation = "OldLinear"; MGParams() = default; MGParams(ParameterInput *pin, const std::string &input_block) { From 297d9cc28f1cd50fab10d795fb5261c745efab8a Mon Sep 17 00:00:00 2001 From: Luke Roberts Date: Wed, 25 Sep 2024 12:50:18 -0600 Subject: [PATCH 17/62] More freedom in setting prolongation --- example/poisson_gmg/plot_convergence.py | 41 ++++++++++++++++++ example/poisson_gmg/poisson_equation.hpp | 53 ++++++++++++++++++++++-- example/poisson_gmg/poisson_package.cpp | 10 +++-- src/solvers/mg_solver.hpp | 13 ++++-- 4 files changed, 107 insertions(+), 10 deletions(-) create mode 100644 example/poisson_gmg/plot_convergence.py diff --git a/example/poisson_gmg/plot_convergence.py b/example/poisson_gmg/plot_convergence.py new file mode 100644 index 000000000000..9e142f53e7cb --- /dev/null +++ b/example/poisson_gmg/plot_convergence.py @@ -0,0 +1,41 @@ +# ========================================================================================= +# (C) (or copyright) 2020-2024. Triad National Security, LLC. All rights reserved. +# +# This program was produced under U.S. Government contract 89233218CNA000001 for Los +# Alamos National Laboratory (LANL), which is operated by Triad National Security, LLC +# for the U.S. Department of Energy/National Nuclear Security Administration. All rights +# in the program are reserved by Triad National Security, LLC, and the U.S. Department +# of Energy/National Nuclear Security Administration. The Government is granted for +# itself and others acting on its behalf a nonexclusive, paid-up, irrevocable worldwide +# license in this material to reproduce, prepare derivative works, distribute copies to +# the public, perform publicly and display publicly, and to permit others to do so. 
+# ========================================================================================= + +import numpy as np +import glob +import matplotlib.pyplot as plt +import subprocess + +plt.style.use('tableau-colorblind10') + +solver = "BiCGSTAB" +difco = 1e6 +for bound_pro in ["Constant", "Linear"]: + for interior_pro in ["Constant", "OldLinear"]: + + p = subprocess.run(["./poisson-gmg-example", "-i", "parthinput.poisson", + "poisson/solver=" + solver, + "poisson/interior_D=" + str(difco), + "poisson/prolongation=" + bound_pro, + "poisson/solver_params/prolongation=" + interior_pro], capture_output = True, text = True) + dat = np.genfromtxt(p.stdout.splitlines()) + + plt.semilogy(dat[:, 0], dat[:, 1], label=solver + "_" + str(difco) + "_" + bound_pro + "_" + interior_pro) + + +plt.legend(loc = 'upper right') +plt.ylim([1.e-14, 1e2]) +plt.xlim([0, 40]) +plt.xlabel("# of V-cycles") +plt.ylabel("RMS Residual") +plt.savefig("convergence_1e6.pdf") \ No newline at end of file diff --git a/example/poisson_gmg/poisson_equation.hpp b/example/poisson_gmg/poisson_equation.hpp index 9f85bb604831..2d8e8e14906b 100644 --- a/example/poisson_gmg/poisson_equation.hpp +++ b/example/poisson_gmg/poisson_equation.hpp @@ -36,6 +36,7 @@ class PoissonEquation { public: bool do_flux_cor = false; bool set_flux_boundary = false; + bool include_flux_dx = false; // Add tasks to calculate the result of the matrix A (which is implicitly defined by // this class) being applied to x_t and store it in field out_t @@ -44,7 +45,7 @@ class PoissonEquation { std::shared_ptr> &md) { auto flux_res = tl.AddTask(depends_on, CalculateFluxes, md); if (set_flux_boundary) { - flux_res = tl.AddTask(flux_res, SetFluxBoundaries, md); + flux_res = tl.AddTask(flux_res, SetFluxBoundaries, md, include_flux_dx); } if (do_flux_cor && !(md->grid.type == parthenon::GridType::two_level_composite)) { auto start_flxcor = @@ -164,9 +165,53 @@ class PoissonEquation { return TaskStatus::complete; } + template + static parthenon::TaskStatus + Prolongate(std::shared_ptr> &md) { + using namespace parthenon; + const int ndim = md->GetMeshPointer()->ndim; + IndexRange ib = md->GetBoundsI(IndexDomain::interior); + IndexRange jb = md->GetBoundsJ(IndexDomain::interior); + IndexRange kb = md->GetBoundsK(IndexDomain::interior); + IndexRange cib = md->GetBoundsI(CellLevel::coarse, IndexDomain::interior); + IndexRange cjb = md->GetBoundsJ(CellLevel::coarse, IndexDomain::interior); + IndexRange ckb = md->GetBoundsK(CellLevel::coarse, IndexDomain::interior); + + using TE = parthenon::TopologicalElement; + + int nblocks = md->NumBlocks(); + std::vector include_block(nblocks, true); + for (int b = 0; b < nblocks; ++b) { + include_block[b] = md->grid.logical_level == md->GetBlockData(b)->GetBlockPointer()->loc.level(); + } + const auto desc = parthenon::MakePackDescriptor(md.get()); + const auto desc_coarse = parthenon::MakePackDescriptor(md.get(), {}, {PDOpt::Coarse}); + auto pack = desc.GetPack(md.get(), include_block); + auto pack_coarse = desc_coarse.GetPack(md.get(), include_block); + + parthenon::par_for( + "Prolongate", 0, pack.GetNBlocks() - 1, + pack.GetLowerBoundHost(0), pack.GetUpperBoundHost(0), + kb.s, kb.e, jb.s, jb.e, ib.s, ib.e, + KOKKOS_LAMBDA(const int b, const int n, const int fk, const int fj, const int fi) { + const int ck = (ndim > 2) ? (fk - kb.s) / 2 + ckb.s : ckb.s; + const int cj = (ndim > 1) ? (fj - jb.s) / 2 + cjb.s : cjb.s; + const int ci = (ndim > 0) ? 
(fi - ib.s) / 2 + cib.s : cib.s; + pack(b, n, fk, fj, fi) = pack_coarse(b, n, ck, cj, ci); + //for (int ok = -(ndim > 2); ok < 1 + (ndim > 2); ++ok) { + // for (int oj = -(ndim > 1); oj < 1 + (ndim > 1); ++oj) { + // for (int oi = -(ndim > 0); oi < 1 + (ndim > 0); ++oi) { + // + // } + // } + //} + }); + return TaskStatus::complete; + } + template static parthenon::TaskStatus - SetFluxBoundaries(std::shared_ptr> &md) { + SetFluxBoundaries(std::shared_ptr> &md, bool do_flux_dx) { using namespace parthenon; const int ndim = md->GetMeshPointer()->ndim; IndexRange ib = md->GetBoundsI(IndexDomain::interior); @@ -196,7 +241,6 @@ class PoissonEquation { constexpr int x3off[6]{0, 0, 0, 0, -1, 1}; constexpr TE tes[6]{TE::F1, TE::F1, TE::F2, TE::F2, TE::F3, TE::F3}; constexpr int dirs[6]{X1DIR, X1DIR, X2DIR, X2DIR, X3DIR, X3DIR}; - parthenon::par_for_outer( DEFAULT_OUTER_LOOP_PATTERN, "SetFluxBoundaries", DevExecSpace(), scratch_size_in_bytes, scratch_level, 0, pack.GetNBlocks() - 1, @@ -228,7 +272,8 @@ class PoissonEquation { } // Correct for size of neighboring zone at fine-coarse boundary when using // constant prolongation - if (pack.GetLevel(b, x3off[face], x2off[face], x1off[face]) == level - 1) { + if (do_flux_dx && + pack.GetLevel(b, x3off[face], x2off[face], x1off[face]) == level - 1) { parthenon::par_for_inner(DEFAULT_INNER_LOOP_PATTERN, member, 0, idxer.size() - 1, [&](const int idx) { const auto [k, j, i] = idxer(idx); diff --git a/example/poisson_gmg/poisson_package.cpp b/example/poisson_gmg/poisson_package.cpp index 02c74c68ed16..45377183acc0 100644 --- a/example/poisson_gmg/poisson_package.cpp +++ b/example/poisson_gmg/poisson_package.cpp @@ -92,11 +92,16 @@ std::shared_ptr Initialize(ParameterInput *pin) { bool use_exact_rhs = pin->GetOrAddBoolean("poisson", "use_exact_rhs", false); pkg->AddParam<>("use_exact_rhs", use_exact_rhs); + + bool flux_correct = pin->GetOrAddBoolean("poisson", "flux_correct", false); + pkg->AddParam<>("flux_correct", flux_correct); + + std::string prolong = pin->GetOrAddString("poisson", "prolongation", "Linear"); PoissonEquation eq; - eq.do_flux_cor = pin->GetOrAddBoolean("poisson", "flux_correct", false); + eq.do_flux_cor = flux_correct; eq.set_flux_boundary = pin->GetOrAddBoolean("poisson", "set_flux_boundary", false); - + eq.include_flux_dx = (prolong == "Constant"); parthenon::solvers::MGParams mg_params(pin, "poisson/solver_params"); parthenon::solvers::MGSolver mg_solver(pkg.get(), mg_params, eq); @@ -124,7 +129,6 @@ std::shared_ptr Initialize(ParameterInput *pin) { auto mflux_comm = Metadata({Metadata::Cell, Metadata::Independent, Metadata::FillGhost, Metadata::WithFluxes, Metadata::GMGRestrict}); - std::string prolong = pin->GetOrAddString("poisson", "prolongation", "Linear"); if (prolong == "Linear") { mflux_comm.RegisterRefinementOps(); } else if (prolong == "Constant") { diff --git a/src/solvers/mg_solver.hpp b/src/solvers/mg_solver.hpp index 3ea25eae441f..a5f3c3909ed5 100644 --- a/src/solvers/mg_solver.hpp +++ b/src/solvers/mg_solver.hpp @@ -115,6 +115,9 @@ class MGSolver { RestrictAverage>(); } else if (params_.prolongation == "OldLinear") { mres_err.RegisterRefinementOps(); + } else if (params_.prolongation == "User") { + mres_err.RegisterRefinementOps, + RestrictAverage>(); } else { printf("Requested prolongation type: %s\n", params_.prolongation.c_str()); PARTHENON_FAIL("Unknown multi-grid prolongation type."); @@ -501,9 +504,13 @@ class MGSolver { TF(ReceiveBoundBufs), md_comm); auto set_from_coarser = tl.AddTask( recv_from_coarser, 
BTF(SetBounds), md_comm); - auto prolongate = - tl.AddTask(set_from_coarser, - BTF(ProlongateBounds), md_comm); + auto prolongate = set_from_coarser; + if (params_.prolongation == "User") { + prolongate = tl.AddTask(set_from_coarser, BTF(&equations::template Prolongate), md_comm); + } else { + prolongate = tl.AddTask(set_from_coarser, + BTF(ProlongateBounds), md_comm); + } // 7. Correct solution on this level with res_err field and store in // communication field From 4fd9883bfbe26cf115f29d2a4911a7a435a0a5e3 Mon Sep 17 00:00:00 2001 From: Luke Roberts Date: Wed, 25 Sep 2024 15:16:49 -0600 Subject: [PATCH 18/62] More selectability --- example/poisson_gmg/poisson_driver.cpp | 6 +- example/poisson_gmg/poisson_equation.hpp | 101 ++++++++++++++++++++--- example/poisson_gmg/poisson_package.cpp | 16 +--- src/solvers/bicgstab_solver.hpp | 3 +- src/solvers/cg_solver.hpp | 9 +- src/solvers/mg_solver.hpp | 2 +- 6 files changed, 104 insertions(+), 33 deletions(-) diff --git a/example/poisson_gmg/poisson_driver.cpp b/example/poisson_gmg/poisson_driver.cpp index c5c863f8a050..b6429e2f4832 100644 --- a/example/poisson_gmg/poisson_driver.cpp +++ b/example/poisson_gmg/poisson_driver.cpp @@ -75,7 +75,6 @@ TaskCollection PoissonDriver::MakeTaskCollection(BlockList_t &blocks) { auto pkg = pmesh->packages.Get("poisson_package"); auto solver = pkg->Param("solver"); - auto flux_correct = pkg->Param("flux_correct"); auto use_exact_rhs = pkg->Param("use_exact_rhs"); auto *mg_solver = pkg->MutableParam>( @@ -100,9 +99,8 @@ TaskCollection PoissonDriver::MakeTaskCollection(BlockList_t &blocks) { if (use_exact_rhs) { auto copy_exact = tl.AddTask(get_rhs, TF(solvers::utils::CopyData), md); auto comm = AddBoundaryExchangeTasks(copy_exact, tl, md, true); - PoissonEquation eqs; - eqs.do_flux_cor = flux_correct; - get_rhs = eqs.Ax(tl, comm, md); + auto *eqs = pkg->MutableParam("poisson_equation"); + get_rhs = eqs->Ax(tl, comm, md); } // Set initial solution guess to zero diff --git a/example/poisson_gmg/poisson_equation.hpp b/example/poisson_gmg/poisson_equation.hpp index 2d8e8e14906b..97b5e265ee5d 100644 --- a/example/poisson_gmg/poisson_equation.hpp +++ b/example/poisson_gmg/poisson_equation.hpp @@ -37,6 +37,24 @@ class PoissonEquation { bool do_flux_cor = false; bool set_flux_boundary = false; bool include_flux_dx = false; + enum class ProlongationType { Constant, Linear, Kwak }; + ProlongationType prolongation_type = ProlongationType::Constant; + + PoissonEquation(parthenon::ParameterInput *pin, const std::string &label) { + do_flux_cor = pin->GetOrAddBoolean(label, "flux_correct", false); + set_flux_boundary = pin->GetOrAddBoolean(label, "set_flux_boundary", false); + include_flux_dx = (pin->GetOrAddString(label, "boundary_prolongation", "Linear") == "Constant"); + auto pro_int = pin->GetOrAddString(label, "interior_prolongation", "Linear"); + if (pro_int == "Constant") { + prolongation_type = ProlongationType::Constant; + } else if (pro_int == "Linear") { + prolongation_type = ProlongationType::Linear; + } else if (pro_int == "Kwak") { + prolongation_type = ProlongationType::Kwak; + } else { + PARTHENON_FAIL("Invalid user prolongation type."); + } + } // Add tasks to calculate the result of the matrix A (which is implicitly defined by // this class) being applied to x_t and store it in field out_t @@ -166,8 +184,31 @@ class PoissonEquation { } template + parthenon::TaskID Prolongate(parthenon::TaskList &tl, parthenon::TaskID depends_on, + std::shared_ptr> &md) { + if (prolongation_type == ProlongationType::Constant) 
{ + return tl.AddTask(depends_on, ProlongateImpl, md); + } else if (prolongation_type == ProlongationType::Linear) { + return tl.AddTask(depends_on, ProlongateImpl, md); + } else if (prolongation_type == ProlongationType::Kwak) { + return tl.AddTask(depends_on, ProlongateImpl, md); + } + return depends_on; + } + + KOKKOS_FORCEINLINE_FUNCTION + static Real LinearFactor(int d, bool lo_bound, bool up_bound) { + if (d == 0) return 1.0; // Indicates this dimension is not included + if (d == 1) return (2.0 + !up_bound) / 4.0; + if (d == -1) return (2.0 + !lo_bound) / 4.0; + if (d == 3) return !up_bound / 4.0; + if (d == -3) return !lo_bound / 4.0; + return 0.0; + } + + template static parthenon::TaskStatus - Prolongate(std::shared_ptr> &md) { + ProlongateImpl(std::shared_ptr> &md) { using namespace parthenon; const int ndim = md->GetMeshPointer()->ndim; IndexRange ib = md->GetBoundsI(IndexDomain::interior); @@ -188,7 +229,7 @@ class PoissonEquation { const auto desc_coarse = parthenon::MakePackDescriptor(md.get(), {}, {PDOpt::Coarse}); auto pack = desc.GetPack(md.get(), include_block); auto pack_coarse = desc_coarse.GetPack(md.get(), include_block); - + parthenon::par_for( "Prolongate", 0, pack.GetNBlocks() - 1, pack.GetLowerBoundHost(0), pack.GetUpperBoundHost(0), @@ -197,14 +238,54 @@ class PoissonEquation { const int ck = (ndim > 2) ? (fk - kb.s) / 2 + ckb.s : ckb.s; const int cj = (ndim > 1) ? (fj - jb.s) / 2 + cjb.s : cjb.s; const int ci = (ndim > 0) ? (fi - ib.s) / 2 + cib.s : cib.s; - pack(b, n, fk, fj, fi) = pack_coarse(b, n, ck, cj, ci); - //for (int ok = -(ndim > 2); ok < 1 + (ndim > 2); ++ok) { - // for (int oj = -(ndim > 1); oj < 1 + (ndim > 1); ++oj) { - // for (int oi = -(ndim > 0); oi < 1 + (ndim > 0); ++oi) { - // - // } - // } - //} + const int fok = (fk - kb.s) % 2; + const int foj = (fj - jb.s) % 2; + const int foi = (fi - ib.s) % 2; + const bool bound[6]{ + pack.IsPhysicalBoundary(b, 0, 0, -1) && (ib.s == fi), + pack.IsPhysicalBoundary(b, 0, 0, 1) && (ib.e == fi), + pack.IsPhysicalBoundary(b, 0, -1, 0) && (jb.s == fj), + pack.IsPhysicalBoundary(b, 0, 1, 0) && (jb.e == fj), + pack.IsPhysicalBoundary(b, -1, 0, 0) && (kb.s == fk), + pack.IsPhysicalBoundary(b, 1, 0, 0) && (kb.e == fk)}; + + if constexpr (ProlongationType::Constant == prolongation_type) { + pack(b, n, fk, fj, fi) = pack_coarse(b, n, ck, cj, ci); + } else if constexpr (ProlongationType::Linear == prolongation_type) { + pack(b, n, fk, fj, fi) = 0.0; + for (int ok = -(ndim > 2); ok < 1 + (ndim > 2); ++ok) { + for (int oj = -(ndim > 1); oj < 1 + (ndim > 1); ++oj) { + for (int oi = -(ndim > 0); oi < 1 + (ndim > 0); ++oi) { + const int dx3 = (ndim > 2) ? 4 * ok - (2 * fok - 1) : 0; + const int dx2 = (ndim > 1) ? 
4 * oj - (2 * foj - 1) : 0; + const int dx1 = 4 * oi - (2 * foi - 1); + pack(b, n, fk, fj, fi) += LinearFactor(dx1, bound[0], bound[1]) + * LinearFactor(dx2, bound[2], bound[3]) + * LinearFactor(dx3, bound[4], bound[5]) + * pack_coarse(b, n, ck + ok, cj + oj, ci + oi); + + } + } + } + } else if constexpr (ProlongationType::Kwak == prolongation_type) { + pack(b, n, fk, fj, fi) = 0.0; + if (ndim > 2 && !bound[4 + fok]) { + for (int ok = fok - 1; ok <= fok; ++ok) { + pack(b, n, fk, fj, fi) += pack_coarse(b, n, ck + ok, cj, ci); + } + } + if (ndim > 1 && !bound[2 + foj]) { + for (int oj = foj - 1; oj <= foj; ++oj) { + pack(b, n, fk, fj, fi) += pack_coarse(b, n, ck, cj + oj, ci); + } + } + if (ndim > 0 && !bound[foi]) { + for (int oi = foi - 1; oi <= foi; ++oi) { + pack(b, n, fk, fj, fi) += pack_coarse(b, n, ck, cj, ci + oi); + } + } + pack(b, n, fk, fj, fi) /= 2.0 * ndim; + } }); return TaskStatus::complete; } diff --git a/example/poisson_gmg/poisson_package.cpp b/example/poisson_gmg/poisson_package.cpp index 45377183acc0..68bbd3af2fb0 100644 --- a/example/poisson_gmg/poisson_package.cpp +++ b/example/poisson_gmg/poisson_package.cpp @@ -78,30 +78,20 @@ std::shared_ptr Initialize(ParameterInput *pin) { pkg->UserBoundaryFunctions[BF::outer_x2].push_back(GetBC()); pkg->UserBoundaryFunctions[BF::outer_x3].push_back(GetBC()); - int max_poisson_iterations = pin->GetOrAddInteger("poisson", "max_iterations", 10000); - pkg->AddParam<>("max_iterations", max_poisson_iterations); - Real diagonal_alpha = pin->GetOrAddReal("poisson", "diagonal_alpha", 0.0); pkg->AddParam<>("diagonal_alpha", diagonal_alpha); std::string solver = pin->GetOrAddString("poisson", "solver", "MG"); pkg->AddParam<>("solver", solver); - Real err_tol = pin->GetOrAddReal("poisson", "error_tolerance", 1.e-8); - pkg->AddParam<>("error_tolerance", err_tol); - bool use_exact_rhs = pin->GetOrAddBoolean("poisson", "use_exact_rhs", false); pkg->AddParam<>("use_exact_rhs", use_exact_rhs); - bool flux_correct = pin->GetOrAddBoolean("poisson", "flux_correct", false); - pkg->AddParam<>("flux_correct", flux_correct); + std::string prolong = pin->GetOrAddString("poisson", "boundary_prolongation", "Linear"); - std::string prolong = pin->GetOrAddString("poisson", "prolongation", "Linear"); + PoissonEquation eq(pin, "poisson"); + pkg->AddParam<>("poisson_equation", eq, parthenon::Params::Mutability::Mutable); - PoissonEquation eq; - eq.do_flux_cor = flux_correct; - eq.set_flux_boundary = pin->GetOrAddBoolean("poisson", "set_flux_boundary", false); - eq.include_flux_dx = (prolong == "Constant"); parthenon::solvers::MGParams mg_params(pin, "poisson/solver_params"); parthenon::solvers::MGSolver mg_solver(pkg.get(), mg_params, eq); diff --git a/src/solvers/bicgstab_solver.hpp b/src/solvers/bicgstab_solver.hpp index 995ffeb15c36..9c173493fd62 100644 --- a/src/solvers/bicgstab_solver.hpp +++ b/src/solvers/bicgstab_solver.hpp @@ -151,7 +151,7 @@ class BiCGSTABSolver { auto copy_rhat0 = tl.AddTask(dependence, TF(CopyData), md); auto get_rhat0r_init = DotProduct(dependence, tl, &rhat0r, md); auto get_rhs2 = get_rhat0r_init; - if (params_.relative_residual) + if (params_.relative_residual || params_.print_per_step) get_rhs2 = DotProduct(dependence, tl, &rhs2, md); auto initialize = tl.AddTask( TaskQualifier::once_per_region | TaskQualifier::local_sync, @@ -172,6 +172,7 @@ class BiCGSTABSolver { : *res_tol; printf("# [0] v-cycle\n# [1] rms-residual (tol = %e) \n# [2] rms-error\n", tol); + printf("0 %e\n", std::sqrt(solver->rhs2.val / pm->GetTotalCells())); } 
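+ // (A note on the added "0 %e" row, as a sketch of the intent: the solver tasks initialize with a zero guess, so r = rhs at iteration zero and
+ //  sqrt(rhs2.val / pm->GetTotalCells()) is the RMS residual before any iterations; printing it lets convergence scripts read the full history from stdout.)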
return TaskStatus::complete; }, diff --git a/src/solvers/cg_solver.hpp b/src/solvers/cg_solver.hpp index cef695bd56b8..63d97cc0f0f8 100644 --- a/src/solvers/cg_solver.hpp +++ b/src/solvers/cg_solver.hpp @@ -121,7 +121,7 @@ class CGSolver { auto zero_p = tl.AddTask(dependence, TF(SetToZero
), md); auto copy_r = tl.AddTask(dependence, TF(CopyData), md); auto get_rhs2 = none; - if (params_.relative_residual) + if (params_.relative_residual || params_.print_per_step) get_rhs2 = DotProduct(dependence, tl, &rhs2, md); auto initialize = tl.AddTask( TaskQualifier::once_per_region | TaskQualifier::local_sync, @@ -136,16 +136,17 @@ class CGSolver { if (params_.print_per_step && Globals::my_rank == 0) { initialize = tl.AddTask( TaskQualifier::once_per_region, initialize, "print to screen", - [&](CGSolver *solver, std::shared_ptr res_tol, bool relative_residual) { + [&](CGSolver *solver, std::shared_ptr res_tol, bool relative_residual, Mesh *pm) { Real tol = relative_residual - ? *res_tol * std::sqrt(solver->rhs2.val / pmesh->GetTotalCells()) + ? *res_tol * std::sqrt(solver->rhs2.val / pm->GetTotalCells()) : *res_tol; printf("# [0] v-cycle\n# [1] rms-residual (tol = %e) \n# [2] rms-error\n", tol); + printf("0 %e\n", std::sqrt(solver->rhs2.val / pm->GetTotalCells())); return TaskStatus::complete; }, - this, params_.residual_tolerance, params_.relative_residual); + this, params_.residual_tolerance, params_.relative_residual, pmesh); } // BEGIN ITERATIVE TASKS diff --git a/src/solvers/mg_solver.hpp b/src/solvers/mg_solver.hpp index a5f3c3909ed5..73662448ac21 100644 --- a/src/solvers/mg_solver.hpp +++ b/src/solvers/mg_solver.hpp @@ -506,7 +506,7 @@ class MGSolver { recv_from_coarser, BTF(SetBounds), md_comm); auto prolongate = set_from_coarser; if (params_.prolongation == "User") { - prolongate = tl.AddTask(set_from_coarser, BTF(&equations::template Prolongate), md_comm); + prolongate = eqs_.template Prolongate(tl, set_from_coarser, md_comm); } else { prolongate = tl.AddTask(set_from_coarser, BTF(ProlongateBounds), md_comm); From bca3a1b1397c17715b82799cd7b7f888c7cc909e Mon Sep 17 00:00:00 2001 From: Luke Roberts Date: Wed, 25 Sep 2024 15:24:27 -0600 Subject: [PATCH 19/62] remove MG prolongation operations since they are now user defined --- src/prolong_restrict/pr_ops.hpp | 98 --------------------------------- src/solvers/mg_solver.hpp | 25 +-------- 2 files changed, 3 insertions(+), 120 deletions(-) diff --git a/src/prolong_restrict/pr_ops.hpp b/src/prolong_restrict/pr_ops.hpp index 3fe53bd908ae..f074f6a5d02b 100644 --- a/src/prolong_restrict/pr_ops.hpp +++ b/src/prolong_restrict/pr_ops.hpp @@ -288,104 +288,6 @@ using ProlongateSharedMinMod = ProlongateSharedGeneral; using ProlongateSharedLinear = ProlongateSharedGeneral; using ProlongatePiecewiseConstant = ProlongateSharedGeneral; -enum class MGProlongationType { Constant, Linear, Quadratic, Kwak }; - -template -struct ProlongateSharedMG { - static constexpr bool OperationRequired(TopologicalElement fel, - TopologicalElement cel) { - if (fel != TopologicalElement::CC) return false; - return fel == cel; - } - - KOKKOS_FORCEINLINE_FUNCTION - static Real QuadraticFactor(int d) { - if (d == 0) return 1.0; // Indicates this dimension is not included - if (d == 1 || d == -1) return 30.0 / 32.0; - if (d == 3 || d == -3) return 5.0 / 32.0; - if (d == 5 || d == -5) return -3.0 / 32.0; - return 0.0; - } - - KOKKOS_FORCEINLINE_FUNCTION - static Real LinearFactor(int d, bool up_bound, bool lo_bound) { - if (d == 0) return 1.0; // Indicates this dimension is not included - if (d == 1) return (2.0 + !up_bound) / 4.0; - if (d == -1) return (2.0 + !lo_bound) / 4.0; - if (d == 3) return !up_bound / 4.0; - if (d == -3) return !lo_bound / 4.0; - return 0.0; - } - - KOKKOS_FORCEINLINE_FUNCTION - static Real ConstantFactor(int d) { - if (d == 0) return 1.0; 
// Indicates this dimension is not included - if (d == 1 || d == -1) return 1.0; - return 0.0; - } - - template - KOKKOS_FORCEINLINE_FUNCTION static void - Do(const int l, const int m, const int n, const int k, const int j, const int i, - const IndexRange &ckb, const IndexRange &cjb, const IndexRange &cib, - const IndexRange &kb, const IndexRange &jb, const IndexRange &ib, - const Coordinates_t &coords, const Coordinates_t &coarse_coords, - const ParArrayND *pcoarse, - const ParArrayND *pfine) { - using namespace util; - auto &coarse = *pcoarse; - auto &fine = *pfine; - - constexpr int element_idx = static_cast(el) % 3; - - const int fi = (DIM > 0) ? (i - cib.s) * 2 + ib.s : ib.s; - const int fj = (DIM > 1) ? (j - cjb.s) * 2 + jb.s : jb.s; - const int fk = (DIM > 2) ? (k - ckb.s) * 2 + kb.s : kb.s; - - for (int fok = 0; fok < 1 + (DIM > 2); ++fok) { - for (int foj = 0; foj < 1 + (DIM > 1); ++foj) { - for (int foi = 0; foi < 1 + (DIM > 0); ++foi) { - auto &f = fine(element_idx, l, m, n, fk + fok, fj + foj, fi + foi); - f = 0.0; - const bool lo_bound_x = ((fi + foi) == ib.s); - const bool up_bound_x = ((fi + foi) == ib.e); - const bool lo_bound_y = ((fj + foj) == jb.s); - const bool up_bound_y = ((fj + foj) == jb.e); - const bool lo_bound_z = ((fk + fok) == kb.s); - const bool up_bound_z = ((fk + fok) == kb.e); - for (int ok = -(DIM > 2); ok < 1 + (DIM > 2); ++ok) { - for (int oj = -(DIM > 1); oj < 1 + (DIM > 1); ++oj) { - for (int oi = -(DIM > 0); oi < 1 + (DIM > 0); ++oi) { - const int dx = 4 * oi - foi + 1; - const int dy = (DIM > 1) ? 4 * oj - foj + 1 : 0; - const int dz = (DIM > 2) ? 4 * ok - fok + 1 : 0; - if constexpr (MGProlongationType::Linear == type) { - f += LinearFactor(dx, lo_bound_x, up_bound_x) * - LinearFactor(dy, lo_bound_y, up_bound_y) * - LinearFactor(dz, lo_bound_z, up_bound_z) * - coarse(element_idx, l, m, n, k + ok, j + oj, i + oi); - } else if constexpr (MGProlongationType::Kwak == type) { - const Real fac = - ((dx <= 1) + (dy <= 1 && DIM > 1) + (dz <= 1 && DIM > 2)) / - (2.0 * DIM); - f += fac * coarse(element_idx, l, m, n, k + ok, j + oj, i + oi); - } else if constexpr (MGProlongationType::Quadratic == type) { - f += QuadraticFactor(dx) * QuadraticFactor(dy) * QuadraticFactor(dz) * - coarse(element_idx, l, m, n, k + ok, j + oj, i + oi); - } else { - f += ConstantFactor(dx) * ConstantFactor(dy) * ConstantFactor(dz) * - coarse(element_idx, l, m, n, k + ok, j + oj, i + oi); - } - } - } - } - } - } - } - } -}; - struct ProlongateInternalAverage { static constexpr bool OperationRequired(TopologicalElement fel, TopologicalElement cel) { diff --git a/src/solvers/mg_solver.hpp b/src/solvers/mg_solver.hpp index 73662448ac21..d820d3fde547 100644 --- a/src/solvers/mg_solver.hpp +++ b/src/solvers/mg_solver.hpp @@ -101,27 +101,7 @@ class MGSolver { Metadata::GMGProlongate, Metadata::OneCopy}, shape); - if (params_.prolongation == "Linear") { - mres_err.RegisterRefinementOps, - RestrictAverage>(); - } else if (params_.prolongation == "Kwak") { - mres_err.RegisterRefinementOps, - RestrictAverage>(); - } else if (params_.prolongation == "Quadratic") { - mres_err.RegisterRefinementOps, - RestrictAverage>(); - } else if (params_.prolongation == "Constant") { - mres_err.RegisterRefinementOps, - RestrictAverage>(); - } else if (params_.prolongation == "OldLinear") { - mres_err.RegisterRefinementOps(); - } else if (params_.prolongation == "User") { - mres_err.RegisterRefinementOps, - RestrictAverage>(); - } else { - printf("Requested prolongation type: %s\n", 
params_.prolongation.c_str()); - PARTHENON_FAIL("Unknown multi-grid prolongation type."); - } + mres_err.RegisterRefinementOps(); pkg->AddField(res_err::name(), mres_err); auto mtemp = @@ -508,7 +488,8 @@ class MGSolver { if (params_.prolongation == "User") { prolongate = eqs_.template Prolongate(tl, set_from_coarser, md_comm); } else { - prolongate = tl.AddTask(set_from_coarser, + prolongate = + tl.AddTask(set_from_coarser, BTF(ProlongateBounds), md_comm); } From d554036717275c584991fd33742fca68f37c76b7 Mon Sep 17 00:00:00 2001 From: Luke Roberts Date: Wed, 25 Sep 2024 15:25:39 -0600 Subject: [PATCH 20/62] format --- example/poisson_gmg/plot_convergence.py | 38 ++++++--- example/poisson_gmg/poisson_equation.hpp | 101 +++++++++++++---------- example/poisson_gmg/poisson_package.cpp | 2 +- src/solvers/cg_solver.hpp | 10 +-- 4 files changed, 88 insertions(+), 63 deletions(-) diff --git a/example/poisson_gmg/plot_convergence.py b/example/poisson_gmg/plot_convergence.py index 9e142f53e7cb..89caa569a906 100644 --- a/example/poisson_gmg/plot_convergence.py +++ b/example/poisson_gmg/plot_convergence.py @@ -13,29 +13,41 @@ import numpy as np import glob -import matplotlib.pyplot as plt +import matplotlib.pyplot as plt import subprocess -plt.style.use('tableau-colorblind10') +plt.style.use("tableau-colorblind10") solver = "BiCGSTAB" difco = 1e6 for bound_pro in ["Constant", "Linear"]: - for interior_pro in ["Constant", "OldLinear"]: + for interior_pro in ["Constant", "OldLinear"]: - p = subprocess.run(["./poisson-gmg-example", "-i", "parthinput.poisson", - "poisson/solver=" + solver, - "poisson/interior_D=" + str(difco), - "poisson/prolongation=" + bound_pro, - "poisson/solver_params/prolongation=" + interior_pro], capture_output = True, text = True) - dat = np.genfromtxt(p.stdout.splitlines()) + p = subprocess.run( + [ + "./poisson-gmg-example", + "-i", + "parthinput.poisson", + "poisson/solver=" + solver, + "poisson/interior_D=" + str(difco), + "poisson/prolongation=" + bound_pro, + "poisson/solver_params/prolongation=" + interior_pro, + ], + capture_output=True, + text=True, + ) + dat = np.genfromtxt(p.stdout.splitlines()) - plt.semilogy(dat[:, 0], dat[:, 1], label=solver + "_" + str(difco) + "_" + bound_pro + "_" + interior_pro) + plt.semilogy( + dat[:, 0], + dat[:, 1], + label=solver + "_" + str(difco) + "_" + bound_pro + "_" + interior_pro, + ) -plt.legend(loc = 'upper right') -plt.ylim([1.e-14, 1e2]) +plt.legend(loc="upper right") +plt.ylim([1.0e-14, 1e2]) plt.xlim([0, 40]) plt.xlabel("# of V-cycles") plt.ylabel("RMS Residual") -plt.savefig("convergence_1e6.pdf") \ No newline at end of file +plt.savefig("convergence_1e6.pdf") diff --git a/example/poisson_gmg/poisson_equation.hpp b/example/poisson_gmg/poisson_equation.hpp index 97b5e265ee5d..d8c7fc692ba6 100644 --- a/example/poisson_gmg/poisson_equation.hpp +++ b/example/poisson_gmg/poisson_equation.hpp @@ -37,21 +37,22 @@ class PoissonEquation { bool do_flux_cor = false; bool set_flux_boundary = false; bool include_flux_dx = false; - enum class ProlongationType { Constant, Linear, Kwak }; - ProlongationType prolongation_type = ProlongationType::Constant; + enum class ProlongationType { Constant, Linear, Kwak }; + ProlongationType prolongation_type = ProlongationType::Constant; PoissonEquation(parthenon::ParameterInput *pin, const std::string &label) { do_flux_cor = pin->GetOrAddBoolean(label, "flux_correct", false); set_flux_boundary = pin->GetOrAddBoolean(label, "set_flux_boundary", false); - include_flux_dx = (pin->GetOrAddString(label, 
"boundary_prolongation", "Linear") == "Constant"); + include_flux_dx = + (pin->GetOrAddString(label, "boundary_prolongation", "Linear") == "Constant"); auto pro_int = pin->GetOrAddString(label, "interior_prolongation", "Linear"); - if (pro_int == "Constant") { + if (pro_int == "Constant") { prolongation_type = ProlongationType::Constant; - } else if (pro_int == "Linear") { + } else if (pro_int == "Linear") { prolongation_type = ProlongationType::Linear; - } else if (pro_int == "Kwak") { + } else if (pro_int == "Kwak") { prolongation_type = ProlongationType::Kwak; - } else { + } else { PARTHENON_FAIL("Invalid user prolongation type."); } } @@ -186,16 +187,19 @@ class PoissonEquation { template parthenon::TaskID Prolongate(parthenon::TaskList &tl, parthenon::TaskID depends_on, std::shared_ptr> &md) { - if (prolongation_type == ProlongationType::Constant) { - return tl.AddTask(depends_on, ProlongateImpl, md); - } else if (prolongation_type == ProlongationType::Linear) { - return tl.AddTask(depends_on, ProlongateImpl, md); - } else if (prolongation_type == ProlongationType::Kwak) { - return tl.AddTask(depends_on, ProlongateImpl, md); + if (prolongation_type == ProlongationType::Constant) { + return tl.AddTask(depends_on, ProlongateImpl, + md); + } else if (prolongation_type == ProlongationType::Linear) { + return tl.AddTask(depends_on, ProlongateImpl, + md); + } else if (prolongation_type == ProlongationType::Kwak) { + return tl.AddTask(depends_on, ProlongateImpl, + md); } return depends_on; } - + KOKKOS_FORCEINLINE_FUNCTION static Real LinearFactor(int d, bool lo_bound, bool up_bound) { if (d == 0) return 1.0; // Indicates this dimension is not included @@ -206,6 +210,15 @@ class PoissonEquation { return 0.0; } + KOKKOS_FORCEINLINE_FUNCTION + static Real QuadraticFactor(int d) { + if (d == 0) return 1.0; // Indicates this dimension is not included + if (d == 1 || d == -1) return 30.0 / 32.0; + if (d == 3 || d == -3) return 5.0 / 32.0; + if (d == 5 || d == -5) return -3.0 / 32.0; + return 0.0; + } + template static parthenon::TaskStatus ProlongateImpl(std::shared_ptr> &md) { @@ -222,33 +235,34 @@ class PoissonEquation { int nblocks = md->NumBlocks(); std::vector include_block(nblocks, true); - for (int b = 0; b < nblocks; ++b) { - include_block[b] = md->grid.logical_level == md->GetBlockData(b)->GetBlockPointer()->loc.level(); + for (int b = 0; b < nblocks; ++b) { + include_block[b] = + md->grid.logical_level == md->GetBlockData(b)->GetBlockPointer()->loc.level(); } const auto desc = parthenon::MakePackDescriptor(md.get()); - const auto desc_coarse = parthenon::MakePackDescriptor(md.get(), {}, {PDOpt::Coarse}); + const auto desc_coarse = + parthenon::MakePackDescriptor(md.get(), {}, {PDOpt::Coarse}); auto pack = desc.GetPack(md.get(), include_block); auto pack_coarse = desc_coarse.GetPack(md.get(), include_block); - + parthenon::par_for( - "Prolongate", 0, pack.GetNBlocks() - 1, - pack.GetLowerBoundHost(0), pack.GetUpperBoundHost(0), - kb.s, kb.e, jb.s, jb.e, ib.s, ib.e, - KOKKOS_LAMBDA(const int b, const int n, const int fk, const int fj, const int fi) { + "Prolongate", 0, pack.GetNBlocks() - 1, pack.GetLowerBoundHost(0), + pack.GetUpperBoundHost(0), kb.s, kb.e, jb.s, jb.e, ib.s, ib.e, + KOKKOS_LAMBDA(const int b, const int n, const int fk, const int fj, + const int fi) { const int ck = (ndim > 2) ? (fk - kb.s) / 2 + ckb.s : ckb.s; const int cj = (ndim > 1) ? (fj - jb.s) / 2 + cjb.s : cjb.s; const int ci = (ndim > 0) ? 
(fi - ib.s) / 2 + cib.s : cib.s; const int fok = (fk - kb.s) % 2; const int foj = (fj - jb.s) % 2; const int foi = (fi - ib.s) % 2; - const bool bound[6]{ - pack.IsPhysicalBoundary(b, 0, 0, -1) && (ib.s == fi), - pack.IsPhysicalBoundary(b, 0, 0, 1) && (ib.e == fi), - pack.IsPhysicalBoundary(b, 0, -1, 0) && (jb.s == fj), - pack.IsPhysicalBoundary(b, 0, 1, 0) && (jb.e == fj), - pack.IsPhysicalBoundary(b, -1, 0, 0) && (kb.s == fk), - pack.IsPhysicalBoundary(b, 1, 0, 0) && (kb.e == fk)}; - + const bool bound[6]{pack.IsPhysicalBoundary(b, 0, 0, -1) && (ib.s == fi), + pack.IsPhysicalBoundary(b, 0, 0, 1) && (ib.e == fi), + pack.IsPhysicalBoundary(b, 0, -1, 0) && (jb.s == fj), + pack.IsPhysicalBoundary(b, 0, 1, 0) && (jb.e == fj), + pack.IsPhysicalBoundary(b, -1, 0, 0) && (kb.s == fk), + pack.IsPhysicalBoundary(b, 1, 0, 0) && (kb.e == fk)}; + if constexpr (ProlongationType::Constant == prolongation_type) { pack(b, n, fk, fj, fi) = pack_coarse(b, n, ck, cj, ci); } else if constexpr (ProlongationType::Linear == prolongation_type) { @@ -256,32 +270,31 @@ class PoissonEquation { for (int ok = -(ndim > 2); ok < 1 + (ndim > 2); ++ok) { for (int oj = -(ndim > 1); oj < 1 + (ndim > 1); ++oj) { for (int oi = -(ndim > 0); oi < 1 + (ndim > 0); ++oi) { - const int dx3 = (ndim > 2) ? 4 * ok - (2 * fok - 1) : 0; - const int dx2 = (ndim > 1) ? 4 * oj - (2 * foj - 1) : 0; + const int dx3 = (ndim > 2) ? 4 * ok - (2 * fok - 1) : 0; + const int dx2 = (ndim > 1) ? 4 * oj - (2 * foj - 1) : 0; const int dx1 = 4 * oi - (2 * foi - 1); - pack(b, n, fk, fj, fi) += LinearFactor(dx1, bound[0], bound[1]) - * LinearFactor(dx2, bound[2], bound[3]) - * LinearFactor(dx3, bound[4], bound[5]) - * pack_coarse(b, n, ck + ok, cj + oj, ci + oi); - + pack(b, n, fk, fj, fi) += LinearFactor(dx1, bound[0], bound[1]) * + LinearFactor(dx2, bound[2], bound[3]) * + LinearFactor(dx3, bound[4], bound[5]) * + pack_coarse(b, n, ck + ok, cj + oj, ci + oi); } } } - } else if constexpr (ProlongationType::Kwak == prolongation_type) { + } else if constexpr (ProlongationType::Kwak == prolongation_type) { pack(b, n, fk, fj, fi) = 0.0; - if (ndim > 2 && !bound[4 + fok]) { + if (ndim > 2 && !bound[4 + fok]) { for (int ok = fok - 1; ok <= fok; ++ok) { - pack(b, n, fk, fj, fi) += pack_coarse(b, n, ck + ok, cj, ci); + pack(b, n, fk, fj, fi) += pack_coarse(b, n, ck + ok, cj, ci); } } - if (ndim > 1 && !bound[2 + foj]) { + if (ndim > 1 && !bound[2 + foj]) { for (int oj = foj - 1; oj <= foj; ++oj) { - pack(b, n, fk, fj, fi) += pack_coarse(b, n, ck, cj + oj, ci); + pack(b, n, fk, fj, fi) += pack_coarse(b, n, ck, cj + oj, ci); } } - if (ndim > 0 && !bound[foi]) { + if (ndim > 0 && !bound[foi]) { for (int oi = foi - 1; oi <= foi; ++oi) { - pack(b, n, fk, fj, fi) += pack_coarse(b, n, ck, cj, ci + oi); + pack(b, n, fk, fj, fi) += pack_coarse(b, n, ck, cj, ci + oi); } } pack(b, n, fk, fj, fi) /= 2.0 * ndim; diff --git a/example/poisson_gmg/poisson_package.cpp b/example/poisson_gmg/poisson_package.cpp index 68bbd3af2fb0..45f8a2acafb0 100644 --- a/example/poisson_gmg/poisson_package.cpp +++ b/example/poisson_gmg/poisson_package.cpp @@ -86,7 +86,7 @@ std::shared_ptr Initialize(ParameterInput *pin) { bool use_exact_rhs = pin->GetOrAddBoolean("poisson", "use_exact_rhs", false); pkg->AddParam<>("use_exact_rhs", use_exact_rhs); - + std::string prolong = pin->GetOrAddString("poisson", "boundary_prolongation", "Linear"); PoissonEquation eq(pin, "poisson"); diff --git a/src/solvers/cg_solver.hpp b/src/solvers/cg_solver.hpp index 63d97cc0f0f8..ad926cd745d0 100644 --- 
a/src/solvers/cg_solver.hpp +++ b/src/solvers/cg_solver.hpp @@ -136,11 +136,11 @@ class CGSolver { if (params_.print_per_step && Globals::my_rank == 0) { initialize = tl.AddTask( TaskQualifier::once_per_region, initialize, "print to screen", - [&](CGSolver *solver, std::shared_ptr res_tol, bool relative_residual, Mesh *pm) { - Real tol = - relative_residual - ? *res_tol * std::sqrt(solver->rhs2.val / pm->GetTotalCells()) - : *res_tol; + [&](CGSolver *solver, std::shared_ptr res_tol, bool relative_residual, + Mesh *pm) { + Real tol = relative_residual + ? *res_tol * std::sqrt(solver->rhs2.val / pm->GetTotalCells()) + : *res_tol; printf("# [0] v-cycle\n# [1] rms-residual (tol = %e) \n# [2] rms-error\n", tol); printf("0 %e\n", std::sqrt(solver->rhs2.val / pm->GetTotalCells())); From 1632222c371ebb7d2a1f382a6c723106280aaebf Mon Sep 17 00:00:00 2001 From: Luke Roberts Date: Wed, 25 Sep 2024 17:42:42 -0600 Subject: [PATCH 21/62] update script --- example/poisson_gmg/plot_convergence.py | 51 ++++++++++++++++--------- 1 file changed, 32 insertions(+), 19 deletions(-) diff --git a/example/poisson_gmg/plot_convergence.py b/example/poisson_gmg/plot_convergence.py index 89caa569a906..06816c08f1bd 100644 --- a/example/poisson_gmg/plot_convergence.py +++ b/example/poisson_gmg/plot_convergence.py @@ -19,35 +19,48 @@ plt.style.use("tableau-colorblind10") solver = "BiCGSTAB" +solver_lbl = "BCGS" difco = 1e6 +refine = True + for bound_pro in ["Constant", "Linear"]: - for interior_pro in ["Constant", "OldLinear"]: - - p = subprocess.run( - [ - "./poisson-gmg-example", - "-i", - "parthinput.poisson", - "poisson/solver=" + solver, - "poisson/interior_D=" + str(difco), - "poisson/prolongation=" + bound_pro, - "poisson/solver_params/prolongation=" + interior_pro, - ], - capture_output=True, - text=True, - ) - dat = np.genfromtxt(p.stdout.splitlines()) + for interior_pro in ["Constant", "Linear", "SplitLin", "Kwak"]: + command = ["./poisson-gmg-example", "-i", "parthinput.poisson"] + command.append("poisson/solver=" + solver) + command.append("poisson/interior_D=" + str(difco)) + command.append("poisson/boundary_prolongation=" + bound_pro) + if interior_pro == "SplitLin": + command.append("poisson/solver_params/prolongation=Default") + else: + command.append("poisson/interior_prolongation=" + interior_pro) + command.append("poisson/solver_params/prolongation=User") + + if refine: + command.append("parthenon/static_refinement0/x1min=-1.0") + command.append("parthenon/static_refinement0/x1max=-0.75") + command.append("parthenon/static_refinement0/x2min=-1.0") + command.append("parthenon/static_refinement0/x2max=-0.75") + command.append("parthenon/static_refinement0/level=3") + p = subprocess.run(command, capture_output=True, text=True) + lines = p.stdout.splitlines() + # Ignore any initialization junk that gets spit out earlier from adding parameters + idx = lines.index("# [0] v-cycle") + dat = np.genfromtxt(lines[idx:]) + label = "{}_{}".format(solver_lbl, interior_pro) + if refine: + label = "{}_{}_Bnd{}".format(solver_lbl, interior_pro, bound_pro) plt.semilogy( dat[:, 0], dat[:, 1], - label=solver + "_" + str(difco) + "_" + bound_pro + "_" + interior_pro, + label=label.replace("Constant", "Const").replace("Linear", "Lin"), ) -plt.legend(loc="upper right") -plt.ylim([1.0e-14, 1e2]) +plt.legend() +plt.ylim([1.0e-14, 1e4]) plt.xlim([0, 40]) plt.xlabel("# of V-cycles") plt.ylabel("RMS Residual") +plt.title("$D_{int} = 10^6$ w/ Refinement") plt.savefig("convergence_1e6.pdf") From 
23aeba30b86c3db34b1939e1faf867644d286bf7 Mon Sep 17 00:00:00 2001 From: Luke Roberts Date: Wed, 25 Sep 2024 18:11:50 -0600 Subject: [PATCH 22/62] fix strange compiler error --- example/poisson_gmg/poisson_equation.hpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/example/poisson_gmg/poisson_equation.hpp b/example/poisson_gmg/poisson_equation.hpp index d8c7fc692ba6..1d83013cbb49 100644 --- a/example/poisson_gmg/poisson_equation.hpp +++ b/example/poisson_gmg/poisson_equation.hpp @@ -262,7 +262,9 @@ class PoissonEquation { pack.IsPhysicalBoundary(b, 0, 1, 0) && (jb.e == fj), pack.IsPhysicalBoundary(b, -1, 0, 0) && (kb.s == fk), pack.IsPhysicalBoundary(b, 1, 0, 0) && (kb.e == fk)}; - + // Use both pack and pack_coarse outside of the constexpr if + // statements to prevent compilation errors in some CUDA compilers + pack(b, n, fk, fj, fi) = pack_coarse(b, n, ck, cj, ci); if constexpr (ProlongationType::Constant == prolongation_type) { pack(b, n, fk, fj, fi) = pack_coarse(b, n, ck, cj, ci); } else if constexpr (ProlongationType::Linear == prolongation_type) { From cdaee59541b477c672061be85b17813f87efcd72 Mon Sep 17 00:00:00 2001 From: Luke Roberts Date: Wed, 25 Sep 2024 18:28:25 -0600 Subject: [PATCH 23/62] changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 19ca04abc04d..7373e6fae8d0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,7 @@ ## Current develop ### Added (new features/APIs/variables/...) +- [[PR 1174]](https://github.com/parthenon-hpc-lab/parthenon/pull/1174) Add CG solver and custom solver prolongation operator options - [[PR 1171]](https://github.com/parthenon-hpc-lab/parthenon/pull/1171) Add PARTHENON_USE_SYSTEM_PACKAGES build option - [[PR 1161]](https://github.com/parthenon-hpc-lab/parthenon/pull/1161) Make flux field Metadata accessible, add Metadata::CellMemAligned flag, small perfomance upgrades From e477e2a37e4f08d9dae4c2d829dc9b701378f53f Mon Sep 17 00:00:00 2001 From: Luke Roberts Date: Thu, 26 Sep 2024 13:43:33 -0600 Subject: [PATCH 24/62] small --- src/basic_types.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/basic_types.hpp b/src/basic_types.hpp index 4db850021984..401aa90f3b9d 100644 --- a/src/basic_types.hpp +++ b/src/basic_types.hpp @@ -42,6 +42,7 @@ using Real = double; struct IndexRange { int s = 0; /// Starting Index (inclusive) int e = 0; /// Ending Index (inclusive) + int size() const { return e - s + 1;} operator std::pair() const { return {s, e}; } }; From 9365bb2df0d47924eb52bc9b65a2cb4a32cc4788 Mon Sep 17 00:00:00 2001 From: Luke Roberts Date: Wed, 2 Oct 2024 12:43:40 -0600 Subject: [PATCH 25/62] shits compiling --- example/poisson_gmg/CMakeLists.txt | 1 + example/poisson_gmg/poisson_driver.cpp | 7 +- .../poisson_gmg/poisson_equation_stages.hpp | 443 ++++++++++++++++++ example/poisson_gmg/poisson_package.cpp | 9 +- src/CMakeLists.txt | 2 + src/interface/make_pack_descriptor.hpp | 5 + src/solvers/bicgstab_solver.hpp | 12 +- src/solvers/cg_solver.hpp | 13 +- src/solvers/cg_solver_stages.hpp | 263 +++++++++++ src/solvers/mg_solver.hpp | 28 +- src/solvers/solver_utils_stages.hpp | 232 +++++++++ 11 files changed, 985 insertions(+), 30 deletions(-) create mode 100644 example/poisson_gmg/poisson_equation_stages.hpp create mode 100644 src/solvers/cg_solver_stages.hpp create mode 100644 src/solvers/solver_utils_stages.hpp diff --git a/example/poisson_gmg/CMakeLists.txt b/example/poisson_gmg/CMakeLists.txt index d4ccb8d622f5..e63cdd274457 100644 --- 
a/example/poisson_gmg/CMakeLists.txt +++ b/example/poisson_gmg/CMakeLists.txt @@ -18,6 +18,7 @@ if( "poisson-gmg-example" IN_LIST DRIVER_LIST OR NOT PARTHENON_DISABLE_EXAMPLES) poisson_driver.cpp poisson_driver.hpp poisson_equation.hpp + poisson_equation_stages.hpp poisson_package.cpp poisson_package.hpp main.cpp diff --git a/example/poisson_gmg/poisson_driver.cpp b/example/poisson_gmg/poisson_driver.cpp index b6429e2f4832..3b2b5f2e5703 100644 --- a/example/poisson_gmg/poisson_driver.cpp +++ b/example/poisson_gmg/poisson_driver.cpp @@ -26,6 +26,7 @@ #include "parthenon/driver.hpp" #include "poisson_driver.hpp" #include "poisson_equation.hpp" +#include "poisson_equation_stages.hpp" #include "poisson_package.hpp" #include "prolong_restrict/prolong_restrict.hpp" #include "solvers/bicgstab_solver.hpp" @@ -109,13 +110,13 @@ TaskCollection PoissonDriver::MakeTaskCollection(BlockList_t &blocks) { auto solve = zero_u; if (solver == "BiCGSTAB") { auto setup = bicgstab_solver->AddSetupTasks(tl, zero_u, i, pmesh); - solve = bicgstab_solver->AddTasks(tl, setup, pmesh, i); + solve = bicgstab_solver->AddTasks(tl, setup, i, pmesh); } else if (solver == "CG") { auto setup = cg_solver->AddSetupTasks(tl, zero_u, i, pmesh); - solve = cg_solver->AddTasks(tl, setup, pmesh, i); + solve = cg_solver->AddTasks(tl, setup, i, pmesh); } else if (solver == "MG") { auto setup = mg_solver->AddSetupTasks(tl, zero_u, i, pmesh); - solve = mg_solver->AddTasks(tl, setup, pmesh, i); + solve = mg_solver->AddTasks(tl, setup, i, pmesh); } else { PARTHENON_FAIL("Unknown solver type."); } diff --git a/example/poisson_gmg/poisson_equation_stages.hpp b/example/poisson_gmg/poisson_equation_stages.hpp new file mode 100644 index 000000000000..656d83d4bd70 --- /dev/null +++ b/example/poisson_gmg/poisson_equation_stages.hpp @@ -0,0 +1,443 @@ +//======================================================================================== +// (C) (or copyright) 2023-2024. Triad National Security, LLC. All rights reserved. +// +// This program was produced under U.S. Government contract 89233218CNA000001 for Los +// Alamos National Laboratory (LANL), which is operated by Triad National Security, LLC +// for the U.S. Department of Energy/National Nuclear Security Administration. All rights +// in the program are reserved by Triad National Security, LLC, and the U.S. Department +// of Energy/National Nuclear Security Administration. The Government is granted for +// itself and others acting on its behalf a nonexclusive, paid-up, irrevocable worldwide +// license in this material to reproduce, prepare derivative works, distribute copies to +// the public, perform publicly and display publicly, and to permit others to do so. +//======================================================================================== +#ifndef EXAMPLE_POISSON_GMG_POISSON_EQUATION_STAGES_HPP_ +#define EXAMPLE_POISSON_GMG_POISSON_EQUATION_STAGES_HPP_ + +#include +#include +#include +#include + +#include +#include + +#include "poisson_package.hpp" + +namespace poisson_package { + +// This class implements methods for calculating A.x = y and returning the diagonal of A, +// where A is the matrix representing the discretized Poisson equation on the grid. +// Here we implement the Laplace operator in terms of a flux divergence to (potentially) +// consistently deal with coarse fine boundaries on the grid. Only the routines Ax and +// SetDiagonal need to be defined for interfacing this with solvers. 
The other methods +// are internal, but can't be marked private or protected because they launch kernels +// on device. +template +class PoissonEquationStages { + public: + bool do_flux_cor = false; + bool set_flux_boundary = false; + bool include_flux_dx = false; + enum class ProlongationType { Constant, Linear, Kwak }; + ProlongationType prolongation_type = ProlongationType::Constant; + + PoissonEquationStages(parthenon::ParameterInput *pin, const std::string &label) { + do_flux_cor = pin->GetOrAddBoolean(label, "flux_correct", false); + set_flux_boundary = pin->GetOrAddBoolean(label, "set_flux_boundary", false); + include_flux_dx = + (pin->GetOrAddString(label, "boundary_prolongation", "Linear") == "Constant"); + auto pro_int = pin->GetOrAddString(label, "interior_prolongation", "Linear"); + if (pro_int == "Constant") { + prolongation_type = ProlongationType::Constant; + } else if (pro_int == "Linear") { + prolongation_type = ProlongationType::Linear; + } else if (pro_int == "Kwak") { + prolongation_type = ProlongationType::Kwak; + } else { + PARTHENON_FAIL("Invalid user prolongation type."); + } + } + + // Add tasks to calculate the result of the matrix A (which is implicitly defined by + // this class) being applied to x_t and store it in field out_t + parthenon::TaskID Ax(parthenon::TaskList &tl, parthenon::TaskID depends_on, + std::shared_ptr> &md_mat, + std::shared_ptr> &md_in, + std::shared_ptr> &md_out) { + auto flux_res = tl.AddTask(depends_on, CalculateFluxes, md_mat, md_in); + //if (set_flux_boundary) { + // flux_res = tl.AddTask(flux_res, SetFluxBoundaries, md, include_flux_dx); + //} + if (do_flux_cor && !(md_mat->grid.type == parthenon::GridType::two_level_composite)) { + auto start_flxcor = + tl.AddTask(flux_res, parthenon::StartReceiveFluxCorrections, md_in); + auto send_flxcor = tl.AddTask(flux_res, parthenon::LoadAndSendFluxCorrections, md_in); + auto recv_flxcor = tl.AddTask(start_flxcor, parthenon::ReceiveFluxCorrections, md_in); + flux_res = tl.AddTask(recv_flxcor, parthenon::SetFluxCorrections, md_in); + } + return tl.AddTask(flux_res, FluxMultiplyMatrix, md_in, md_out); + } + + // Calculate an approximation to the diagonal of the matrix A and store it in diag_t. + // For a uniform grid or when flux correction is ignored, this diagonal calculation + // is exact. Exactness is (probably) not required since it is just used in Jacobi + // iterations. 
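+ // As a concrete sketch (assuming uniform spacing in each active dimension), the
+ // diagonal entry built below for cell (k, j, i), i.e. the coefficient of u(k,j,i)
+ // in the flux-divergence form of A.u, is
+ //   a_ii = -alpha - (D_{i-1/2} + D_{i+1/2}) / dx1^2
+ //                 - (D_{j-1/2} + D_{j+1/2}) / dx2^2   [if ndim > 1]
+ //                 - (D_{k-1/2} + D_{k+1/2}) / dx3^2   [if ndim > 2],
+ // where the D factors are the face-centered D_t values used by CalculateFluxes.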
+ parthenon::TaskStatus SetDiagonal(std::shared_ptr> &md_mat, + std::shared_ptr> &md_diag) { + using namespace parthenon; + using TE = parthenon::TopologicalElement; + TE te = TE::CC; + const int ndim = md_mat->GetMeshPointer()->ndim; + IndexRange ib = md_mat->GetBoundsI(IndexDomain::interior, te); + IndexRange jb = md_mat->GetBoundsJ(IndexDomain::interior, te); + IndexRange kb = md_mat->GetBoundsK(IndexDomain::interior, te); + + auto pkg = md_mat->GetMeshPointer()->packages.Get("poisson_package"); + const auto alpha = pkg->Param("diagonal_alpha"); + + int nblocks = md_mat->NumBlocks(); + std::vector include_block(nblocks, true); + + auto desc_mat = parthenon::MakePackDescriptor(md_mat.get()); + auto desc_diag = parthenon::MakePackDescriptor(md_diag.get()); + auto pack_mat = desc_mat.GetPack(md_mat.get(), include_block); + auto pack_diag = desc_diag.GetPack(md_diag.get(), include_block); + parthenon::par_for( + "StoreDiagonal", 0, pack_mat.GetNBlocks() - 1, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e, + KOKKOS_LAMBDA(const int b, const int k, const int j, const int i) { + const auto &coords = pack_mat.GetCoordinates(b); + // Build the unigrid diagonal of the matrix + Real dx1 = coords.template Dxc(k, j, i); + Real diag_elem = + -(pack_mat(b, TE::F1, D_t(), k, j, i) + pack_mat(b, TE::F1, D_t(), k, j, i + 1)) / + (dx1 * dx1) - + alpha; + if (ndim > 1) { + Real dx2 = coords.template Dxc(k, j, i); + diag_elem -= + (pack_mat(b, TE::F2, D_t(), k, j, i) + pack_mat(b, TE::F2, D_t(), k, j + 1, i)) / + (dx2 * dx2); + } + if (ndim > 2) { + Real dx3 = coords.template Dxc(k, j, i); + diag_elem -= + (pack_mat(b, TE::F3, D_t(), k, j, i) + pack_mat(b, TE::F3, D_t(), k + 1, j, i)) / + (dx3 * dx3); + } + pack_diag(b, te, var_t(), k, j, i) = diag_elem; + }); + return TaskStatus::complete; + } + + static parthenon::TaskStatus + CalculateFluxes(std::shared_ptr> &md_mat, std::shared_ptr> &md) { + using namespace parthenon; + const int ndim = md->GetMeshPointer()->ndim; + using TE = parthenon::TopologicalElement; + TE te = TE::CC; + IndexRange ib = md->GetBoundsI(IndexDomain::interior, te); + IndexRange jb = md->GetBoundsJ(IndexDomain::interior, te); + IndexRange kb = md->GetBoundsK(IndexDomain::interior, te); + + int nblocks = md->NumBlocks(); + std::vector include_block(nblocks, true); + + auto desc = + parthenon::MakePackDescriptor(md.get(), {}, {PDOpt::WithFluxes}); + auto pack = desc.GetPack(md.get(), include_block); + auto desc_mat = + parthenon::MakePackDescriptor(md_mat.get(), {}); + auto pack_mat = desc_mat.GetPack(md_mat.get(), include_block); + parthenon::par_for( + "CalculateFluxes", 0, pack.GetNBlocks() - 1, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e, + KOKKOS_LAMBDA(const int b, const int k, const int j, const int i) { + const auto &coords = pack.GetCoordinates(b); + Real dx1 = coords.template Dxc(k, j, i); + pack.flux(b, X1DIR, var_t(), k, j, i) = + pack_mat(b, TE::F1, D_t(), k, j, i) / dx1 * + (pack(b, te, var_t(), k, j, i - 1) - pack(b, te, var_t(), k, j, i)); + if (i == ib.e) + pack.flux(b, X1DIR, var_t(), k, j, i + 1) = + pack_mat(b, TE::F1, D_t(), k, j, i + 1) / dx1 * + (pack(b, te, var_t(), k, j, i) - pack(b, te, var_t(), k, j, i + 1)); + + if (ndim > 1) { + Real dx2 = coords.template Dxc(k, j, i); + pack.flux(b, X2DIR, var_t(), k, j, i) = + pack_mat(b, TE::F2, D_t(), k, j, i) * + (pack(b, te, var_t(), k, j - 1, i) - pack(b, te, var_t(), k, j, i)) / dx2; + if (j == jb.e) + pack.flux(b, X2DIR, var_t(), k, j + 1, i) = + pack_mat(b, TE::F2, D_t(), k, j + 1, i) * + (pack(b, te, 
var_t(), k, j, i) - pack(b, te, var_t(), k, j + 1, i)) / + dx2; + } + + if (ndim > 2) { + Real dx3 = coords.template Dxc(k, j, i); + pack.flux(b, X3DIR, var_t(), k, j, i) = + pack_mat(b, TE::F3, D_t(), k, j, i) * + (pack(b, te, var_t(), k - 1, j, i) - pack(b, te, var_t(), k, j, i)) / dx3; + if (k == kb.e) + pack.flux(b, X3DIR, var_t(), k + 1, j, i) = + pack_mat(b, TE::F3, D_t(), k + 1, j, i) * + (pack(b, te, var_t(), k, j, i) - pack(b, te, var_t(), k + 1, j, i)) / + dx3; + } + }); + return TaskStatus::complete; + } + + template + parthenon::TaskID Prolongate(parthenon::TaskList &tl, parthenon::TaskID depends_on, + std::shared_ptr> &md) { + if (prolongation_type == ProlongationType::Constant) { + return tl.AddTask(depends_on, ProlongateImpl, + md); + } else if (prolongation_type == ProlongationType::Linear) { + return tl.AddTask(depends_on, ProlongateImpl, + md); + } else if (prolongation_type == ProlongationType::Kwak) { + return tl.AddTask(depends_on, ProlongateImpl, + md); + } + return depends_on; + } + + KOKKOS_FORCEINLINE_FUNCTION + static Real LinearFactor(int d, bool lo_bound, bool up_bound) { + if (d == 0) return 1.0; // Indicates this dimension is not included + if (d == 1) return (2.0 + !up_bound) / 4.0; + if (d == -1) return (2.0 + !lo_bound) / 4.0; + if (d == 3) return !up_bound / 4.0; + if (d == -3) return !lo_bound / 4.0; + return 0.0; + } + + KOKKOS_FORCEINLINE_FUNCTION + static Real QuadraticFactor(int d) { + if (d == 0) return 1.0; // Indicates this dimension is not included + if (d == 1 || d == -1) return 30.0 / 32.0; + if (d == 3 || d == -3) return 5.0 / 32.0; + if (d == 5 || d == -5) return -3.0 / 32.0; + return 0.0; + } + + template + static parthenon::TaskStatus + ProlongateImpl(std::shared_ptr> &md) { + using namespace parthenon; + const int ndim = md->GetMeshPointer()->ndim; + IndexRange ib = md->GetBoundsI(IndexDomain::interior); + IndexRange jb = md->GetBoundsJ(IndexDomain::interior); + IndexRange kb = md->GetBoundsK(IndexDomain::interior); + IndexRange cib = md->GetBoundsI(CellLevel::coarse, IndexDomain::interior); + IndexRange cjb = md->GetBoundsJ(CellLevel::coarse, IndexDomain::interior); + IndexRange ckb = md->GetBoundsK(CellLevel::coarse, IndexDomain::interior); + + using TE = parthenon::TopologicalElement; + + int nblocks = md->NumBlocks(); + std::vector include_block(nblocks, true); + for (int b = 0; b < nblocks; ++b) { + include_block[b] = + md->grid.logical_level == md->GetBlockData(b)->GetBlockPointer()->loc.level(); + } + const auto desc = parthenon::MakePackDescriptor(md.get()); + const auto desc_coarse = + parthenon::MakePackDescriptor(md.get(), {}, {PDOpt::Coarse}); + auto pack = desc.GetPack(md.get(), include_block); + auto pack_coarse = desc_coarse.GetPack(md.get(), include_block); + + parthenon::par_for( + "Prolongate", 0, pack.GetNBlocks() - 1, pack.GetLowerBoundHost(0), + pack.GetUpperBoundHost(0), kb.s, kb.e, jb.s, jb.e, ib.s, ib.e, + KOKKOS_LAMBDA(const int b, const int n, const int fk, const int fj, + const int fi) { + const int ck = (ndim > 2) ? (fk - kb.s) / 2 + ckb.s : ckb.s; + const int cj = (ndim > 1) ? (fj - jb.s) / 2 + cjb.s : cjb.s; + const int ci = (ndim > 0) ? 
(fi - ib.s) / 2 + cib.s : cib.s; + const int fok = (fk - kb.s) % 2; + const int foj = (fj - jb.s) % 2; + const int foi = (fi - ib.s) % 2; + const bool bound[6]{pack.IsPhysicalBoundary(b, 0, 0, -1) && (ib.s == fi), + pack.IsPhysicalBoundary(b, 0, 0, 1) && (ib.e == fi), + pack.IsPhysicalBoundary(b, 0, -1, 0) && (jb.s == fj), + pack.IsPhysicalBoundary(b, 0, 1, 0) && (jb.e == fj), + pack.IsPhysicalBoundary(b, -1, 0, 0) && (kb.s == fk), + pack.IsPhysicalBoundary(b, 1, 0, 0) && (kb.e == fk)}; + // Use both pack and pack_coarse outside of the constexpr if + // statements to prevent compilation errors in some CUDA compilers + pack(b, n, fk, fj, fi) = pack_coarse(b, n, ck, cj, ci); + if constexpr (ProlongationType::Constant == prolongation_type) { + pack(b, n, fk, fj, fi) = pack_coarse(b, n, ck, cj, ci); + } else if constexpr (ProlongationType::Linear == prolongation_type) { + pack(b, n, fk, fj, fi) = 0.0; + for (int ok = -(ndim > 2); ok < 1 + (ndim > 2); ++ok) { + for (int oj = -(ndim > 1); oj < 1 + (ndim > 1); ++oj) { + for (int oi = -(ndim > 0); oi < 1 + (ndim > 0); ++oi) { + const int dx3 = (ndim > 2) ? 4 * ok - (2 * fok - 1) : 0; + const int dx2 = (ndim > 1) ? 4 * oj - (2 * foj - 1) : 0; + const int dx1 = 4 * oi - (2 * foi - 1); + pack(b, n, fk, fj, fi) += LinearFactor(dx1, bound[0], bound[1]) * + LinearFactor(dx2, bound[2], bound[3]) * + LinearFactor(dx3, bound[4], bound[5]) * + pack_coarse(b, n, ck + ok, cj + oj, ci + oi); + } + } + } + } else if constexpr (ProlongationType::Kwak == prolongation_type) { + pack(b, n, fk, fj, fi) = 0.0; + if (ndim > 2 && !bound[4 + fok]) { + for (int ok = fok - 1; ok <= fok; ++ok) { + pack(b, n, fk, fj, fi) += pack_coarse(b, n, ck + ok, cj, ci); + } + } + if (ndim > 1 && !bound[2 + foj]) { + for (int oj = foj - 1; oj <= foj; ++oj) { + pack(b, n, fk, fj, fi) += pack_coarse(b, n, ck, cj + oj, ci); + } + } + if (ndim > 0 && !bound[foi]) { + for (int oi = foi - 1; oi <= foi; ++oi) { + pack(b, n, fk, fj, fi) += pack_coarse(b, n, ck, cj, ci + oi); + } + } + pack(b, n, fk, fj, fi) /= 2.0 * ndim; + } + }); + return TaskStatus::complete; + } + + static parthenon::TaskStatus + SetFluxBoundaries(std::shared_ptr> &md, bool do_flux_dx) { + using namespace parthenon; + const int ndim = md->GetMeshPointer()->ndim; + IndexRange ib = md->GetBoundsI(IndexDomain::interior); + IndexRange jb = md->GetBoundsJ(IndexDomain::interior); + IndexRange kb = md->GetBoundsK(IndexDomain::interior); + + using TE = parthenon::TopologicalElement; + + int nblocks = md->NumBlocks(); + std::vector include_block(nblocks, true); + + auto desc = + parthenon::MakePackDescriptor(md.get(), {}, {PDOpt::WithFluxes}); + auto pack = desc.GetPack(md.get(), include_block); + const std::size_t scratch_size_in_bytes = 0; + const std::size_t scratch_level = 1; + + const parthenon::Indexer3D idxers[6]{ + parthenon::Indexer3D(kb, jb, {ib.s, ib.s}), + parthenon::Indexer3D(kb, jb, {ib.e + 1, ib.e + 1}), + parthenon::Indexer3D(kb, {jb.s, jb.s}, ib), + parthenon::Indexer3D(kb, {jb.e + 1, jb.e + 1}, ib), + parthenon::Indexer3D({kb.s, kb.s}, jb, ib), + parthenon::Indexer3D({kb.e + 1, kb.e + 1}, jb, ib)}; + constexpr int x1off[6]{-1, 1, 0, 0, 0, 0}; + constexpr int x2off[6]{0, 0, -1, 1, 0, 0}; + constexpr int x3off[6]{0, 0, 0, 0, -1, 1}; + constexpr TE tes[6]{TE::F1, TE::F1, TE::F2, TE::F2, TE::F3, TE::F3}; + constexpr int dirs[6]{X1DIR, X1DIR, X2DIR, X2DIR, X3DIR, X3DIR}; + parthenon::par_for_outer( + DEFAULT_OUTER_LOOP_PATTERN, "SetFluxBoundaries", DevExecSpace(), + scratch_size_in_bytes, scratch_level, 0, 
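/* Editor's note: the team loop below imposes a homogeneous Dirichlet condition by
   building each boundary face flux from a one-sided gradient. With u = 0 at the face
   and the first interior cell center dx/2 away, the ghost value is effectively
   -u_int, so the interior flux formula F = D * (u_ghost - u_int) / dx reduces on a
   lower face (sign = -1) to

     F = sign * D * u_int / (0.5 * dx)

   and analogously, with sign = +1, on an upper face. */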
pack.GetNBlocks() - 1,
+        KOKKOS_LAMBDA(parthenon::team_mbr_t member, const int b) {
+          const auto &coords = pack.GetCoordinates(b);
+          const int gid = pack.GetGID(b);
+          const int level = pack.GetLevel(b, 0, 0, 0);
+          const Real dxs[3]{coords.template Dxc<X1DIR>(), coords.template Dxc<X2DIR>(),
+                            coords.template Dxc<X3DIR>()};
+          for (int face = 0; face < ndim * 2; ++face) {
+            const Real dx = dxs[dirs[face] - 1];
+            const auto &idxer = idxers[face];
+            const auto dir = dirs[face];
+            const auto te = tes[face];
+            // Impose the zero Dirichlet boundary condition at the actual boundary
+            if (pack.IsPhysicalBoundary(b, x3off[face], x2off[face], x1off[face])) {
+              const int koff = x3off[face] > 0 ? -1 : 0;
+              const int joff = x2off[face] > 0 ? -1 : 0;
+              const int ioff = x1off[face] > 0 ? -1 : 0;
+              const int sign = x1off[face] + x2off[face] + x3off[face];
+              parthenon::par_for_inner(
+                  DEFAULT_INNER_LOOP_PATTERN, member, 0, idxer.size() - 1,
+                  [&](const int idx) {
+                    const auto [k, j, i] = idxer(idx);
+                    pack.flux(b, dir, var_t(), k, j, i) =
+                        sign * pack(b, te, D_t(), k, j, i) *
+                        pack(b, var_t(), k + koff, j + joff, i + ioff) / (0.5 * dx);
+                  });
+            }
+            // Correct for size of neighboring zone at fine-coarse boundary when using
+            // constant prolongation
+            if (do_flux_dx &&
+                pack.GetLevel(b, x3off[face], x2off[face], x1off[face]) == level - 1) {
+              parthenon::par_for_inner(DEFAULT_INNER_LOOP_PATTERN, member, 0,
+                                       idxer.size() - 1, [&](const int idx) {
+                                         const auto [k, j, i] = idxer(idx);
+                                         pack.flux(b, dir, var_t(), k, j, i) /= 1.5;
+                                       });
+            }
+          }
+        });
+    return TaskStatus::complete;
+  }
+
+  // Calculate out_t = A in_t (in the region covered by md) from a given set of fluxes
+  // computed with in_t (which may have been corrected at coarse-fine boundaries)
+  static parthenon::TaskStatus
+  FluxMultiplyMatrix(std::shared_ptr<MeshData<Real>> &md,
+                     std::shared_ptr<MeshData<Real>> &md_out) {
+    using namespace parthenon;
+    const int ndim = md->GetMeshPointer()->ndim;
+    using TE = parthenon::TopologicalElement;
+    TE te = TE::CC;
+    IndexRange ib = md->GetBoundsI(IndexDomain::interior, te);
+    IndexRange jb = md->GetBoundsJ(IndexDomain::interior, te);
+    IndexRange kb = md->GetBoundsK(IndexDomain::interior, te);
+
+    auto pkg = md->GetMeshPointer()->packages.Get("poisson_package");
+    const auto alpha = pkg->Param<Real>("diagonal_alpha");
+
+    int nblocks = md->NumBlocks();
+    std::vector<bool> include_block(nblocks, true);
+
+    static auto desc =
+        parthenon::MakePackDescriptor<var_t>(md.get(), {}, {PDOpt::WithFluxes});
+    static auto desc_out = parthenon::MakePackDescriptor<var_t>(md_out.get());
+    auto pack = desc.GetPack(md.get(), include_block);
+    auto pack_out = desc_out.GetPack(md_out.get(), include_block);
+    parthenon::par_for(
+        "FluxMultiplyMatrix", 0, pack.GetNBlocks() - 1, kb.s, kb.e, jb.s, jb.e, ib.s,
+        ib.e, KOKKOS_LAMBDA(const int b, const int k, const int j, const int i) {
+          const auto &coords = pack.GetCoordinates(b);
+          Real dx1 = coords.template Dxc<X1DIR>(k, j, i);
+          pack_out(b, te, var_t(), k, j, i) = -alpha * pack(b, te, var_t(), k, j, i);
+          pack_out(b, te, var_t(), k, j, i) +=
+              (pack.flux(b, X1DIR, var_t(), k, j, i) -
+               pack.flux(b, X1DIR, var_t(), k, j, i + 1)) /
+              dx1;
+
+          if (ndim > 1) {
+            Real dx2 = coords.template Dxc<X2DIR>(k, j, i);
+            pack_out(b, te, var_t(), k, j, i) +=
+                (pack.flux(b, X2DIR, var_t(), k, j, i) -
+                 pack.flux(b, X2DIR, var_t(), k, j + 1, i)) /
+                dx2;
+          }
+
+          if (ndim > 2) {
+            Real dx3 = coords.template Dxc<X3DIR>(k, j, i);
+            pack_out(b, te, var_t(), k, j, i) +=
+                (pack.flux(b, X3DIR, var_t(), k, j, i) -
+                 pack.flux(b, X3DIR, var_t(), k + 1, j, i)) /
+                dx3;
+          }
+        });
+    return
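/* Editor's note (a worked 1D instance, derived from the two kernels above): with
   F_i = D_{i-1/2} (u_{i-1} - u_i) / dx from CalculateFluxes, the update here gives

     (A u)_i = -alpha u_i + (F_i - F_{i+1}) / dx
             = -alpha u_i + [D_{i-1/2} (u_{i-1} - u_i) - D_{i+1/2} (u_i - u_{i+1})] / dx^2,

   i.e. the standard second-order discretization of div(D grad u) - alpha u. Its
   diagonal, -(D_{i-1/2} + D_{i+1/2}) / dx^2 - alpha, matches SetDiagonal above. */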
TaskStatus::complete; + } +}; + +} // namespace poisson_package + +#endif // EXAMPLE_POISSON_GMG_POISSON_EQUATION_STAGES_HPP_ diff --git a/example/poisson_gmg/poisson_package.cpp b/example/poisson_gmg/poisson_package.cpp index 45f8a2acafb0..c11b59513787 100644 --- a/example/poisson_gmg/poisson_package.cpp +++ b/example/poisson_gmg/poisson_package.cpp @@ -25,12 +25,14 @@ #include #include #include +#include #include #include #include "defs.hpp" #include "kokkos_abstraction.hpp" #include "poisson_equation.hpp" +#include "poisson_equation_stages.hpp" #include "poisson_package.hpp" using namespace parthenon::package::prelude; @@ -107,7 +109,12 @@ std::shared_ptr Initialize(ParameterInput *pin) { parthenon::solvers::CGSolver cg_solver(pkg.get(), cg_params, eq); pkg->AddParam<>("MGCGsolver", cg_solver, parthenon::Params::Mutability::Mutable); - + + using PoissEqStages = poisson_package::PoissonEquationStages; + parthenon::solvers::CGSolverStages cgstages_solver({u::name()}, "base", "u", "rhs", + pkg.get(), cg_params, + PoissEqStages(pin, "poisson")); + using namespace parthenon::refinement_ops; auto mD = Metadata( {Metadata::Independent, Metadata::OneCopy, Metadata::Face, Metadata::GMGRestrict}); diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 62e84c16d963..cddfc68dc30d 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -228,8 +228,10 @@ add_library(parthenon solvers/bicgstab_solver.hpp solvers/cg_solver.hpp + solvers/cg_solver_stages.hpp solvers/mg_solver.hpp solvers/solver_utils.hpp + solvers/solver_utils_stages.hpp tasks/tasks.cpp tasks/tasks.hpp diff --git a/src/interface/make_pack_descriptor.hpp b/src/interface/make_pack_descriptor.hpp index 2a604338cdb7..1805f84ba678 100644 --- a/src/interface/make_pack_descriptor.hpp +++ b/src/interface/make_pack_descriptor.hpp @@ -85,6 +85,11 @@ inline auto MakePackDescriptor(StateDescriptor *psd, const std::vector +inline auto MakePackDescriptor(MeshData *pmd, Args&&...args) { + return MakePackDescriptor(pmd->GetMeshPointer()->resolved_packages.get(), std::forward(args)...); +} + template inline auto MakePackDescriptor(MeshBlockData *pmbd, const std::vector &flags = {}, diff --git a/src/solvers/bicgstab_solver.hpp b/src/solvers/bicgstab_solver.hpp index 9c173493fd62..7a14982097b1 100644 --- a/src/solvers/bicgstab_solver.hpp +++ b/src/solvers/bicgstab_solver.hpp @@ -70,7 +70,7 @@ struct BiCGSTABParams { // that takes a field associated with x_t and applies // the matrix A to it and stores the result in y_t. 
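/* Editor's note (usage sketch, not part of the patch): the MakePackDescriptor
   overload added to make_pack_descriptor.hpp above forwards a MeshData pointer to
   the StateDescriptor-based factory through resolved_packages, so call sites can
   shrink from

     auto desc = parthenon::MakePackDescriptor<var_t>(
         md->GetMeshPointer()->resolved_packages.get(), {}, {PDOpt::WithFluxes});

   to the form used throughout poisson_equation_stages.hpp:

     auto desc = parthenon::MakePackDescriptor<var_t>(md.get(), {}, {PDOpt::WithFluxes});
*/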
template -class BiCGSTABSolver { +class BiCGSTABSolver : public SolverBase { public: PARTHENON_INTERNALSOLVERVARIABLE(u, rhat0); PARTHENON_INTERNALSOLVERVARIABLE(u, v); @@ -119,8 +119,7 @@ class BiCGSTABSolver { pkg->AddField(diag::name(), m_no_ghost); } - template - TaskID AddSetupTasks(TL_t &tl, TaskID dependence, int partition, Mesh *pmesh) { + TaskID AddSetupTasks(TaskList &tl, TaskID dependence, int partition, Mesh *pmesh) { if (params_.precondition_type == Preconditioner::Multigrid) { return preconditioner.AddSetupTasks(tl, dependence, partition, pmesh); } else if (params_.precondition_type == Preconditioner::Diagonal) { @@ -132,7 +131,7 @@ class BiCGSTABSolver { } } - TaskID AddTasks(TaskList &tl, TaskID dependence, Mesh *pmesh, const int partition) { + TaskID AddTasks(TaskList &tl, TaskID dependence, const int partition, Mesh *pmesh) { using namespace utils; TaskID none; auto &md = pmesh->mesh_data.GetOrAdd(container_, partition); @@ -341,9 +340,6 @@ class BiCGSTABSolver { Real GetSquaredResidualSum() const { return residual.val; } int GetCurrentIterations() const { return iter_counter; } - Real GetFinalResidual() const { return final_residual; } - int GetFinalIterations() const { return final_iteration; } - BiCGSTABParams &GetParams() { return params_; } protected: @@ -353,8 +349,6 @@ class BiCGSTABSolver { AllReduce rtr, pAp, rhat0v, rhat0r, ts, tt, residual, rhs2; Real rhat0r_old; equations eqs_; - Real final_residual; - int final_iteration; std::string container_; }; diff --git a/src/solvers/cg_solver.hpp b/src/solvers/cg_solver.hpp index ad926cd745d0..85a789423c91 100644 --- a/src/solvers/cg_solver.hpp +++ b/src/solvers/cg_solver.hpp @@ -61,7 +61,7 @@ struct CGParams { // that takes a field associated with x_t and applies // the matrix A to it and stores the result in y_t. template -class CGSolver { +class CGSolver : public SolverBase { public: PARTHENON_INTERNALSOLVERVARIABLE(u, x); PARTHENON_INTERNALSOLVERVARIABLE(u, r); @@ -99,12 +99,11 @@ class CGSolver { pkg->AddField(p::name(), m_no_ghost); } - template - TaskID AddSetupTasks(TL_t &tl, TaskID dependence, int partition, Mesh *pmesh) { + TaskID AddSetupTasks(TaskList &tl, TaskID dependence, int partition, Mesh *pmesh) { return preconditioner.AddSetupTasks(tl, dependence, partition, pmesh); } - TaskID AddTasks(TaskList &tl, TaskID dependence, Mesh *pmesh, const int partition) { + TaskID AddTasks(TaskList &tl, TaskID dependence, const int partition, Mesh *pmesh) { using namespace utils; TaskID none; auto &md = pmesh->mesh_data.GetOrAdd(container_, partition); @@ -252,9 +251,6 @@ class CGSolver { Real GetSquaredResidualSum() const { return residual.val; } int GetCurrentIterations() const { return iter_counter; } - Real GetFinalResidual() const { return final_residual; } - int GetFinalIterations() const { return final_iteration; } - CGParams &GetParams() { return params_; } protected: @@ -264,8 +260,7 @@ class CGSolver { AllReduce ru, pAp, residual, rhs2; Real ru_old; equations eqs_; - Real final_residual; - int final_iteration; + std::string container_; }; diff --git a/src/solvers/cg_solver_stages.hpp b/src/solvers/cg_solver_stages.hpp new file mode 100644 index 000000000000..103e1043fd08 --- /dev/null +++ b/src/solvers/cg_solver_stages.hpp @@ -0,0 +1,263 @@ +//======================================================================================== +// (C) (or copyright) 2023-2024. Triad National Security, LLC. All rights reserved. +// +// This program was produced under U.S. 
Government contract 89233218CNA000001 for Los
+// Alamos National Laboratory (LANL), which is operated by Triad National Security, LLC
+// for the U.S. Department of Energy/National Nuclear Security Administration. All rights
+// in the program are reserved by Triad National Security, LLC, and the U.S. Department
+// of Energy/National Nuclear Security Administration. The Government is granted for
+// itself and others acting on its behalf a nonexclusive, paid-up, irrevocable worldwide
+// license in this material to reproduce, prepare derivative works, distribute copies to
+// the public, perform publicly and display publicly, and to permit others to do so.
+//========================================================================================
+#ifndef SOLVERS_CG_SOLVER_STAGES_HPP_
+#define SOLVERS_CG_SOLVER_STAGES_HPP_
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "interface/mesh_data.hpp"
+#include "interface/meshblock_data.hpp"
+#include "interface/state_descriptor.hpp"
+#include "kokkos_abstraction.hpp"
+#include "solvers/mg_solver.hpp"
+#include "solvers/cg_solver.hpp"
+//#include "solvers/solver_utils.hpp"
+#include "solvers/solver_utils_stages.hpp"
+#include "tasks/tasks.hpp"
+#include "utils/type_list.hpp"
+
+namespace parthenon {
+
+namespace solvers {
+
+// The equations class must include a template method
+//
+// template
+// TaskID Ax(TL_t &tl, TaskID depends_on, std::shared_ptr> &md)
+//
+// that takes a field associated with x_t and applies
+// the matrix A to it and stores the result in y_t.
+template <class equations>
+class CGSolverStages : public SolverBase {
+
+  std::vector<std::string> sol_fields;
+  // Name of the user-defined container that should contain the information required
+  // to calculate the matrix part of the matrix-vector product
+  std::string container_base;
+  // User-defined container in which the solution will reside; only needs to contain
+  // sol_fields
+  // TODO(LFR): Also allow for an initial guess to come in here
+  std::string container_u;
+  // User-defined container holding the rhs vector; only needs to contain sol_fields
+  std::string container_rhs;
+  // Internal containers for the solver, which hold deep copies of sol_fields
+  std::string container_x, container_r, container_v, container_p;
+
+ public:
+  CGSolverStages(const std::vector<std::string> &fields,
+                 const std::string &container_base,
+                 const std::string &container_u,
+                 const std::string &container_rhs,
+                 StateDescriptor *pkg,
+                 CGParams params_in,
+                 const equations &eq_in = equations())
+      : sol_fields(fields),
+        container_base(container_base),
+        container_u(container_u),
+        container_rhs(container_rhs),
+        params_(params_in),
+        iter_counter(0),
+        eqs_(eq_in) {
+    std::string solver_id = "cg";
+    container_x = solver_id + "_x";
+    container_r = solver_id + "_r";
+    container_v = solver_id + "_v";
+    container_p = solver_id + "_p";
+  }
+
+  TaskID AddSetupTasks(TaskList &tl, TaskID dependence, int partition, Mesh *pmesh) {
+    return dependence;
+    // return preconditioner.AddSetupTasks(tl, dependence, partition, pmesh);
+  }
+
+  TaskID AddTasks(TaskList &tl, TaskID dependence, const int partition, Mesh *pmesh) {
+    using namespace StageUtils;
+    TaskID none;
+    // Should contain all fields necessary for applying the matrix to a given state
+    // vector, e.g. diffusion coefficients and the diagonal; these will not be
+    // modified by the solver
+    auto &md_base = pmesh->mesh_data.GetOrAdd(container_base, partition);
+    // Container in which the solution is stored and with which the downstream user can
+    // interact.
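/* Editor's note (illustrative wiring, mirroring the poisson_package.cpp hunk earlier
   in this patch series; the template arguments are reconstructions, since the angle
   brackets in that hunk were lost to extraction):

     using PoissEqStages = poisson_package::PoissonEquationStages<u, D>;
     parthenon::solvers::CGSolverStages<PoissEqStages> cgstages_solver(
         {u::name()}, "base", "u", "rhs", pkg.get(), cg_params,
         PoissEqStages(pin, "poisson"));

   "base" carries the matrix data (diffusion coefficients, diagonal), "u" receives the
   solution, "rhs" supplies the right-hand side, and the listed fields are deep-copied
   into the solver's private cg_x / cg_r / cg_v / cg_p containers. */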
This container only requires the fields in sol_fields + auto &md_u = pmesh->mesh_data.GetOrAdd(container_u, partition); + // Container of the rhs, only requires fields in sol_fields + auto &md_rhs = pmesh->mesh_data.GetOrAdd(container_rhs, partition); + // Internal solver containers + auto &md_x = pmesh->mesh_data.Add(container_x, md_u, sol_fields); + auto &md_r = pmesh->mesh_data.Add(container_r, md_u, sol_fields); + // TODO(LFR): The v container can probably be removed and the u container used in its stead + auto &md_v = pmesh->mesh_data.Add(container_v, md_u, sol_fields); + auto &md_p = pmesh->mesh_data.Add(container_p, md_u, sol_fields); + + iter_counter = 0; + bool multilevel = pmesh->multilevel; + + // Initialization: u <- 0, r <- rhs, p <- 0, ru <- 1 + auto zero_u = tl.AddTask(dependence, TF(SetToZero), sol_fields, md_u); + auto zero_v = tl.AddTask(dependence, TF(SetToZero), sol_fields, md_v); + auto zero_x = tl.AddTask(dependence, TF(SetToZero), sol_fields, md_x); + auto zero_p = tl.AddTask(dependence, TF(SetToZero), sol_fields, md_p); + auto copy_r = tl.AddTask(dependence, TF(CopyData), sol_fields, md_rhs, md_r); + auto get_rhs2 = none; + if (params_.relative_residual || params_.print_per_step) + get_rhs2 = DotProduct(dependence, tl, &rhs2, sol_fields, md_rhs, md_rhs); + auto initialize = tl.AddTask( + TaskQualifier::once_per_region | TaskQualifier::local_sync, + zero_u | zero_v | zero_x | zero_p | copy_r | get_rhs2, "zero factors", + [](CGSolverStages *solver) { + solver->iter_counter = -1; + solver->ru.val = std::numeric_limits::max(); + return TaskStatus::complete; + }, + this); + + if (params_.print_per_step && Globals::my_rank == 0) { + initialize = tl.AddTask( + TaskQualifier::once_per_region, initialize, "print to screen", + [&](CGSolverStages *solver, std::shared_ptr res_tol, bool relative_residual, + Mesh *pm) { + Real tol = relative_residual + ? *res_tol * std::sqrt(solver->rhs2.val / pm->GetTotalCells()) + : *res_tol; + printf("# [0] v-cycle\n# [1] rms-residual (tol = %e) \n# [2] rms-error\n", + tol); + printf("0 %e\n", std::sqrt(solver->rhs2.val / pm->GetTotalCells())); + return TaskStatus::complete; + }, + this, params_.residual_tolerance, params_.relative_residual, pmesh); + } + + // BEGIN ITERATIVE TASKS + auto [itl, solver_id] = tl.AddSublist(initialize, {1, params_.max_iters}); + + auto sync = itl.AddTask(TaskQualifier::local_sync, none, + []() { return TaskStatus::complete; }); + auto reset = itl.AddTask( + TaskQualifier::once_per_region, sync, "update values", + [](CGSolverStages *solver) { + solver->ru_old = solver->ru.val; + solver->iter_counter++; + return TaskStatus::complete; + }, + this); + + // 1. u <- M r + auto precon = reset; + if (params_.precondition) { + //auto set_rhs = itl.AddTask(precon, TF(CopyData), sol_fields, md_r, m_rhs); + //auto zero_u = itl.AddTask(precon, TF(SetToZero), sol_fields, md_u); + //precon = + // preconditioner.AddLinearOperatorTasks(itl, set_rhs | zero_u, partition, pmesh); + } else { + precon = itl.AddTask(precon, TF(CopyData), sol_fields, md_r, md_u); + } + + // 2. beta <- r dot u / r dot u {old} + auto get_ru = DotProduct(precon, itl, &ru, sol_fields, md_r, md_u); + + // 3. p <- u + beta p + auto correct_p = itl.AddTask( + get_ru, "p <- u + beta p", + [](CGSolverStages *solver, std::shared_ptr> &md_u, std::shared_ptr> &md_p) { + Real beta = solver->iter_counter > 0 ? solver->ru.val / solver->ru_old : 0.0; + return AddFieldsAndStore(solver->sol_fields, md_u, md_p, md_p, 1.0, beta); + }, + this, md_u, md_p); + + // 4. 
v <- A p + auto comm = + AddBoundaryExchangeTasks(correct_p, itl, md_p, multilevel); + auto get_v = eqs_.template Ax(itl, comm, md_base, md_p, md_v); + + // 5. alpha <- r dot u / p dot v (calculate denominator) + auto get_pAp = DotProduct(get_v, itl, &pAp, sol_fields, md_p, md_v); + + // 6. x <- x + alpha p + auto correct_x = itl.AddTask( + get_pAp, "x <- x + alpha p", + [](CGSolverStages *solver, + std::shared_ptr> &md_x, + std::shared_ptr> &md_p) { + Real alpha = solver->ru.val / solver->pAp.val; + return AddFieldsAndStore(solver->sol_fields, md_x, md_p, md_x, 1.0, alpha); + }, + this, md_x, md_p); + + // 6. r <- r - alpha A p + auto correct_r = itl.AddTask( + get_pAp, "r <- r - alpha A p", + [](CGSolverStages *solver, + std::shared_ptr> &md_r, + std::shared_ptr> &md_v) { + Real alpha = solver->ru.val / solver->pAp.val; + return AddFieldsAndStore(solver->sol_fields, md_r, md_v, md_r, 1.0, -alpha); + }, + this, md_r, md_v); + + // 7. Check and print out residual + auto get_res = DotProduct(correct_r, itl, &residual, sol_fields, md_r, md_r); + + auto print = itl.AddTask( + TaskQualifier::once_per_region, get_res, + [&](CGSolverStages *solver, Mesh *pmesh) { + Real rms_res = std::sqrt(solver->residual.val / pmesh->GetTotalCells()); + if (Globals::my_rank == 0 && solver->params_.print_per_step) + printf("%i %e\n", solver->iter_counter, rms_res); + return TaskStatus::complete; + }, + this, pmesh); + + auto check = itl.AddTask( + TaskQualifier::completion, get_res | correct_x, "completion", + [](CGSolverStages *solver, Mesh *pmesh, int max_iter, std::shared_ptr res_tol, + bool relative_residual) { + Real rms_res = std::sqrt(solver->residual.val / pmesh->GetTotalCells()); + solver->final_residual = rms_res; + solver->final_iteration = solver->iter_counter; + Real tol = relative_residual + ? *res_tol * std::sqrt(solver->rhs2.val / pmesh->GetTotalCells()) + : *res_tol; + if (rms_res < tol || solver->iter_counter >= max_iter) { + solver->final_residual = rms_res; + solver->final_iteration = solver->iter_counter; + return TaskStatus::complete; + } + return TaskStatus::iterate; + }, + this, pmesh, params_.max_iters, params_.residual_tolerance, + params_.relative_residual); + + return tl.AddTask(solver_id, TF(CopyData), sol_fields, md_x, md_u); + } + + Real GetSquaredResidualSum() const { return residual.val; } + int GetCurrentIterations() const { return iter_counter; } + + CGParams &GetParams() { return params_; } + + protected: + CGParams params_; + int iter_counter; + AllReduce ru, pAp, residual, rhs2; + Real ru_old; + equations eqs_; +}; + +} // namespace solvers +} // namespace parthenon + +#endif // SOLVERS_CG_SOLVER_STAGES_HPP_ diff --git a/src/solvers/mg_solver.hpp b/src/solvers/mg_solver.hpp index d820d3fde547..28dbc74dc5dd 100644 --- a/src/solvers/mg_solver.hpp +++ b/src/solvers/mg_solver.hpp @@ -58,6 +58,23 @@ struct MGParams { } }; +class SolverBase { + public: + virtual ~SolverBase(){} + + virtual TaskID AddSetupTasks(TaskList &tl, TaskID dependence, int partition, + Mesh *pmesh) = 0; + virtual TaskID AddTasks(TaskList &tl, TaskID dependence, int partition, + Mesh *pmesh) = 0; + + Real GetFinalResidual() const { return final_residual; } + int GetFinalIterations() const { return final_iteration; } + + protected: + Real final_residual; + int final_iteration; +}; + // The equations class must include a template method // // template @@ -73,7 +90,7 @@ struct MGParams { // That stores the (possibly approximate) diagonal of matrix A in the field // associated with the type diag_t. 
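/* Editor's note (a sketch of how the stored diagonal is consumed): damped Jacobi
   smoothing with damping weight w updates

     u <- u + w * (rhs - A u) / diag,   0 < w <= 1,

   so an approximate diagonal only rescales the residual and does not change the
   fixed point of the iteration. */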
This is used for Jacobi iteration. template -class MGSolver { +class MGSolver : public SolverBase { public: PARTHENON_INTERNALSOLVERVARIABLE( u, res_err); // residual on the way up and error on the way down @@ -119,7 +136,7 @@ class MGSolver { pkg->AddField(D::name(), mD); } - TaskID AddTasks(TaskList &tl, TaskID dependence, Mesh *pmesh, const int partition) { + TaskID AddTasks(TaskList &tl, TaskID dependence, const int partition, Mesh *pmesh) { using namespace utils; TaskID none; auto [itl, solve_id] = tl.AddSublist(dependence, {1, this->params_.max_iters}); @@ -188,8 +205,7 @@ class MGSolver { return post_sync; } - template - TaskID AddSetupTasks(TL_t &tl, TaskID dependence, int partition, Mesh *pmesh) { + TaskID AddSetupTasks(TaskList &tl, TaskID dependence, int partition, Mesh *pmesh) { using namespace utils; int min_level = std::max(pmesh->GetGMGMaxLevel() - params_.max_coarsenings, @@ -207,16 +223,12 @@ class MGSolver { Real GetSquaredResidualSum() const { return residual.val; } int GetCurrentIterations() const { return iter_counter; } - Real GetFinalResidual() const { return final_residual; } - int GetFinalIterations() const { return final_iteration; } protected: MGParams params_; int iter_counter; AllReduce residual; equations eqs_; - Real final_residual; - int final_iteration; std::string container_; // These functions apparently have to be public to compile with cuda since diff --git a/src/solvers/solver_utils_stages.hpp b/src/solvers/solver_utils_stages.hpp new file mode 100644 index 000000000000..ddca07494366 --- /dev/null +++ b/src/solvers/solver_utils_stages.hpp @@ -0,0 +1,232 @@ +//======================================================================================== +// (C) (or copyright) 2021-2024. Triad National Security, LLC. All rights reserved. +// +// This program was produced under U.S. Government contract 89233218CNA000001 for Los +// Alamos National Laboratory (LANL), which is operated by Triad National Security, LLC +// for the U.S. Department of Energy/National Nuclear Security Administration. All rights +// in the program are reserved by Triad National Security, LLC, and the U.S. Department +// of Energy/National Nuclear Security Administration. The Government is granted for +// itself and others acting on its behalf a nonexclusive, paid-up, irrevocable worldwide +// license in this material to reproduce, prepare derivative works, distribute copies to +// the public, perform publicly and display publicly, and to permit others to do so. 
+//======================================================================================== +#ifndef SOLVERS_SOLVER_UTILS_STAGES_HPP_ +#define SOLVERS_SOLVER_UTILS_STAGES_HPP_ + +#include +#include +#include +#include +#include +#include + +#include "kokkos_abstraction.hpp" + +namespace parthenon { + +namespace solvers { + +namespace StageUtils { + +template +TaskStatus CopyData(const std::vector &fields, + const std::shared_ptr> &md_in, + const std::shared_ptr> &md_out) { + using TE = parthenon::TopologicalElement; + TE te = TE::CC; + IndexRange ib = md_in->GetBoundsI(IndexDomain::entire, te); + IndexRange jb = md_in->GetBoundsJ(IndexDomain::entire, te); + IndexRange kb = md_in->GetBoundsK(IndexDomain::entire, te); + + static auto desc = parthenon::MakePackDescriptor(md_in.get(), fields); + auto pack_in = desc.GetPack(md_in.get(), only_fine_on_composite); + auto pack_out = desc.GetPack(md_out.get(), only_fine_on_composite); + const int scratch_size = 0; + const int scratch_level = 0; + // Warning: This inner loop strategy only works because we are using IndexDomain::entire + const int npoints_inner = (kb.e - kb.s + 1) * (jb.e - jb.s + 1) * (ib.e - ib.s + 1); + parthenon::par_for_outer( + DEFAULT_OUTER_LOOP_PATTERN, "CopyData", DevExecSpace(), scratch_size, scratch_level, + 0, pack_in.GetNBlocks() - 1, KOKKOS_LAMBDA(parthenon::team_mbr_t member, const int b) { + const int nvars = + pack_in.GetUpperBound(b) - pack_in.GetLowerBound(b) + 1; + for (int c = 0; c < nvars; ++c) { + Real *in = &pack_in(b, te, c, kb.s, jb.s, ib.s); + Real *out = &pack_out(b, te, c, kb.s, jb.s, ib.s); + parthenon::par_for_inner(DEFAULT_INNER_LOOP_PATTERN, member, 0, + npoints_inner - 1, + [&](const int idx) { out[idx] = in[idx]; }); + } + }); + return TaskStatus::complete; +} + +template +TaskStatus AddFieldsAndStoreInteriorSelect(const std::vector &fields, + const std::shared_ptr> &md_a, + const std::shared_ptr> &md_b, + const std::shared_ptr> &md_out, + Real wa = 1.0, Real wb = 1.0, + bool only_interior_blocks = false) { + using TE = parthenon::TopologicalElement; + TE te = TE::CC; + IndexRange ib = md_a->GetBoundsI(IndexDomain::entire, te); + IndexRange jb = md_a->GetBoundsJ(IndexDomain::entire, te); + IndexRange kb = md_a->GetBoundsK(IndexDomain::entire, te); + + int nblocks = md_a->NumBlocks(); + std::vector include_block(nblocks, true); + if (only_interior_blocks) { + // The neighbors array will only be set for a block if its a leaf block + for (int b = 0; b < nblocks; ++b) + include_block[b] = md_a->GetBlockData(b)->GetBlockPointer()->neighbors.size() == 0; + } + + static auto desc = parthenon::MakePackDescriptor(md_a.get(), fields); + auto pack_a = desc.GetPack(md_a.get(), include_block, only_fine_on_composite); + auto pack_b = desc.GetPack(md_b.get(), include_block, only_fine_on_composite); + auto pack_out = desc.GetPack(md_out.get(), include_block, only_fine_on_composite); + const int scratch_size = 0; + const int scratch_level = 0; + // Warning: This inner loop strategy only works because we are using IndexDomain::entire + const int npoints_inner = (kb.e - kb.s + 1) * (jb.e - jb.s + 1) * (ib.e - ib.s + 1); + parthenon::par_for_outer( + DEFAULT_OUTER_LOOP_PATTERN, "AddFieldsAndStore", DevExecSpace(), scratch_size, + scratch_level, 0, pack_a.GetNBlocks() - 1, + KOKKOS_LAMBDA(parthenon::team_mbr_t member, const int b) { + const int nvars = pack_a.GetUpperBound(b) - pack_a.GetLowerBound(b) + 1; + for (int c = 0; c < nvars; ++c) { + Real *avar = &pack_a(b, te, c, kb.s, jb.s, ib.s); + Real *bvar = &pack_b(b, 
te, c, kb.s, jb.s, ib.s); + Real *out = &pack_out(b, te, c, kb.s, jb.s, ib.s); + parthenon::par_for_inner( + DEFAULT_INNER_LOOP_PATTERN, member, 0, npoints_inner - 1, + [&](const int idx) { out[idx] = wa * avar[idx] + wb * bvar[idx]; }); + } + }); + return TaskStatus::complete; +} + +template +TaskStatus AddFieldsAndStore(const std::vector &fields, + const std::shared_ptr> &md_a, + const std::shared_ptr> &md_b, + const std::shared_ptr> &md_out, + Real wa = 1.0, Real wb = 1.0) { + return AddFieldsAndStoreInteriorSelect( + fields, md_a, md_b, md_out, wa, wb, false); +} + +template +TaskStatus SetToZero(const std::vector &fields, const std::shared_ptr> &md) { + int nblocks = md->NumBlocks(); + using TE = parthenon::TopologicalElement; + TE te = TE::CC; + static auto desc = parthenon::MakePackDescriptor(md.get(), fields); + auto pack = desc.GetPack(md.get(), only_fine_on_composite); + const size_t scratch_size_in_bytes = 0; + const int scratch_level = 1; + const int ng = parthenon::Globals::nghost; + parthenon::par_for_outer( + DEFAULT_OUTER_LOOP_PATTERN, "SetFieldsToZero", DevExecSpace(), + scratch_size_in_bytes, scratch_level, 0, pack.GetNBlocks() - 1, + KOKKOS_LAMBDA(parthenon::team_mbr_t member, const int b) { + auto cb = GetIndexShape(pack(b, te, 0), ng); + const auto &coords = pack.GetCoordinates(b); + IndexRange ib = cb.GetBoundsI(IndexDomain::interior, te); + IndexRange jb = cb.GetBoundsJ(IndexDomain::interior, te); + IndexRange kb = cb.GetBoundsK(IndexDomain::interior, te); + const int nvars = pack.GetUpperBound(b) - pack.GetLowerBound(b) + 1; + for (int c = 0; c < nvars; ++c) { + parthenon::par_for_inner( + parthenon::inner_loop_pattern_simdfor_tag, member, kb.s, kb.e, jb.s, jb.e, + ib.s, ib.e, + [&](int k, int j, int i) { pack(b, te, c, k, j, i) = 0.0; }); + } + }); + return TaskStatus::complete; +} + +TaskStatus ADividedByB(const std::vector &fields, + const std::shared_ptr> &md_a, + const std::shared_ptr> &md_b, + const std::shared_ptr> &md_out) { + IndexRange ib = md_a->GetBoundsI(IndexDomain::interior); + IndexRange jb = md_a->GetBoundsJ(IndexDomain::interior); + IndexRange kb = md_a->GetBoundsK(IndexDomain::interior); + + static auto desc = parthenon::MakePackDescriptor(md_a.get(), fields); + auto pack_a = desc.GetPack(md_a.get()); + auto pack_b = desc.GetPack(md_b.get()); + auto pack_out = desc.GetPack(md_out.get()); + parthenon::par_for( + DEFAULT_LOOP_PATTERN, "DotProduct", DevExecSpace(), 0, pack_a.GetNBlocks() - 1, kb.s, + kb.e, jb.s, jb.e, ib.s, ib.e, + KOKKOS_LAMBDA(const int b, const int k, const int j, const int i) { + const int nvars = pack_a.GetUpperBound(b) - pack_a.GetLowerBound(b) + 1; + for (int c = 0; c < nvars; ++c) + pack_out(b, c, k, j, i) = + pack_a(b, c, k, j, i) / pack_b(b, c, k, j, i); + }); + return TaskStatus::complete; +} + +TaskStatus DotProductLocal(const std::vector &fields, + const std::shared_ptr> &md_a, + const std::shared_ptr> &md_b, + AllReduce *adotb) { + using TE = parthenon::TopologicalElement; + TE te = TE::CC; + IndexRange ib = md_a->GetBoundsI(IndexDomain::interior, te); + IndexRange jb = md_a->GetBoundsJ(IndexDomain::interior, te); + IndexRange kb = md_a->GetBoundsK(IndexDomain::interior, te); + + static auto desc = parthenon::MakePackDescriptor(md_a.get(), fields); + auto pack_a = desc.GetPack(md_a.get()); + auto pack_b = desc.GetPack(md_b.get()); + Real gsum(0); + parthenon::par_reduce( + parthenon::loop_pattern_mdrange_tag, "DotProduct", DevExecSpace(), 0, + pack_a.GetNBlocks() - 1, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e, + 
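/* Editor's note: the reduction below forms the rank-local dot product over blocks,
   cells, and variables in gsum, and DotProductLocal then accumulates with
   "adotb->val += gsum" rather than assigning, so several partitions on a rank can
   contribute before the MPI all-reduce runs. That is why the DotProduct task chain
   (next function) starts with an explicit zeroing task; schematically:

     zero adotb -> local par_reduce (+=) -> StartReduce -> CheckReduce */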
KOKKOS_LAMBDA(const int b, const int k, const int j, const int i, Real &lsum) { + const int nvars = pack_a.GetUpperBound(b) - pack_a.GetLowerBound(b) + 1; + // TODO(LFR): If this becomes a bottleneck, exploit hierarchical parallelism and + // pull the loop over vars outside of the innermost loop to promote + // vectorization. + for (int c = 0; c < nvars; ++c) + lsum += pack_a(b, te, c, k, j, i) * pack_b(b, te, c, k, j, i); + }, + Kokkos::Sum(gsum)); + adotb->val += gsum; + return TaskStatus::complete; +} + +TaskID DotProduct(TaskID dependency_in, TaskList &tl, AllReduce *adotb, + const std::vector &fields, + const std::shared_ptr> &md_a, + const std::shared_ptr> &md_b) { + using namespace impl; + auto zero_adotb = tl.AddTask( + TaskQualifier::once_per_region | TaskQualifier::local_sync, dependency_in, + [](AllReduce *r) { + r->val = 0.0; + return TaskStatus::complete; + }, + adotb); + auto get_adotb = tl.AddTask(TaskQualifier::local_sync, zero_adotb, + DotProductLocal, fields, md_a, md_b, adotb); + auto start_global_adotb = tl.AddTask(TaskQualifier::once_per_region, get_adotb, + &AllReduce::StartReduce, adotb, MPI_SUM); + auto finish_global_adotb = + tl.AddTask(TaskQualifier::once_per_region | TaskQualifier::local_sync, + start_global_adotb, &AllReduce::CheckReduce, adotb); + return finish_global_adotb; +} + +} // namespace utils + +} // namespace solvers + +} // namespace parthenon + +#endif // SOLVERS_SOLVER_UTILS_STAGES_HPP_ From 6fb993baf41b3167b0ea7d8324223921fc7c2ecd Mon Sep 17 00:00:00 2001 From: Luke Roberts Date: Wed, 2 Oct 2024 16:51:04 -0600 Subject: [PATCH 26/62] use solver base class --- example/poisson_gmg/poisson_driver.cpp | 52 ++++++------------------- example/poisson_gmg/poisson_package.cpp | 41 +++++++++---------- 2 files changed, 32 insertions(+), 61 deletions(-) diff --git a/example/poisson_gmg/poisson_driver.cpp b/example/poisson_gmg/poisson_driver.cpp index 3b2b5f2e5703..c44229db5284 100644 --- a/example/poisson_gmg/poisson_driver.cpp +++ b/example/poisson_gmg/poisson_driver.cpp @@ -31,6 +31,7 @@ #include "prolong_restrict/prolong_restrict.hpp" #include "solvers/bicgstab_solver.hpp" #include "solvers/cg_solver.hpp" +#include "solvers/cg_solver_stages.hpp" #include "solvers/mg_solver.hpp" using namespace parthenon::driver::prelude; @@ -47,23 +48,8 @@ parthenon::DriverStatus PoissonDriver::Execute() { // After running, retrieve the final residual for checking in tests auto pkg = pmesh->packages.Get("poisson_package"); - auto solver = pkg->Param("solver"); - if (solver == "BiCGSTAB") { - auto *bicgstab_solver = - pkg->MutableParam>( - "MGBiCGSTABsolver"); - final_rms_residual = bicgstab_solver->GetFinalResidual(); - } else if (solver == "CG") { - auto *cg_solver = - pkg->MutableParam>( - "MGCGsolver"); - final_rms_residual = cg_solver->GetFinalResidual(); - } else if (solver == "MG") { - auto *mg_solver = - pkg->MutableParam>( - "MGsolver"); - final_rms_residual = mg_solver->GetFinalResidual(); - } + auto psolver = pkg->Param>("solver_pointer"); + final_rms_residual = psolver->GetFinalResidual(); return DriverStatus::complete; } @@ -75,17 +61,8 @@ TaskCollection PoissonDriver::MakeTaskCollection(BlockList_t &blocks) { TaskID none(0); auto pkg = pmesh->packages.Get("poisson_package"); - auto solver = pkg->Param("solver"); auto use_exact_rhs = pkg->Param("use_exact_rhs"); - auto *mg_solver = - pkg->MutableParam>( - "MGsolver"); - auto *bicgstab_solver = - pkg->MutableParam>( - "MGBiCGSTABsolver"); - auto *cg_solver = - pkg->MutableParam>( - "MGCGsolver"); + auto psolver = 
pkg->Param>("solver_pointer"); auto partitions = pmesh->GetDefaultBlockPartitions(); const int num_partitions = partitions.size(); @@ -93,6 +70,8 @@ TaskCollection PoissonDriver::MakeTaskCollection(BlockList_t &blocks) { for (int i = 0; i < num_partitions; ++i) { TaskList &tl = region[i]; auto &md = pmesh->mesh_data.Add("base", partitions[i]); + auto &md_u = pmesh->mesh_data.Add("u", md); + auto &md_rhs = pmesh->mesh_data.Add("rhs", md); // Possibly set rhs <- A.u_exact for a given u_exact so that the exact solution is // known when we solve A.u = rhs @@ -106,20 +85,11 @@ TaskCollection PoissonDriver::MakeTaskCollection(BlockList_t &blocks) { // Set initial solution guess to zero auto zero_u = tl.AddTask(get_rhs, TF(solvers::utils::SetToZero), md); - - auto solve = zero_u; - if (solver == "BiCGSTAB") { - auto setup = bicgstab_solver->AddSetupTasks(tl, zero_u, i, pmesh); - solve = bicgstab_solver->AddTasks(tl, setup, i, pmesh); - } else if (solver == "CG") { - auto setup = cg_solver->AddSetupTasks(tl, zero_u, i, pmesh); - solve = cg_solver->AddTasks(tl, setup, i, pmesh); - } else if (solver == "MG") { - auto setup = mg_solver->AddSetupTasks(tl, zero_u, i, pmesh); - solve = mg_solver->AddTasks(tl, setup, i, pmesh); - } else { - PARTHENON_FAIL("Unknown solver type."); - } + zero_u = tl.AddTask(zero_u, TF(solvers::utils::SetToZero), md_u); + zero_u = tl.AddTask(zero_u, TF(solvers::StageUtils::CopyData), std::vector{rhs::name()}, md, md_rhs); + zero_u = tl.AddTask(zero_u, TF(solvers::utils::CopyData), md_rhs); + auto setup = psolver->AddSetupTasks(tl, zero_u, i, pmesh); + auto solve = psolver->AddTasks(tl, setup, i, pmesh); // If we are using a rhs to which we know the exact solution, compare our computed // solution to the exact solution diff --git a/example/poisson_gmg/poisson_package.cpp b/example/poisson_gmg/poisson_package.cpp index c11b59513787..da3116f6d2ba 100644 --- a/example/poisson_gmg/poisson_package.cpp +++ b/example/poisson_gmg/poisson_package.cpp @@ -93,27 +93,28 @@ std::shared_ptr Initialize(ParameterInput *pin) { PoissonEquation eq(pin, "poisson"); pkg->AddParam<>("poisson_equation", eq, parthenon::Params::Mutability::Mutable); - - parthenon::solvers::MGParams mg_params(pin, "poisson/solver_params"); - parthenon::solvers::MGSolver mg_solver(pkg.get(), mg_params, - eq); - pkg->AddParam<>("MGsolver", mg_solver, parthenon::Params::Mutability::Mutable); - - parthenon::solvers::BiCGSTABParams bicgstab_params(pin, "poisson/solver_params"); - parthenon::solvers::BiCGSTABSolver bicg_solver( - pkg.get(), bicgstab_params, eq); - pkg->AddParam<>("MGBiCGSTABsolver", bicg_solver, - parthenon::Params::Mutability::Mutable); - - parthenon::solvers::CGParams cg_params(pin, "poisson/solver_params"); - parthenon::solvers::CGSolver cg_solver(pkg.get(), cg_params, - eq); - pkg->AddParam<>("MGCGsolver", cg_solver, parthenon::Params::Mutability::Mutable); - using PoissEqStages = poisson_package::PoissonEquationStages; - parthenon::solvers::CGSolverStages cgstages_solver({u::name()}, "base", "u", "rhs", - pkg.get(), cg_params, - PoissEqStages(pin, "poisson")); + std::shared_ptr psolver; + if (solver == "MG") { + parthenon::solvers::MGParams params(pin, "poisson/solver_params"); + psolver = std::make_shared>(pkg.get(), params, eq); + } else if (solver == "BiCGSTAB") { + parthenon::solvers::BiCGSTABParams params(pin, "poisson/solver_params"); + psolver = std::make_shared>(pkg.get(), params, eq); + } else if (solver == "CG") { + parthenon::solvers::CGParams params(pin, "poisson/solver_params"); + psolver 
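/* Editor's note (sketch; the angle brackets in this hunk were lost, so the template
   arguments shown are reconstructions): storing one type-erased pointer lets the
   driver drop its per-solver branching entirely:

     auto psolver = pkg->Param<std::shared_ptr<parthenon::solvers::SolverBase>>(
         "solver_pointer");
     auto setup = psolver->AddSetupTasks(tl, zero_u, i, pmesh);
     auto solve = psolver->AddTasks(tl, setup, i, pmesh);

   Virtual dispatch happens once per task-list construction, not per cell, so the
   indirection costs nothing inside the kernels. */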
= std::make_shared>(pkg.get(), params, eq); + } else if (solver == "CGStages") { + using PoissEqStages = poisson_package::PoissonEquationStages; + parthenon::solvers::CGParams params(pin, "poisson/solver_params"); + psolver = std::make_shared>(std::vector{u::name()}, + "base", "u", "rhs", + pkg.get(), params, + PoissEqStages(pin, "poisson")); + } else { + PARTHENON_FAIL("Unknown solver type."); + } + pkg->AddParam<>("solver_pointer", psolver); using namespace parthenon::refinement_ops; auto mD = Metadata( From 49061429599f4116d11e170620c6281d0fc74990 Mon Sep 17 00:00:00 2001 From: Luke Roberts Date: Wed, 2 Oct 2024 16:56:17 -0600 Subject: [PATCH 27/62] Apparently working stage based setup --- src/solvers/cg_solver_stages.hpp | 10 ++++++---- src/solvers/solver_utils_stages.hpp | 16 ++++++++-------- 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/src/solvers/cg_solver_stages.hpp b/src/solvers/cg_solver_stages.hpp index 103e1043fd08..a78d7a94299b 100644 --- a/src/solvers/cg_solver_stages.hpp +++ b/src/solvers/cg_solver_stages.hpp @@ -26,7 +26,7 @@ #include "kokkos_abstraction.hpp" #include "solvers/mg_solver.hpp" #include "solvers/cg_solver.hpp" -//#include "solvers/solver_utils.hpp" +#include "solvers/solver_utils.hpp" #include "solvers/solver_utils_stages.hpp" #include "tasks/tasks.hpp" #include "utils/type_list.hpp" @@ -88,14 +88,15 @@ class CGSolverStages : public SolverBase { TaskID AddTasks(TaskList &tl, TaskID dependence, const int partition, Mesh *pmesh) { using namespace StageUtils; TaskID none; + auto partitions = pmesh->GetDefaultBlockPartitions(); // Should contain all fields necessary for applying the matrix to a give state vector, // e.g. diffusion coefficients and diagonal, these will not be modified by the solvers - auto &md_base = pmesh->mesh_data.GetOrAdd(container_base, partition); + auto &md_base = pmesh->mesh_data.Add(container_base, partitions[partition]); // Container in which the solution is stored and with which the downstream user can // interact. 
This container only requires the fields in sol_fields - auto &md_u = pmesh->mesh_data.GetOrAdd(container_u, partition); + auto &md_u = pmesh->mesh_data.Add(container_u, partitions[partition]); // Container of the rhs, only requires fields in sol_fields - auto &md_rhs = pmesh->mesh_data.GetOrAdd(container_rhs, partition); + auto &md_rhs = pmesh->mesh_data.Add(container_rhs, partitions[partition]); // Internal solver containers auto &md_x = pmesh->mesh_data.Add(container_x, md_u, sol_fields); auto &md_r = pmesh->mesh_data.Add(container_r, md_u, sol_fields); @@ -162,6 +163,7 @@ class CGSolverStages : public SolverBase { //auto zero_u = itl.AddTask(precon, TF(SetToZero), sol_fields, md_u); //precon = // preconditioner.AddLinearOperatorTasks(itl, set_rhs | zero_u, partition, pmesh); + PARTHENON_FAIL("Preconditioning not yet implemented."); } else { precon = itl.AddTask(precon, TF(CopyData), sol_fields, md_r, md_u); } diff --git a/src/solvers/solver_utils_stages.hpp b/src/solvers/solver_utils_stages.hpp index ddca07494366..155630f62e99 100644 --- a/src/solvers/solver_utils_stages.hpp +++ b/src/solvers/solver_utils_stages.hpp @@ -38,7 +38,7 @@ TaskStatus CopyData(const std::vector &fields, IndexRange jb = md_in->GetBoundsJ(IndexDomain::entire, te); IndexRange kb = md_in->GetBoundsK(IndexDomain::entire, te); - static auto desc = parthenon::MakePackDescriptor(md_in.get(), fields); + auto desc = parthenon::MakePackDescriptor(md_in.get(), fields); auto pack_in = desc.GetPack(md_in.get(), only_fine_on_composite); auto pack_out = desc.GetPack(md_out.get(), only_fine_on_composite); const int scratch_size = 0; @@ -82,7 +82,7 @@ TaskStatus AddFieldsAndStoreInteriorSelect(const std::vector &field include_block[b] = md_a->GetBlockData(b)->GetBlockPointer()->neighbors.size() == 0; } - static auto desc = parthenon::MakePackDescriptor(md_a.get(), fields); + auto desc = parthenon::MakePackDescriptor(md_a.get(), fields); auto pack_a = desc.GetPack(md_a.get(), include_block, only_fine_on_composite); auto pack_b = desc.GetPack(md_b.get(), include_block, only_fine_on_composite); auto pack_out = desc.GetPack(md_out.get(), include_block, only_fine_on_composite); @@ -122,7 +122,7 @@ TaskStatus SetToZero(const std::vector &fields, const std::shared_p int nblocks = md->NumBlocks(); using TE = parthenon::TopologicalElement; TE te = TE::CC; - static auto desc = parthenon::MakePackDescriptor(md.get(), fields); + auto desc = parthenon::MakePackDescriptor(md.get(), fields); auto pack = desc.GetPack(md.get(), only_fine_on_composite); const size_t scratch_size_in_bytes = 0; const int scratch_level = 1; @@ -147,7 +147,7 @@ TaskStatus SetToZero(const std::vector &fields, const std::shared_p return TaskStatus::complete; } -TaskStatus ADividedByB(const std::vector &fields, +inline TaskStatus ADividedByB(const std::vector &fields, const std::shared_ptr> &md_a, const std::shared_ptr> &md_b, const std::shared_ptr> &md_out) { @@ -155,7 +155,7 @@ TaskStatus ADividedByB(const std::vector &fields, IndexRange jb = md_a->GetBoundsJ(IndexDomain::interior); IndexRange kb = md_a->GetBoundsK(IndexDomain::interior); - static auto desc = parthenon::MakePackDescriptor(md_a.get(), fields); + auto desc = parthenon::MakePackDescriptor(md_a.get(), fields); auto pack_a = desc.GetPack(md_a.get()); auto pack_b = desc.GetPack(md_b.get()); auto pack_out = desc.GetPack(md_out.get()); @@ -171,7 +171,7 @@ TaskStatus ADividedByB(const std::vector &fields, return TaskStatus::complete; } -TaskStatus DotProductLocal(const std::vector &fields, +inline TaskStatus 
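/* Editor's note: the hunks above drop `static` from these pack-descriptor locals.
   A function-local `static auto desc = ...` is initialized on the first call only,
   so it would silently pin the first caller's field list; once the descriptor
   depends on a runtime `fields` vector it must be rebuilt on every call, e.g.

     auto desc = parthenon::MakePackDescriptor(md_a.get(), fields);  // per call

   Patch 28 below restores `static` once the fields come from a compile-time
   TypeList, where caching the descriptor is safe again. */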
DotProductLocal(const std::vector &fields, const std::shared_ptr> &md_a, const std::shared_ptr> &md_b, AllReduce *adotb) { @@ -181,7 +181,7 @@ TaskStatus DotProductLocal(const std::vector &fields, IndexRange jb = md_a->GetBoundsJ(IndexDomain::interior, te); IndexRange kb = md_a->GetBoundsK(IndexDomain::interior, te); - static auto desc = parthenon::MakePackDescriptor(md_a.get(), fields); + auto desc = parthenon::MakePackDescriptor(md_a.get(), fields); auto pack_a = desc.GetPack(md_a.get()); auto pack_b = desc.GetPack(md_b.get()); Real gsum(0); @@ -201,7 +201,7 @@ TaskStatus DotProductLocal(const std::vector &fields, return TaskStatus::complete; } -TaskID DotProduct(TaskID dependency_in, TaskList &tl, AllReduce *adotb, +inline TaskID DotProduct(TaskID dependency_in, TaskList &tl, AllReduce *adotb, const std::vector &fields, const std::shared_ptr> &md_a, const std::shared_ptr> &md_b) { From 62414a06e2e01673763adffd692771ec7c6ee62a Mon Sep 17 00:00:00 2001 From: Luke Roberts Date: Wed, 2 Oct 2024 17:41:40 -0600 Subject: [PATCH 28/62] switch to type lists --- example/poisson_gmg/poisson_driver.cpp | 2 +- .../poisson_gmg/poisson_equation_stages.hpp | 2 + example/poisson_gmg/poisson_package.cpp | 2 +- src/solvers/cg_solver_stages.hpp | 41 ++++++++-------- src/solvers/solver_utils_stages.hpp | 47 +++++++++---------- 5 files changed, 47 insertions(+), 47 deletions(-) diff --git a/example/poisson_gmg/poisson_driver.cpp b/example/poisson_gmg/poisson_driver.cpp index c44229db5284..1c872fb667bc 100644 --- a/example/poisson_gmg/poisson_driver.cpp +++ b/example/poisson_gmg/poisson_driver.cpp @@ -86,7 +86,7 @@ TaskCollection PoissonDriver::MakeTaskCollection(BlockList_t &blocks) { // Set initial solution guess to zero auto zero_u = tl.AddTask(get_rhs, TF(solvers::utils::SetToZero), md); zero_u = tl.AddTask(zero_u, TF(solvers::utils::SetToZero), md_u); - zero_u = tl.AddTask(zero_u, TF(solvers::StageUtils::CopyData), std::vector{rhs::name()}, md, md_rhs); + zero_u = tl.AddTask(zero_u, TF(solvers::StageUtils::CopyData>), md, md_rhs); zero_u = tl.AddTask(zero_u, TF(solvers::utils::CopyData), md_rhs); auto setup = psolver->AddSetupTasks(tl, zero_u, i, pmesh); auto solve = psolver->AddTasks(tl, setup, i, pmesh); diff --git a/example/poisson_gmg/poisson_equation_stages.hpp b/example/poisson_gmg/poisson_equation_stages.hpp index 656d83d4bd70..74b38b6f07df 100644 --- a/example/poisson_gmg/poisson_equation_stages.hpp +++ b/example/poisson_gmg/poisson_equation_stages.hpp @@ -40,6 +40,8 @@ class PoissonEquationStages { bool include_flux_dx = false; enum class ProlongationType { Constant, Linear, Kwak }; ProlongationType prolongation_type = ProlongationType::Constant; + + using IndependentVars = parthenon::TypeList; PoissonEquationStages(parthenon::ParameterInput *pin, const std::string &label) { do_flux_cor = pin->GetOrAddBoolean(label, "flux_correct", false); diff --git a/example/poisson_gmg/poisson_package.cpp b/example/poisson_gmg/poisson_package.cpp index da3116f6d2ba..df71b6540c82 100644 --- a/example/poisson_gmg/poisson_package.cpp +++ b/example/poisson_gmg/poisson_package.cpp @@ -107,7 +107,7 @@ std::shared_ptr Initialize(ParameterInput *pin) { } else if (solver == "CGStages") { using PoissEqStages = poisson_package::PoissonEquationStages; parthenon::solvers::CGParams params(pin, "poisson/solver_params"); - psolver = std::make_shared>(std::vector{u::name()}, + psolver = std::make_shared>( "base", "u", "rhs", pkg.get(), params, PoissEqStages(pin, "poisson")); diff --git a/src/solvers/cg_solver_stages.hpp 
b/src/solvers/cg_solver_stages.hpp index a78d7a94299b..3b4f981348d3 100644 --- a/src/solvers/cg_solver_stages.hpp +++ b/src/solvers/cg_solver_stages.hpp @@ -44,6 +44,8 @@ namespace solvers { // the matrix A to it and stores the result in y_t. template class CGSolverStages : public SolverBase { + + using FieldTL = typename equations::IndependentVars; std::vector sol_fields; // Name of user defined container that should contain information required to @@ -59,20 +61,19 @@ class CGSolverStages : public SolverBase { public: - CGSolverStages(const std::vector &fields, - const std::string &container_base, + CGSolverStages(const std::string &container_base, const std::string &container_u, const std::string &container_rhs, StateDescriptor *pkg, CGParams params_in, const equations &eq_in = equations()) - : sol_fields(fields), - container_base(container_base), + : container_base(container_base), container_u(container_u), container_rhs(container_rhs), params_(params_in), iter_counter(0), eqs_(eq_in) { + FieldTL::IterateTypes([this](auto t){this->sol_fields.push_back(decltype(t)::name());}); std::string solver_id = "cg"; container_x = solver_id + "_x"; container_r = solver_id + "_r"; @@ -108,14 +109,14 @@ class CGSolverStages : public SolverBase { bool multilevel = pmesh->multilevel; // Initialization: u <- 0, r <- rhs, p <- 0, ru <- 1 - auto zero_u = tl.AddTask(dependence, TF(SetToZero), sol_fields, md_u); - auto zero_v = tl.AddTask(dependence, TF(SetToZero), sol_fields, md_v); - auto zero_x = tl.AddTask(dependence, TF(SetToZero), sol_fields, md_x); - auto zero_p = tl.AddTask(dependence, TF(SetToZero), sol_fields, md_p); - auto copy_r = tl.AddTask(dependence, TF(CopyData), sol_fields, md_rhs, md_r); + auto zero_u = tl.AddTask(dependence, TF(SetToZero), md_u); + auto zero_v = tl.AddTask(dependence, TF(SetToZero), md_v); + auto zero_x = tl.AddTask(dependence, TF(SetToZero), md_x); + auto zero_p = tl.AddTask(dependence, TF(SetToZero), md_p); + auto copy_r = tl.AddTask(dependence, TF(CopyData), md_rhs, md_r); auto get_rhs2 = none; if (params_.relative_residual || params_.print_per_step) - get_rhs2 = DotProduct(dependence, tl, &rhs2, sol_fields, md_rhs, md_rhs); + get_rhs2 = DotProduct(dependence, tl, &rhs2, md_rhs, md_rhs); auto initialize = tl.AddTask( TaskQualifier::once_per_region | TaskQualifier::local_sync, zero_u | zero_v | zero_x | zero_p | copy_r | get_rhs2, "zero factors", @@ -159,24 +160,24 @@ class CGSolverStages : public SolverBase { // 1. u <- M r auto precon = reset; if (params_.precondition) { - //auto set_rhs = itl.AddTask(precon, TF(CopyData), sol_fields, md_r, m_rhs); - //auto zero_u = itl.AddTask(precon, TF(SetToZero), sol_fields, md_u); + //auto set_rhs = itl.AddTask(precon, TF(CopyData), md_r, m_rhs); + //auto zero_u = itl.AddTask(precon, TF(SetToZero), md_u); //precon = // preconditioner.AddLinearOperatorTasks(itl, set_rhs | zero_u, partition, pmesh); PARTHENON_FAIL("Preconditioning not yet implemented."); } else { - precon = itl.AddTask(precon, TF(CopyData), sol_fields, md_r, md_u); + precon = itl.AddTask(precon, TF(CopyData), md_r, md_u); } // 2. beta <- r dot u / r dot u {old} - auto get_ru = DotProduct(precon, itl, &ru, sol_fields, md_r, md_u); + auto get_ru = DotProduct(precon, itl, &ru, md_r, md_u); // 3. p <- u + beta p auto correct_p = itl.AddTask( get_ru, "p <- u + beta p", [](CGSolverStages *solver, std::shared_ptr> &md_u, std::shared_ptr> &md_p) { Real beta = solver->iter_counter > 0 ? 
solver->ru.val / solver->ru_old : 0.0; - return AddFieldsAndStore(solver->sol_fields, md_u, md_p, md_p, 1.0, beta); + return AddFieldsAndStore(md_u, md_p, md_p, 1.0, beta); }, this, md_u, md_p); @@ -186,7 +187,7 @@ class CGSolverStages : public SolverBase { auto get_v = eqs_.template Ax(itl, comm, md_base, md_p, md_v); // 5. alpha <- r dot u / p dot v (calculate denominator) - auto get_pAp = DotProduct(get_v, itl, &pAp, sol_fields, md_p, md_v); + auto get_pAp = DotProduct(get_v, itl, &pAp, md_p, md_v); // 6. x <- x + alpha p auto correct_x = itl.AddTask( @@ -195,7 +196,7 @@ class CGSolverStages : public SolverBase { std::shared_ptr> &md_x, std::shared_ptr> &md_p) { Real alpha = solver->ru.val / solver->pAp.val; - return AddFieldsAndStore(solver->sol_fields, md_x, md_p, md_x, 1.0, alpha); + return AddFieldsAndStore(md_x, md_p, md_x, 1.0, alpha); }, this, md_x, md_p); @@ -206,12 +207,12 @@ class CGSolverStages : public SolverBase { std::shared_ptr> &md_r, std::shared_ptr> &md_v) { Real alpha = solver->ru.val / solver->pAp.val; - return AddFieldsAndStore(solver->sol_fields, md_r, md_v, md_r, 1.0, -alpha); + return AddFieldsAndStore(md_r, md_v, md_r, 1.0, -alpha); }, this, md_r, md_v); // 7. Check and print out residual - auto get_res = DotProduct(correct_r, itl, &residual, sol_fields, md_r, md_r); + auto get_res = DotProduct(correct_r, itl, &residual, md_r, md_r); auto print = itl.AddTask( TaskQualifier::once_per_region, get_res, @@ -243,7 +244,7 @@ class CGSolverStages : public SolverBase { this, pmesh, params_.max_iters, params_.residual_tolerance, params_.relative_residual); - return tl.AddTask(solver_id, TF(CopyData), sol_fields, md_x, md_u); + return tl.AddTask(solver_id, TF(CopyData), md_x, md_u); } Real GetSquaredResidualSum() const { return residual.val; } diff --git a/src/solvers/solver_utils_stages.hpp b/src/solvers/solver_utils_stages.hpp index 155630f62e99..fa7196349cc1 100644 --- a/src/solvers/solver_utils_stages.hpp +++ b/src/solvers/solver_utils_stages.hpp @@ -28,9 +28,8 @@ namespace solvers { namespace StageUtils { -template -TaskStatus CopyData(const std::vector &fields, - const std::shared_ptr> &md_in, +template +TaskStatus CopyData(const std::shared_ptr> &md_in, const std::shared_ptr> &md_out) { using TE = parthenon::TopologicalElement; TE te = TE::CC; @@ -38,7 +37,7 @@ TaskStatus CopyData(const std::vector &fields, IndexRange jb = md_in->GetBoundsJ(IndexDomain::entire, te); IndexRange kb = md_in->GetBoundsK(IndexDomain::entire, te); - auto desc = parthenon::MakePackDescriptor(md_in.get(), fields); + static auto desc = parthenon::MakePackDescriptorFromTypeList(md_in.get()); auto pack_in = desc.GetPack(md_in.get(), only_fine_on_composite); auto pack_out = desc.GetPack(md_out.get(), only_fine_on_composite); const int scratch_size = 0; @@ -61,9 +60,8 @@ TaskStatus CopyData(const std::vector &fields, return TaskStatus::complete; } -template -TaskStatus AddFieldsAndStoreInteriorSelect(const std::vector &fields, - const std::shared_ptr> &md_a, +template +TaskStatus AddFieldsAndStoreInteriorSelect(const std::shared_ptr> &md_a, const std::shared_ptr> &md_b, const std::shared_ptr> &md_out, Real wa = 1.0, Real wb = 1.0, @@ -82,7 +80,7 @@ TaskStatus AddFieldsAndStoreInteriorSelect(const std::vector &field include_block[b] = md_a->GetBlockData(b)->GetBlockPointer()->neighbors.size() == 0; } - auto desc = parthenon::MakePackDescriptor(md_a.get(), fields); + static auto desc = parthenon::MakePackDescriptorFromTypeList(md_a.get()); auto pack_a = desc.GetPack(md_a.get(), include_block, 
only_fine_on_composite); auto pack_b = desc.GetPack(md_b.get(), include_block, only_fine_on_composite); auto pack_out = desc.GetPack(md_out.get(), include_block, only_fine_on_composite); @@ -107,22 +105,21 @@ TaskStatus AddFieldsAndStoreInteriorSelect(const std::vector &field return TaskStatus::complete; } -template -TaskStatus AddFieldsAndStore(const std::vector &fields, - const std::shared_ptr> &md_a, +template +TaskStatus AddFieldsAndStore(const std::shared_ptr> &md_a, const std::shared_ptr> &md_b, const std::shared_ptr> &md_out, Real wa = 1.0, Real wb = 1.0) { - return AddFieldsAndStoreInteriorSelect( - fields, md_a, md_b, md_out, wa, wb, false); + return AddFieldsAndStoreInteriorSelect( + md_a, md_b, md_out, wa, wb, false); } -template -TaskStatus SetToZero(const std::vector &fields, const std::shared_ptr> &md) { +template +TaskStatus SetToZero(const std::shared_ptr> &md) { int nblocks = md->NumBlocks(); using TE = parthenon::TopologicalElement; TE te = TE::CC; - auto desc = parthenon::MakePackDescriptor(md.get(), fields); + static auto desc = parthenon::MakePackDescriptorFromTypeList(md.get()); auto pack = desc.GetPack(md.get(), only_fine_on_composite); const size_t scratch_size_in_bytes = 0; const int scratch_level = 1; @@ -147,15 +144,15 @@ TaskStatus SetToZero(const std::vector &fields, const std::shared_p return TaskStatus::complete; } -inline TaskStatus ADividedByB(const std::vector &fields, - const std::shared_ptr> &md_a, +template +TaskStatus ADividedByB(const std::shared_ptr> &md_a, const std::shared_ptr> &md_b, const std::shared_ptr> &md_out) { IndexRange ib = md_a->GetBoundsI(IndexDomain::interior); IndexRange jb = md_a->GetBoundsJ(IndexDomain::interior); IndexRange kb = md_a->GetBoundsK(IndexDomain::interior); - auto desc = parthenon::MakePackDescriptor(md_a.get(), fields); + static auto desc = parthenon::MakePackDescriptorFromTypeList(md_a.get()); auto pack_a = desc.GetPack(md_a.get()); auto pack_b = desc.GetPack(md_b.get()); auto pack_out = desc.GetPack(md_out.get()); @@ -171,8 +168,8 @@ inline TaskStatus ADividedByB(const std::vector &fields, return TaskStatus::complete; } -inline TaskStatus DotProductLocal(const std::vector &fields, - const std::shared_ptr> &md_a, +template +TaskStatus DotProductLocal(const std::shared_ptr> &md_a, const std::shared_ptr> &md_b, AllReduce *adotb) { using TE = parthenon::TopologicalElement; @@ -181,7 +178,7 @@ inline TaskStatus DotProductLocal(const std::vector &fields, IndexRange jb = md_a->GetBoundsJ(IndexDomain::interior, te); IndexRange kb = md_a->GetBoundsK(IndexDomain::interior, te); - auto desc = parthenon::MakePackDescriptor(md_a.get(), fields); + static auto desc = parthenon::MakePackDescriptorFromTypeList(md_a.get()); auto pack_a = desc.GetPack(md_a.get()); auto pack_b = desc.GetPack(md_b.get()); Real gsum(0); @@ -201,8 +198,8 @@ inline TaskStatus DotProductLocal(const std::vector &fields, return TaskStatus::complete; } -inline TaskID DotProduct(TaskID dependency_in, TaskList &tl, AllReduce *adotb, - const std::vector &fields, +template +TaskID DotProduct(TaskID dependency_in, TaskList &tl, AllReduce *adotb, const std::shared_ptr> &md_a, const std::shared_ptr> &md_b) { using namespace impl; @@ -214,7 +211,7 @@ inline TaskID DotProduct(TaskID dependency_in, TaskList &tl, AllReduce *ad }, adotb); auto get_adotb = tl.AddTask(TaskQualifier::local_sync, zero_adotb, - DotProductLocal, fields, md_a, md_b, adotb); + DotProductLocal, md_a, md_b, adotb); auto start_global_adotb = tl.AddTask(TaskQualifier::once_per_region, get_adotb, 
&AllReduce::StartReduce, adotb, MPI_SUM);
  auto finish_global_adotb =

From 73ad5bcf143cc01c35df1b598ec60f936413f799 Mon Sep 17 00:00:00 2001
From: Luke Roberts
Date: Wed, 2 Oct 2024 17:42:25 -0600
Subject: [PATCH 29/62] start, nowhere near compiling

---
 src/solvers/mg_solver_stages.hpp | 516 +++++++++++++++++++++++++++++++
 1 file changed, 516 insertions(+)
 create mode 100644 src/solvers/mg_solver_stages.hpp

diff --git a/src/solvers/mg_solver_stages.hpp b/src/solvers/mg_solver_stages.hpp
new file mode 100644
index 000000000000..d15bb9d56b3a
--- /dev/null
+++ b/src/solvers/mg_solver_stages.hpp
@@ -0,0 +1,516 @@
+//========================================================================================
+// (C) (or copyright) 2023-2024. Triad National Security, LLC. All rights reserved.
+//
+// This program was produced under U.S. Government contract 89233218CNA000001 for Los
+// Alamos National Laboratory (LANL), which is operated by Triad National Security, LLC
+// for the U.S. Department of Energy/National Nuclear Security Administration. All rights
+// in the program are reserved by Triad National Security, LLC, and the U.S. Department
+// of Energy/National Nuclear Security Administration. The Government is granted for
+// itself and others acting on its behalf a nonexclusive, paid-up, irrevocable worldwide
+// license in this material to reproduce, prepare derivative works, distribute copies to
+// the public, perform publicly and display publicly, and to permit others to do so.
+//========================================================================================
+#ifndef SOLVERS_MG_SOLVER_STAGES_HPP_
+#define SOLVERS_MG_SOLVER_STAGES_HPP_
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "interface/mesh_data.hpp"
+#include "interface/meshblock_data.hpp"
+#include "interface/state_descriptor.hpp"
+#include "kokkos_abstraction.hpp"
+#include "solvers/solver_utils.hpp"
+#include "tasks/tasks.hpp"
+#include "utils/robust.hpp"
+#include "utils/type_list.hpp"
+
+namespace parthenon {
+
+namespace solvers {
+
+struct MGParams {
+  int max_iters = 1000;
+  Real residual_tolerance = 1.e-12;
+  bool do_FAS = true;
+  std::string smoother = "SRJ2";
+  bool two_by_two_diagonal = false;
+  int max_coarsenings = std::numeric_limits::max();
+  std::string prolongation = "OldLinear";
+
+  MGParams() = default;
+  MGParams(ParameterInput *pin, const std::string &input_block) {
+    max_iters = pin->GetOrAddInteger(input_block, "max_iterations", max_iters);
+    residual_tolerance =
+        pin->GetOrAddReal(input_block, "residual_tolerance", residual_tolerance);
+    do_FAS = pin->GetOrAddBoolean(input_block, "do_FAS", do_FAS);
+    smoother = pin->GetOrAddString(input_block, "smoother", smoother);
+    prolongation = pin->GetOrAddString(input_block, "prolongation", prolongation);
+    two_by_two_diagonal =
+        pin->GetOrAddBoolean(input_block, "two_by_two_diagonal", two_by_two_diagonal);
+    max_coarsenings =
+        pin->GetOrAddInteger(input_block, "max_coarsenings", max_coarsenings);
+  }
+};
+
+class SolverBase {
+ public:
+  virtual ~SolverBase(){}
+
+  virtual TaskID AddSetupTasks(TaskList &tl, TaskID dependence, int partition,
+                               Mesh *pmesh) = 0;
+  virtual TaskID AddTasks(TaskList &tl, TaskID dependence, int partition,
+                          Mesh *pmesh) = 0;
+
+  Real GetFinalResidual() const { return final_residual; }
+  int GetFinalIterations() const { return final_iteration; }
+
+ protected:
+  Real final_residual;
+  int final_iteration;
+};
+
+// The equations class must include a template method
+//
+// template
+//
TaskID Ax(TL_t &tl, TaskID depends_on, std::shared_ptr> &md) +// +// that takes a field associated with x_t and applies +// the matrix A to it and stores the result in y_t. Additionally, +// it must include a template method +// +// template +// TaskStatus SetDiagonal(std::shared_ptr> &md) +// +// That stores the (possibly approximate) diagonal of matrix A in the field +// associated with the type diag_t. This is used for Jacobi iteration. +template +class MGSolverStages : public SolverBase { + public: + using FieldTL = typename equations::IndependentVars; + + std::vector sol_fields; + + // Name of user defined container that should contain information required to + // calculate the matrix part of the matrix vector product + std::string container_base; + // User defined container in which the solution will reside, only needs to contain sol_fields + // TODO(LFR): Also allow for an initial guess to come in here + std::string container_u; + // User defined container containing the rhs vector, only needs to contain sol_fields + std::string container_rhs; + // Internal containers for solver which create deep copies of sol_fields + std::string container_res_err, container_temp, container_u0, container_diag; + + MGSolverStages(const std::string &container_base, + const std::string &container_u, + const std::string &container_rhs, + StateDescriptor *pkg, + MGParams params_in, + equations eq_in = equations()) + : container_base(container_base), + container_u(container_u), + container_rhs(container_rhs), + params_(params_in), + iter_counter(0), + eqs_(eq_in), + container_(container) { + FieldTL::IterateTypes([this](auto t){this->sol_fields.push_back(decltype(t)::name());}); + std::string solver_id = "mg"; + container_res_err = solver_id + "_res_err"; + container_temp = solver_id + "_temp"; + container_u0 = solver_id + "_u0"; + container_diag = solver_id + "_diag"; + } + + TaskID AddTasks(TaskList &tl, TaskID dependence, const int partition, Mesh *pmesh) { + using namespace utils; + TaskID none; + auto [itl, solve_id] = tl.AddSublist(dependence, {1, this->params_.max_iters}); + iter_counter = -1; + auto update_iter = itl.AddTask( + TaskQualifier::local_sync | TaskQualifier::once_per_region, none, "print", + [](int *iter_counter) { + (*iter_counter)++; + if (*iter_counter > 1 || Globals::my_rank != 0) return TaskStatus::complete; + printf("# [0] v-cycle\n# [1] rms-residual\n# [2] rms-error\n"); + return TaskStatus::complete; + }, + &iter_counter); + auto mg_finest = AddLinearOperatorTasks(itl, update_iter, partition, pmesh); + + auto partitions = pmesh->GetDefaultBlockPartitions(GridIdentifier::leaf()); + if (partition >= partitions.size()) + PARTHENON_FAIL("Does not work with non-default partitioning."); + auto &md = pmesh->mesh_data.Add(container_base, partitions[partition]); + auto &md_u = pmesh->mesh_data.Add(container_u, md, sol_fields); + auto &md_res_err = pmesh->mesh_data.Add(container_res_err, md, sol_fields); + auto &md_rhs = pmesh->mesh_data.Add(container_rhs, md, sol_fields); + auto comm = AddBoundaryExchangeTasks(mg_finest, itl, md_u, + pmesh->multilevel); + auto calc_pointwise_res = eqs_.template Ax(itl, comm, md, md_u, md_res_err); + calc_pointwise_res = itl.AddTask( + calc_pointwise_res, TF(AddFieldsAndStoreInteriorSelect), + md_rhs, md_res_err, md_res_err, 1.0, -1.0, false); + auto get_res = DotProduct(calc_pointwise_res, itl, &residual, md_res_err, md_res_err); + + auto check = itl.AddTask( + TaskQualifier::completion, get_res, "Check residual", + [partition](MGSolver *solver, Mesh 
*pmesh) {
+          Real rms_res = std::sqrt(solver->residual.val / pmesh->GetTotalCells());
+          if (Globals::my_rank == 0 && partition == 0)
+            printf("%i %e\n", solver->iter_counter, rms_res);
+          solver->final_residual = rms_res;
+          solver->final_iteration = solver->iter_counter;
+          if (rms_res > solver->params_.residual_tolerance) return TaskStatus::iterate;
+          return TaskStatus::complete;
+        },
+        this, pmesh);
+
+    return solve_id;
+  }
+
+  TaskID AddLinearOperatorTasks(TaskList &tl, TaskID dependence, int partition,
+                                Mesh *pmesh) {
+    using namespace utils;
+    iter_counter = 0;
+
+    int min_level = std::max(pmesh->GetGMGMaxLevel() - params_.max_coarsenings,
+                             pmesh->GetGMGMinLevel());
+    int max_level = pmesh->GetGMGMaxLevel();
+    // We require a local pre- and post-MG sync since multigrid iterations require
+    // communication across blocks and partitions on the multigrid levels do not
+    // necessarily contain the same blocks as partitions on the leaf grid. This
+    // means that without the syncs, leaf partitions can erroneously receive
+    // messages and/or update block data during an MG step.
+    auto pre_sync = tl.AddTask(TaskQualifier::local_sync, dependence,
+                               []() { return TaskStatus::complete; });
+    auto mg = pre_sync;
+    for (int level = max_level; level >= min_level; --level) {
+      mg = mg | AddMultiGridTasksPartitionLevel(tl, dependence, partition, level,
+                                                min_level, max_level, pmesh);
+    }
+    auto post_sync =
+        tl.AddTask(TaskQualifier::local_sync, mg, []() { return TaskStatus::complete; });
+    return post_sync;
+  }
+
+  TaskID AddSetupTasks(TaskList &tl, TaskID dependence, int partition, Mesh *pmesh) {
+    using namespace utils;
+
+    int min_level = std::max(pmesh->GetGMGMaxLevel() - params_.max_coarsenings,
+                             pmesh->GetGMGMinLevel());
+    int max_level = pmesh->GetGMGMaxLevel();
+
+    auto mg_setup = dependence;
+    for (int level = max_level; level >= min_level; --level) {
+      mg_setup =
+          mg_setup | AddMultiGridSetupPartitionLevel(tl, dependence, partition, level,
+                                                     min_level, max_level, pmesh);
+    }
+    return mg_setup;
+  }
+
+  Real GetSquaredResidualSum() const { return residual.val; }
+  int GetCurrentIterations() const { return iter_counter; }
+
+ protected:
+  MGParams params_;
+  int iter_counter;
+  AllReduce residual;
+  equations eqs_;
+  std::string container_;
+
+  // These functions apparently have to be public to compile with cuda since
+  // they contain device side lambdas
+ public:
+  template
+  TaskStatus Jacobi(std::shared_ptr> &md, double weight) {
+    using namespace parthenon;
+    const int ndim = md->GetMeshPointer()->ndim;
+    using TE = parthenon::TopologicalElement;
+    TE te = TE::CC;
+    IndexRange ib = md->GetBoundsI(IndexDomain::interior, te);
+    IndexRange jb = md->GetBoundsJ(IndexDomain::interior, te);
+    IndexRange kb = md->GetBoundsK(IndexDomain::interior, te);
+
+    int nblocks = md->NumBlocks();
+    std::vector include_block(nblocks, true);
+    if (md->grid.type == GridType::two_level_composite) {
+      int current_level = md->grid.logical_level;
+      for (int b = 0; b < nblocks; ++b) {
+        include_block[b] =
+            md->GetBlockData(b)->GetBlockPointer()->loc.level() == current_level;
+      }
+    }
+    static auto desc =
+        parthenon::MakePackDescriptor(md.get());
+    auto pack = desc.GetPack(md.get(), include_block);
+    const int scratch_size = 0;
+    const int scratch_level = 0;
+    parthenon::par_for_outer(
+        DEFAULT_OUTER_LOOP_PATTERN, "Jacobi", DevExecSpace(), scratch_size,
+        scratch_level, 0, pack.GetNBlocks() - 1, kb.s, kb.e,
+        KOKKOS_LAMBDA(parthenon::team_mbr_t member, const int b, const int k) {
+          const int
nvars = + pack.GetUpperBound(b, xnew_t()) - pack.GetLowerBound(b, xnew_t()) + 1; + for (int c = 0; c < nvars; ++c) { + Real *Ax = &pack(b, te, Axold_t(c), k, jb.s, ib.s); + Real *diag = &pack(b, te, D_t(c), k, jb.s, ib.s); + Real *prhs = &pack(b, te, rhs_t(c), k, jb.s, ib.s); + Real *xo = &pack(b, te, xold_t(c), k, jb.s, ib.s); + Real *xn = &pack(b, te, xnew_t(c), k, jb.s, ib.s); + // Use ptr arithmetic to get the number of points we need to go over + // (including ghost zones) to get from (k, jb.s, ib.s) to (k, jb.e, ib.e) + const int npoints = &pack(b, te, Axold_t(c), k, jb.e, ib.e) - Ax + 1; + parthenon::par_for_inner( + DEFAULT_INNER_LOOP_PATTERN, member, 0, npoints - 1, [&](const int idx) { + const Real off_diag = Ax[idx] - diag[idx] * xo[idx]; + const Real val = prhs[idx] - off_diag; + xn[idx] = + weight * robust::ratio(val, diag[idx]) + (1.0 - weight) * xo[idx]; + }); + } + }); + return TaskStatus::complete; + } + + template + TaskID AddJacobiIteration(TL_t &tl, TaskID depends_on, bool multilevel, Real omega, + std::shared_ptr> &md, + std::shared_ptr> &md_comm) { + using namespace utils; + + auto comm = + AddBoundaryExchangeTasks(depends_on, tl, md_comm, multilevel); + auto mat_mult = eqs_.template Ax(tl, comm, md); + return tl.AddTask(mat_mult, TF(&MGSolver::Jacobi), this, + md, omega); + } + + template + TaskID AddSRJIteration(TL_t &tl, TaskID depends_on, int stages, bool multilevel, + std::shared_ptr> &md, + std::shared_ptr> &md_comm) { + using namespace utils; + int ndim = md->GetParentPointer()->ndim; + + std::array, 3> omega_M1{ + {{1.0, 0.0, 0.0}, {1.0, 0.0, 0.0}, {1.0, 0.0, 0.0}}}; + // Damping factors from Yang & Mittal (2017) + std::array, 3> omega_M2{ + {{0.8723, 0.5395, 0.0000}, {1.3895, 0.5617, 0.0000}, {1.7319, 0.5695, 0.0000}}}; + std::array, 3> omega_M3{ + {{0.9372, 0.6667, 0.5173}, {1.6653, 0.8000, 0.5264}, {2.2473, 0.8571, 0.5296}}}; + + if (stages == 0) return depends_on; + auto omega = omega_M1; + if (stages == 2) omega = omega_M2; + if (stages == 3) omega = omega_M3; + // This copy is to set the coarse blocks in temp to the values in u so that + // fine-coarse boundaries of temp are correctly updated during communication + depends_on = tl.AddTask(depends_on, TF(CopyData), md); + auto jacobi1 = AddJacobiIteration( + tl, depends_on, multilevel, omega[ndim - 1][0], md, md_comm); + auto copy1 = tl.AddTask(jacobi1, TF(CopyData), md); + if (stages < 2) return copy1; + auto jacobi2 = AddJacobiIteration( + tl, copy1, multilevel, omega[ndim - 1][1], md, md_comm); + auto copy2 = tl.AddTask(jacobi2, TF(CopyData), md); + if (stages < 3) return copy2; + auto jacobi3 = AddJacobiIteration( + tl, copy2, multilevel, omega[ndim - 1][2], md, md_comm); + return tl.AddTask(jacobi3, TF(CopyData), md); + } + + template + TaskID AddMultiGridSetupPartitionLevel(TL_t &tl, TaskID dependence, int partition, + int level, int min_level, int max_level, + Mesh *pmesh) { + using namespace utils; + + auto partitions = + pmesh->GetDefaultBlockPartitions(GridIdentifier::two_level_composite(level)); + if (partition >= partitions.size()) return dependence; + auto &md = pmesh->mesh_data.Add(container_, partitions[partition]); + + auto task_out = dependence; + if (level < max_level) { + task_out = + tl.AddTask(task_out, TF(ReceiveBoundBufs), md); + task_out = tl.AddTask(task_out, TF(SetBounds), md); + } + + // If we are finer than the coarsest level: + if (level > min_level) { + task_out = + tl.AddTask(task_out, TF(SendBoundBufs), md); + } + + // The boundaries are not up to date on return + return 
task_out; + } + + TaskID AddMultiGridTasksPartitionLevel(TaskList &tl, TaskID dependence, int partition, + int level, int min_level, int max_level, + Mesh *pmesh) { + using namespace utils; + auto smoother = params_.smoother; + bool do_FAS = params_.do_FAS; + int pre_stages, post_stages; + if (smoother == "none") { + pre_stages = 0; + post_stages = 0; + } else if (smoother == "SRJ1") { + pre_stages = 1; + post_stages = 1; + } else if (smoother == "SRJ2") { + pre_stages = 2; + post_stages = 2; + } else if (smoother == "SRJ3") { + pre_stages = 3; + post_stages = 3; + } else { + PARTHENON_FAIL("Unknown smoother type."); + } + +// auto decorate_task_name = [partition, level](const std::string &in, auto b) { +// return std::make_tuple(in + "(p:" + std::to_string(partition) + +// ", l:" + std::to_string(level) + ")", +// 1, b); +// }; + +// #define BTF(...) decorate_task_name(TF(__VA_ARGS__)) +#define BTF(...) TF(__VA_ARGS__) + bool multilevel = (level != min_level); + + auto partitions = + pmesh->GetDefaultBlockPartitions(GridIdentifier::two_level_composite(level)); + if (partition >= partitions.size()) return dependence; + auto &md = pmesh->mesh_data.Add(container_, partitions[partition]); + auto &md_comm = pmesh->mesh_data.AddShallow( + "mg_comm", md, std::vector{u::name(), res_err::name()}); + + // 0. Receive residual from coarser level if there is one + auto set_from_finer = dependence; + if (level < max_level) { + // Fill fields with restricted values + auto recv_from_finer = tl.AddTask( + dependence, TF(ReceiveBoundBufs), md_comm); + set_from_finer = tl.AddTask( + recv_from_finer, BTF(SetBounds), md_comm); + // 1. Copy residual from dual purpose communication field to the rhs, should be + // actual RHS for finest level + if (!do_FAS) { + auto zero_u = tl.AddTask(set_from_finer, BTF(SetToZero), md); + auto copy_rhs = tl.AddTask(set_from_finer, BTF(CopyData), md); + set_from_finer = zero_u | copy_rhs; + } else { + // TODO(LFR): Determine if this boundary exchange task is required, I think it is + // to make sure that the boundaries of the restricted u are up to date before + // calling Ax. That being said, at least in one case commenting this line out + // didn't seem to impact the solution. + set_from_finer = AddBoundaryExchangeTasks( + set_from_finer, tl, md_comm, multilevel); + set_from_finer = tl.AddTask(set_from_finer, BTF(CopyData), md); + // This should set the rhs only in blocks that correspond to interior nodes, the + // RHS of leaf blocks that are on this GMG level should have already been set on + // entry into multigrid + set_from_finer = eqs_.template Ax(tl, set_from_finer, md); + set_from_finer = + tl.AddTask(set_from_finer, + BTF(AddFieldsAndStoreInteriorSelect), md, + 1.0, 1.0, true); + } + } else { + set_from_finer = tl.AddTask(set_from_finer, BTF(CopyData), md); + } + + // 2. Do pre-smooth and fill solution on this level + set_from_finer = + tl.AddTask(set_from_finer, BTF(&equations::template SetDiagonal), &eqs_, md); + auto pre_smooth = AddSRJIteration( + tl, set_from_finer, pre_stages, multilevel, md, md_comm); + // If we are finer than the coarsest level: + auto post_smooth = pre_smooth; + if (level > min_level) { + // 3. Communicate same level boundaries so that u is up to date everywhere + auto comm_u = AddBoundaryExchangeTasks(pre_smooth, tl, + md_comm, multilevel); + + // 4. 
Calculate residual and store in communication field
+      auto residual = eqs_.template Ax(tl, comm_u, md);
+      residual = tl.AddTask(
+          residual, BTF(AddFieldsAndStoreInteriorSelect), md,
+          1.0, -1.0, false);
+
+      // 5. Restrict communication field and send to next level
+      auto communicate_to_coarse = tl.AddTask(
+          residual, BTF(SendBoundBufs), md_comm);
+
+      // 6. Receive error field into communication field and prolongate
+      auto recv_from_coarser =
+          tl.AddTask(communicate_to_coarse,
+                     TF(ReceiveBoundBufs), md_comm);
+      auto set_from_coarser = tl.AddTask(
+          recv_from_coarser, BTF(SetBounds), md_comm);
+      auto prolongate = set_from_coarser;
+      if (params_.prolongation == "User") {
+        prolongate = eqs_.template Prolongate(tl, set_from_coarser, md_comm);
+      } else {
+        prolongate =
+            tl.AddTask(set_from_coarser,
+                       BTF(ProlongateBounds), md_comm);
+      }
+
+      // 7. Correct solution on this level with res_err field and store in
+      // communication field
+      auto update_sol = tl.AddTask(
+          prolongate, BTF(AddFieldsAndStore), md, 1.0, 1.0);
+
+      // 8. Post smooth using communication field and stored RHS
+      post_smooth = AddSRJIteration(tl, update_sol, post_stages,
+                                    multilevel, md, md_comm);
+
+    } else {
+      post_smooth = tl.AddTask(pre_smooth, BTF(CopyData), md);
+    }
+
+    // 9. Send communication field to next finer level (should be error field for that
+    // level)
+    TaskID last_task = post_smooth;
+    if (level < max_level) {
+      auto copy_over = post_smooth;
+      if (!do_FAS) {
+        copy_over = tl.AddTask(post_smooth, BTF(CopyData), md);
+      } else {
+        auto calc_err = tl.AddTask(
+            post_smooth, BTF(AddFieldsAndStore), md, 1.0, -1.0);
+        copy_over = calc_err;
+      }
+      // This is required to make sure boundaries of res_err are up to date before
+      // prolongation
+      copy_over = tl.AddTask(copy_over, BTF(CopyData), md);
+      copy_over = tl.AddTask(copy_over, BTF(CopyData), md);
+      auto boundary = AddBoundaryExchangeTasks(
+          copy_over, tl, md_comm, multilevel);
+      auto copy_back = tl.AddTask(boundary, BTF(CopyData), md);
+      copy_back = tl.AddTask(copy_back, BTF(CopyData), md);
+      last_task = tl.AddTask(copy_back,
+                             BTF(SendBoundBufs), md);
+    }
+    // The boundaries are not up to date on return
+    return last_task;
+  }
+};
+
+} // namespace solvers
+
+} // namespace parthenon
+
+#endif // SOLVERS_MG_SOLVER_STAGES_HPP_

From 6ebb671f185539c7a376149c19068d07e8d42995 Mon Sep 17 00:00:00 2001
From: Luke Roberts
Date: Thu, 3 Oct 2024 12:57:19 -0600
Subject: [PATCH 30/62] seemingly working staged MG

---
 example/poisson_gmg/poisson_package.cpp |   8 +-
 src/CMakeLists.txt                      |   1 +
 src/solvers/cg_solver_stages.hpp        |  18 +-
 src/solvers/mg_solver_stages.hpp        | 249 +++++++++++-------------
 4 files changed, 135 insertions(+), 141 deletions(-)

diff --git a/example/poisson_gmg/poisson_package.cpp b/example/poisson_gmg/poisson_package.cpp
index df71b6540c82..04e22d64976b 100644
--- a/example/poisson_gmg/poisson_package.cpp
+++ b/example/poisson_gmg/poisson_package.cpp
@@ -124,9 +124,11 @@ std::shared_ptr Initialize(ParameterInput *pin) {
   // Holds the discretized version of D in \nabla \cdot D(\vec{x}) \nabla u = rhs. D = 1
   // for the standard Poisson equation.
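For reference, the damped Jacobi smoother these staged-solver patches build on (the Jacobi kernel in mg_solver_stages.hpp above) performs the pointwise update

  x^{new}_i = \omega (rhs_i - [(A x^{old})_i - diag_i x^{old}_i]) / diag_i + (1 - \omega) x^{old}_i

This is a sketch in notation only, not additional patch content: \omega is the damping weight, diag the stored (possibly approximate) diagonal of A, and the bracketed term the off-diagonal contribution off_diag = A x^{old} - diag x^{old} that the kernel subtracts from the rhs before robust::ratio divides by the diagonal.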
pkg->AddField(D::name(), mD); - - auto mflux_comm = Metadata({Metadata::Cell, Metadata::Independent, Metadata::FillGhost, - Metadata::WithFluxes, Metadata::GMGRestrict}); + + std::vector flags{Metadata::Cell, Metadata::Independent, Metadata::FillGhost, + Metadata::WithFluxes, Metadata::GMGRestrict}; + if (solver == "CGStages") flags.push_back(Metadata::GMGProlongate); + auto mflux_comm = Metadata(flags); if (prolong == "Linear") { mflux_comm.RegisterRefinementOps(); } else if (prolong == "Constant") { diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index cddfc68dc30d..9ed63a7fe15a 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -230,6 +230,7 @@ add_library(parthenon solvers/cg_solver.hpp solvers/cg_solver_stages.hpp solvers/mg_solver.hpp + solvers/mg_solver_stages.hpp solvers/solver_utils.hpp solvers/solver_utils_stages.hpp diff --git a/src/solvers/cg_solver_stages.hpp b/src/solvers/cg_solver_stages.hpp index 3b4f981348d3..c3e1f22a586a 100644 --- a/src/solvers/cg_solver_stages.hpp +++ b/src/solvers/cg_solver_stages.hpp @@ -25,6 +25,7 @@ #include "interface/state_descriptor.hpp" #include "kokkos_abstraction.hpp" #include "solvers/mg_solver.hpp" +#include "solvers/mg_solver_stages.hpp" #include "solvers/cg_solver.hpp" #include "solvers/solver_utils.hpp" #include "solvers/solver_utils_stages.hpp" @@ -46,6 +47,7 @@ template class CGSolverStages : public SolverBase { using FieldTL = typename equations::IndependentVars; + using preconditioner_t = MGSolverStages; std::vector sol_fields; // Name of user defined container that should contain information required to @@ -67,7 +69,8 @@ class CGSolverStages : public SolverBase { StateDescriptor *pkg, CGParams params_in, const equations &eq_in = equations()) - : container_base(container_base), + : preconditioner(container_base, container_u, container_rhs, pkg, params_in.mg_params, eq_in), + container_base(container_base), container_u(container_u), container_rhs(container_rhs), params_(params_in), @@ -82,8 +85,7 @@ class CGSolverStages : public SolverBase { } TaskID AddSetupTasks(TaskList &tl, TaskID dependence, int partition, Mesh *pmesh) { - return dependence; - //return preconditioner.AddSetupTasks(tl, dependence, partition, pmesh); + return preconditioner.AddSetupTasks(tl, dependence, partition, pmesh); } TaskID AddTasks(TaskList &tl, TaskID dependence, const int partition, Mesh *pmesh) { @@ -160,11 +162,10 @@ class CGSolverStages : public SolverBase { // 1. 
u <- M r auto precon = reset; if (params_.precondition) { - //auto set_rhs = itl.AddTask(precon, TF(CopyData), md_r, m_rhs); - //auto zero_u = itl.AddTask(precon, TF(SetToZero), md_u); - //precon = - // preconditioner.AddLinearOperatorTasks(itl, set_rhs | zero_u, partition, pmesh); - PARTHENON_FAIL("Preconditioning not yet implemented."); + auto set_rhs = itl.AddTask(precon, TF(CopyData), md_r, md_rhs); + auto zero_u = itl.AddTask(precon, TF(SetToZero), md_u); + precon = + preconditioner.AddLinearOperatorTasks(itl, set_rhs | zero_u, partition, pmesh); } else { precon = itl.AddTask(precon, TF(CopyData), md_r, md_u); } @@ -253,6 +254,7 @@ class CGSolverStages : public SolverBase { CGParams &GetParams() { return params_; } protected: + preconditioner_t preconditioner; CGParams params_; int iter_counter; AllReduce ru, pAp, residual, rhs2; diff --git a/src/solvers/mg_solver_stages.hpp b/src/solvers/mg_solver_stages.hpp index d15bb9d56b3a..9805920cbced 100644 --- a/src/solvers/mg_solver_stages.hpp +++ b/src/solvers/mg_solver_stages.hpp @@ -26,6 +26,7 @@ #include "interface/state_descriptor.hpp" #include "kokkos_abstraction.hpp" #include "solvers/solver_utils.hpp" +#include "solvers/solver_utils_stages.hpp" #include "tasks/tasks.hpp" #include "utils/robust.hpp" #include "utils/type_list.hpp" @@ -34,47 +35,6 @@ namespace parthenon { namespace solvers { -struct MGParams { - int max_iters = 1000; - Real residual_tolerance = 1.e-12; - bool do_FAS = true; - std::string smoother = "SRJ2"; - bool two_by_two_diagonal = false; - int max_coarsenings = std::numeric_limits::max(); - std::string prolongation = "OldLinear"; - - MGParams() = default; - MGParams(ParameterInput *pin, const std::string &input_block) { - max_iters = pin->GetOrAddInteger(input_block, "max_iterations", max_iters); - residual_tolerance = - pin->GetOrAddReal(input_block, "residual_tolerance", residual_tolerance); - do_FAS = pin->GetOrAddBoolean(input_block, "do_FAS", do_FAS); - smoother = pin->GetOrAddString(input_block, "smoother", smoother); - prolongation = pin->GetOrAddString(input_block, "prolongation", prolongation); - two_by_two_diagonal = - pin->GetOrAddBoolean(input_block, "two_by_two_diagonal", two_by_two_diagonal); - max_coarsenings = - pin->GetOrAddInteger(input_block, "max_coarsenings", max_coarsenings); - } -}; - -class SolverBase { - public: - virtual ~SolverBase(){} - - virtual TaskID AddSetupTasks(TaskList &tl, TaskID dependence, int partition, - Mesh *pmesh) = 0; - virtual TaskID AddTasks(TaskList &tl, TaskID dependence, int partition, - Mesh *pmesh) = 0; - - Real GetFinalResidual() const { return final_residual; } - int GetFinalIterations() const { return final_iteration; } - - protected: - Real final_residual; - int final_iteration; -}; - // The equations class must include a template method // // template @@ -118,8 +78,7 @@ class MGSolverStages : public SolverBase { container_rhs(container_rhs), params_(params_in), iter_counter(0), - eqs_(eq_in), - container_(container) { + eqs_(eq_in) { FieldTL::IterateTypes([this](auto t){this->sol_fields.push_back(decltype(t)::name());}); std::string solver_id = "mg"; container_res_err = solver_id + "_res_err"; @@ -129,7 +88,7 @@ class MGSolverStages : public SolverBase { } TaskID AddTasks(TaskList &tl, TaskID dependence, const int partition, Mesh *pmesh) { - using namespace utils; + using namespace StageUtils; TaskID none; auto [itl, solve_id] = tl.AddSublist(dependence, {1, this->params_.max_iters}); iter_counter = -1; @@ -161,7 +120,7 @@ class MGSolverStages : public 
SolverBase { auto check = itl.AddTask( TaskQualifier::completion, get_res, "Check residual", - [partition](MGSolver *solver, Mesh *pmesh) { + [partition](MGSolverStages *solver, Mesh *pmesh) { Real rms_res = std::sqrt(solver->residual.val / pmesh->GetTotalCells()); if (Globals::my_rank == 0 && partition == 0) printf("%i %e\n", solver->iter_counter, rms_res); @@ -177,7 +136,7 @@ class MGSolverStages : public SolverBase { TaskID AddLinearOperatorTasks(TaskList &tl, TaskID dependence, int partition, Mesh *pmesh) { - using namespace utils; + using namespace StageUtils; iter_counter = 0; int min_level = std::max(pmesh->GetGMGMaxLevel() - params_.max_coarsenings, @@ -201,7 +160,7 @@ class MGSolverStages : public SolverBase { } TaskID AddSetupTasks(TaskList &tl, TaskID dependence, int partition, Mesh *pmesh) { - using namespace utils; + using namespace StageUtils; int min_level = std::max(pmesh->GetGMGMaxLevel() - params_.max_coarsenings, pmesh->GetGMGMinLevel()); @@ -229,45 +188,53 @@ class MGSolverStages : public SolverBase { // These functions apparently have to be public to compile with cuda since // they contain device side lambdas public: - template - TaskStatus Jacobi(std::shared_ptr> &md, double weight) { + TaskStatus Jacobi(std::shared_ptr> &md_rhs, + std::shared_ptr> &md_Ax, + std::shared_ptr> &md_diag, + std::shared_ptr> &md_xold, + std::shared_ptr> &md_xnew, + double weight) { using namespace parthenon; - const int ndim = md->GetMeshPointer()->ndim; + const int ndim = md_rhs->GetMeshPointer()->ndim; using TE = parthenon::TopologicalElement; TE te = TE::CC; - IndexRange ib = md->GetBoundsI(IndexDomain::interior, te); - IndexRange jb = md->GetBoundsJ(IndexDomain::interior, te); - IndexRange kb = md->GetBoundsK(IndexDomain::interior, te); + IndexRange ib = md_rhs->GetBoundsI(IndexDomain::interior, te); + IndexRange jb = md_rhs->GetBoundsJ(IndexDomain::interior, te); + IndexRange kb = md_rhs->GetBoundsK(IndexDomain::interior, te); - int nblocks = md->NumBlocks(); + int nblocks = md_rhs->NumBlocks(); std::vector include_block(nblocks, true); - if (md->grid.type == GridType::two_level_composite) { - int current_level = md->grid.logical_level; + if (md_rhs->grid.type == GridType::two_level_composite) { + int current_level = md_rhs->grid.logical_level; for (int b = 0; b < nblocks; ++b) { include_block[b] = - md->GetBlockData(b)->GetBlockPointer()->loc.level() == current_level; + md_rhs->GetBlockData(b)->GetBlockPointer()->loc.level() == current_level; } } static auto desc = - parthenon::MakePackDescriptor(md.get()); - auto pack = desc.GetPack(md.get(), include_block); + parthenon::MakePackDescriptorFromTypeList(md_rhs.get()); + auto pack_rhs = desc.GetPack(md_rhs.get(), include_block); + auto pack_Ax = desc.GetPack(md_Ax.get(), include_block); + auto pack_diag = desc.GetPack(md_diag.get(), include_block); + auto pack_xold = desc.GetPack(md_xold.get(), include_block); + auto pack_xnew = desc.GetPack(md_xnew.get(), include_block); const int scratch_size = 0; const int scratch_level = 0; parthenon::par_for_outer( DEFAULT_OUTER_LOOP_PATTERN, "Jacobi", DevExecSpace(), scratch_size, - scratch_level, 0, pack.GetNBlocks() - 1, kb.s, kb.e, + scratch_level, 0, pack_rhs.GetNBlocks() - 1, kb.s, kb.e, KOKKOS_LAMBDA(parthenon::team_mbr_t member, const int b, const int k) { const int nvars = - pack.GetUpperBound(b, xnew_t()) - pack.GetLowerBound(b, xnew_t()) + 1; + pack_rhs.GetUpperBound(b) - pack_rhs.GetLowerBound(b) + 1; for (int c = 0; c < nvars; ++c) { - Real *Ax = &pack(b, te, Axold_t(c), k, jb.s, 
ib.s); - Real *diag = &pack(b, te, D_t(c), k, jb.s, ib.s); - Real *prhs = &pack(b, te, rhs_t(c), k, jb.s, ib.s); - Real *xo = &pack(b, te, xold_t(c), k, jb.s, ib.s); - Real *xn = &pack(b, te, xnew_t(c), k, jb.s, ib.s); + Real *Ax = &pack_Ax(b, te, c, k, jb.s, ib.s); + Real *diag = &pack_diag(b, te, c, k, jb.s, ib.s); + Real *prhs = &pack_rhs(b, te, c, k, jb.s, ib.s); + Real *xo = &pack_xold(b, te, c, k, jb.s, ib.s); + Real *xn = &pack_xnew(b, te, c, k, jb.s, ib.s); // Use ptr arithmetic to get the number of points we need to go over // (including ghost zones) to get from (k, jb.s, ib.s) to (k, jb.e, ib.e) - const int npoints = &pack(b, te, Axold_t(c), k, jb.e, ib.e) - Ax + 1; + const int npoints = &pack_Ax(b, te, c, k, jb.e, ib.e) - Ax + 1; parthenon::par_for_inner( DEFAULT_INNER_LOOP_PATTERN, member, 0, npoints - 1, [&](const int idx) { const Real off_diag = Ax[idx] - diag[idx] * xo[idx]; @@ -280,25 +247,37 @@ class MGSolverStages : public SolverBase { return TaskStatus::complete; } - template - TaskID AddJacobiIteration(TL_t &tl, TaskID depends_on, bool multilevel, Real omega, - std::shared_ptr> &md, - std::shared_ptr> &md_comm) { - using namespace utils; + template + TaskID AddJacobiIteration(TaskList &tl, TaskID depends_on, bool multilevel, Real omega, + int partition, int level, + std::shared_ptr> &md_in, + std::shared_ptr> &md_out) { + using namespace StageUtils; + auto pmesh = md_in->GetMeshPointer(); + auto partitions = + pmesh->GetDefaultBlockPartitions(GridIdentifier::two_level_composite(level)); + auto &md_base = pmesh->mesh_data.Add(container_base, partitions[partition]); + auto &md_rhs = pmesh->mesh_data.Add(container_rhs, partitions[partition]); + auto &md_diag = pmesh->mesh_data.Add(container_diag, md_base, sol_fields); auto comm = - AddBoundaryExchangeTasks(depends_on, tl, md_comm, multilevel); - auto mat_mult = eqs_.template Ax(tl, comm, md); - return tl.AddTask(mat_mult, TF(&MGSolver::Jacobi), this, - md, omega); + AddBoundaryExchangeTasks(depends_on, tl, md_in, multilevel); + auto mat_mult = eqs_.template Ax(tl, comm, md_base, md_in, md_out); + return tl.AddTask(mat_mult, TF(&MGSolverStages::Jacobi), this, + md_rhs, md_out, md_diag, md_in, md_out, omega); } template TaskID AddSRJIteration(TL_t &tl, TaskID depends_on, int stages, bool multilevel, - std::shared_ptr> &md, - std::shared_ptr> &md_comm) { - using namespace utils; - int ndim = md->GetParentPointer()->ndim; + int partition, int level, Mesh *pmesh) { + using namespace StageUtils; + + const int ndim = pmesh->ndim; + auto partitions = + pmesh->GetDefaultBlockPartitions(GridIdentifier::two_level_composite(level)); + auto &md_base = pmesh->mesh_data.Add(container_base, partitions[partition]); + auto &md_u = pmesh->mesh_data.Add(container_u, md_base, sol_fields); + auto &md_temp = pmesh->mesh_data.Add(container_temp, md_base, sol_fields); std::array, 3> omega_M1{ {{1.0, 0.0, 0.0}, {1.0, 0.0, 0.0}, {1.0, 0.0, 0.0}}}; @@ -314,30 +293,31 @@ class MGSolverStages : public SolverBase { if (stages == 3) omega = omega_M3; // This copy is to set the coarse blocks in temp to the values in u so that // fine-coarse boundaries of temp are correctly updated during communication - depends_on = tl.AddTask(depends_on, TF(CopyData), md); - auto jacobi1 = AddJacobiIteration( - tl, depends_on, multilevel, omega[ndim - 1][0], md, md_comm); - auto copy1 = tl.AddTask(jacobi1, TF(CopyData), md); + depends_on = tl.AddTask(depends_on, TF(CopyData), md_u, md_temp); + auto jacobi1 = AddJacobiIteration( + tl, depends_on, multilevel, omega[ndim - 
1][0], partition, level, md_u, md_temp); + auto copy1 = tl.AddTask(jacobi1, TF(CopyData), md_temp, md_u); if (stages < 2) return copy1; - auto jacobi2 = AddJacobiIteration( - tl, copy1, multilevel, omega[ndim - 1][1], md, md_comm); - auto copy2 = tl.AddTask(jacobi2, TF(CopyData), md); + auto jacobi2 = AddJacobiIteration( + tl, copy1, multilevel, omega[ndim - 1][1], partition, level, md_u, md_temp); + auto copy2 = tl.AddTask(jacobi2, TF(CopyData), md_temp, md_u); if (stages < 3) return copy2; - auto jacobi3 = AddJacobiIteration( - tl, copy2, multilevel, omega[ndim - 1][2], md, md_comm); - return tl.AddTask(jacobi3, TF(CopyData), md); + auto jacobi3 = AddJacobiIteration( + tl, copy2, multilevel, omega[ndim - 1][2], partition, level, md_u, md_temp); + return tl.AddTask(jacobi3, TF(CopyData), md_temp, md_u); } template TaskID AddMultiGridSetupPartitionLevel(TL_t &tl, TaskID dependence, int partition, int level, int min_level, int max_level, Mesh *pmesh) { - using namespace utils; + using namespace StageUtils; auto partitions = pmesh->GetDefaultBlockPartitions(GridIdentifier::two_level_composite(level)); if (partition >= partitions.size()) return dependence; - auto &md = pmesh->mesh_data.Add(container_, partitions[partition]); + auto &md = pmesh->mesh_data.Add(container_base, partitions[partition]); + auto &md_diag = pmesh->mesh_data.Add(container_diag, md, sol_fields); auto task_out = dependence; if (level < max_level) { @@ -345,7 +325,7 @@ class MGSolverStages : public SolverBase { tl.AddTask(task_out, TF(ReceiveBoundBufs), md); task_out = tl.AddTask(task_out, TF(SetBounds), md); } - + task_out = tl.AddTask(task_out, BTF(&equations::template SetDiagonal), &eqs_, md, md_diag); // If we are finer than the coarsest level: if (level > min_level) { task_out = @@ -359,7 +339,7 @@ class MGSolverStages : public SolverBase { TaskID AddMultiGridTasksPartitionLevel(TaskList &tl, TaskID dependence, int partition, int level, int min_level, int max_level, Mesh *pmesh) { - using namespace utils; + using namespace StageUtils; auto smoother = params_.smoother; bool do_FAS = params_.do_FAS; int pre_stages, post_stages; @@ -392,23 +372,32 @@ class MGSolverStages : public SolverBase { auto partitions = pmesh->GetDefaultBlockPartitions(GridIdentifier::two_level_composite(level)); if (partition >= partitions.size()) return dependence; - auto &md = pmesh->mesh_data.Add(container_, partitions[partition]); - auto &md_comm = pmesh->mesh_data.AddShallow( - "mg_comm", md, std::vector{u::name(), res_err::name()}); + auto &md = pmesh->mesh_data.Add(container_base, partitions[partition]); + auto &md_u = pmesh->mesh_data.Add(container_u, partitions[partition]); + auto &md_rhs = pmesh->mesh_data.Add(container_rhs, partitions[partition]); + auto &md_res_err = pmesh->mesh_data.Add(container_res_err, md, sol_fields); + auto &md_temp = pmesh->mesh_data.Add(container_temp, md, sol_fields); + auto &md_u0 = pmesh->mesh_data.Add(container_u0, md, sol_fields); + auto &md_diag = pmesh->mesh_data.Add(container_diag, md, sol_fields); // 0. 
Receive residual from coarser level if there is one
     auto set_from_finer = dependence;
     if (level < max_level) {
       // Fill fields with restricted values
+      // TODO(LFR): This may not be fixable, since we need to communicate on two stages concurrently
       auto recv_from_finer = tl.AddTask(
-          dependence, TF(ReceiveBoundBufs), md_comm);
+          dependence, TF(ReceiveBoundBufs), md_u);
+      set_from_finer = tl.AddTask(
+          recv_from_finer, BTF(SetBounds), md_u);
+      recv_from_finer = tl.AddTask(
+          set_from_finer, TF(ReceiveBoundBufs), md_res_err);
       set_from_finer = tl.AddTask(
-          recv_from_finer, BTF(SetBounds), md_comm);
+          recv_from_finer, BTF(SetBounds), md_res_err);
       // 1. Copy residual from dual purpose communication field to the rhs, should be
       // actual RHS for finest level
       if (!do_FAS) {
-        auto zero_u = tl.AddTask(set_from_finer, BTF(SetToZero), md);
-        auto copy_rhs = tl.AddTask(set_from_finer, BTF(CopyData), md);
+        auto zero_u = tl.AddTask(set_from_finer, BTF(SetToZero), md_u);
+        auto copy_rhs = tl.AddTask(set_from_finer, BTF(CopyData), md_res_err, md_rhs);
         set_from_finer = zero_u | copy_rhs;
       } else {
         // TODO(LFR): Determine if this boundary exchange task is required, I think it is
         // to make sure that the boundaries of the restricted u are up to date before
         // calling Ax. That being said, at least in one case commenting this line out
         // didn't seem to impact the solution.
         set_from_finer = AddBoundaryExchangeTasks(
-            set_from_finer, tl, md_comm, multilevel);
-        set_from_finer = tl.AddTask(set_from_finer, BTF(CopyData), md);
+            set_from_finer, tl, md_u, multilevel);
+        set_from_finer = tl.AddTask(set_from_finer, BTF(CopyData), md_u, md_u0);
         // This should set the rhs only in blocks that correspond to interior nodes, the
         // RHS of leaf blocks that are on this GMG level should have already been set on
         // entry into multigrid
-        set_from_finer = eqs_.template Ax(tl, set_from_finer, md);
+        set_from_finer = eqs_.template Ax(tl, set_from_finer, md, md_u, md_temp);
         set_from_finer =
             tl.AddTask(set_from_finer,
-                       BTF(AddFieldsAndStoreInteriorSelect), md,
-                       1.0, 1.0, true);
+                       BTF(AddFieldsAndStoreInteriorSelect),
+                       md_temp, md_res_err, md_rhs, 1.0, 1.0, true);
       }
     } else {
-      set_from_finer = tl.AddTask(set_from_finer, BTF(CopyData), md);
+      set_from_finer = tl.AddTask(set_from_finer, BTF(CopyData), md_u, md_u0);
     }
 
     // 2. Do pre-smooth and fill solution on this level
-    set_from_finer =
-        tl.AddTask(set_from_finer, BTF(&equations::template SetDiagonal), &eqs_, md);
+    //set_from_finer =
+    //    tl.AddTask(set_from_finer, BTF(&equations::template SetDiagonal), &eqs_, md, md_diag);
     auto pre_smooth = AddSRJIteration(
-        tl, set_from_finer, pre_stages, multilevel, md, md_comm);
+        tl, set_from_finer, pre_stages, multilevel, partition, level, pmesh);
     // If we are finer than the coarsest level:
     auto post_smooth = pre_smooth;
     if (level > min_level) {
       // 3. Communicate same level boundaries so that u is up to date everywhere
       auto comm_u = AddBoundaryExchangeTasks(pre_smooth, tl,
-                                             md_comm, multilevel);
+                                             md_u, multilevel);
 
       // 4. Calculate residual and store in communication field
-      auto residual = eqs_.template Ax(tl, comm_u, md);
+      auto residual = eqs_.template Ax(tl, comm_u, md, md_u, md_temp);
       residual = tl.AddTask(
-          residual, BTF(AddFieldsAndStoreInteriorSelect), md,
+          residual, BTF(AddFieldsAndStoreInteriorSelect), md_rhs, md_temp, md_res_err,
           1.0, -1.0, false);
 
       // 5.
Restrict communication field and send to next level
+      // TODO(LFR): This may also need to be fixed.
       auto communicate_to_coarse = tl.AddTask(
-          residual, BTF(SendBoundBufs), md_comm);
+          residual, BTF(SendBoundBufs), md_u);
+      communicate_to_coarse = tl.AddTask(
+          communicate_to_coarse, BTF(SendBoundBufs), md_res_err);
 
       // 6. Receive error field into communication field and prolongate
       auto recv_from_coarser =
           tl.AddTask(communicate_to_coarse,
-                     TF(ReceiveBoundBufs), md_comm);
+                     TF(ReceiveBoundBufs), md_res_err);
       auto set_from_coarser = tl.AddTask(
-          recv_from_coarser, BTF(SetBounds), md_comm);
+          recv_from_coarser, BTF(SetBounds), md_res_err);
       auto prolongate = set_from_coarser;
       if (params_.prolongation == "User") {
-        prolongate = eqs_.template Prolongate(tl, set_from_coarser, md_comm);
+        //prolongate = eqs_.template Prolongate(tl, set_from_coarser, md_res_err);
+        PARTHENON_FAIL("Not implemented.");
       } else {
         prolongate =
             tl.AddTask(set_from_coarser,
-                       BTF(ProlongateBounds), md_comm);
+                       BTF(ProlongateBounds), md_res_err);
       }
 
       // 7. Correct solution on this level with res_err field and store in
       // communication field
       auto update_sol = tl.AddTask(
-          prolongate, BTF(AddFieldsAndStore), md, 1.0, 1.0);
+          prolongate, BTF(AddFieldsAndStore), md_u, md_res_err, md_u, 1.0, 1.0);
 
       // 8. Post smooth using communication field and stored RHS
       post_smooth = AddSRJIteration(tl, update_sol, post_stages,
-                                    multilevel, md, md_comm);
+                                    multilevel, partition, level, pmesh);
 
     } else {
-      post_smooth = tl.AddTask(pre_smooth, BTF(CopyData), md);
+      post_smooth = tl.AddTask(pre_smooth, BTF(CopyData), md_u, md_res_err);
     }
 
     // 9. Send communication field to next finer level (should be error field for that
     // level)
     TaskID last_task = post_smooth;
     if (level < max_level) {
       auto copy_over = post_smooth;
       if (!do_FAS) {
-        copy_over = tl.AddTask(post_smooth, BTF(CopyData), md);
+        copy_over = tl.AddTask(post_smooth, BTF(CopyData), md_u, md_res_err);
       } else {
         auto calc_err = tl.AddTask(
-            post_smooth, BTF(AddFieldsAndStore), md, 1.0, -1.0);
+            post_smooth, BTF(AddFieldsAndStore), md_u, md_u0, md_res_err, 1.0, -1.0);
         copy_over = calc_err;
       }
       // This is required to make sure boundaries of res_err are up to date before
       // prolongation
-      copy_over = tl.AddTask(copy_over, BTF(CopyData), md);
-      copy_over = tl.AddTask(copy_over, BTF(CopyData), md);
       auto boundary = AddBoundaryExchangeTasks(
-          copy_over, tl, md_comm, multilevel);
+          copy_over, tl, md_res_err, multilevel);
-      auto copy_back = tl.AddTask(boundary, BTF(CopyData), md);
-      copy_back = tl.AddTask(copy_back, BTF(CopyData), md);
-      last_task = tl.AddTask(copy_back,
-                             BTF(SendBoundBufs), md);
+      last_task = tl.AddTask(boundary,
+                             BTF(SendBoundBufs), md_res_err);
     }
     // The boundaries are not up to date on return
     return last_task;
   }
 };
 
 } // namespace solvers
 
 } // namespace parthenon
 
 #endif // SOLVERS_MG_SOLVER_STAGES_HPP_

From dee82412473f9e9febe48487b10df9fa38aab0e1 Mon Sep 17 00:00:00 2001
From: Luke Roberts
Date: Thu, 3 Oct 2024 13:13:42 -0600
Subject: [PATCH 31/62] start on bicg stages, not compiling

---
 src/solvers/bicgstab_solver_stages.hpp | 341 +++++++++++++++++++++++++
 1 file changed, 341 insertions(+)
 create mode 100644 src/solvers/bicgstab_solver_stages.hpp

diff --git a/src/solvers/bicgstab_solver_stages.hpp b/src/solvers/bicgstab_solver_stages.hpp
new file mode 100644
index 000000000000..e2bc10918b24
--- /dev/null
+++ b/src/solvers/bicgstab_solver_stages.hpp
@@ -0,0 +1,341 @@
+//========================================================================================
+// (C) (or copyright) 2023-2024. Triad National Security, LLC.
All rights reserved. +// +// This program was produced under U.S. Government contract 89233218CNA000001 for Los +// Alamos National Laboratory (LANL), which is operated by Triad National Security, LLC +// for the U.S. Department of Energy/National Nuclear Security Administration. All rights +// in the program are reserved by Triad National Security, LLC, and the U.S. Department +// of Energy/National Nuclear Security Administration. The Government is granted for +// itself and others acting on its behalf a nonexclusive, paid-up, irrevocable worldwide +// license in this material to reproduce, prepare derivative works, distribute copies to +// the public, perform publicly and display publicly, and to permit others to do so. +//======================================================================================== +#ifndef SOLVERS_BICGSTAB_SOLVER_STAGES_HPP_ +#define SOLVERS_BICGSTAB_SOLVER_STAGES_HPP_ + +#include +#include +#include +#include +#include + +#include "interface/mesh_data.hpp" +#include "interface/meshblock_data.hpp" +#include "interface/state_descriptor.hpp" +#include "kokkos_abstraction.hpp" +#include "solvers/mg_solver.hpp" +#include "solvers/solver_utils.hpp" +#include "tasks/tasks.hpp" +#include "utils/type_list.hpp" + +namespace parthenon { + +namespace solvers { + +// The equations class must include a template method +// +// template +// TaskID Ax(TL_t &tl, TaskID depends_on, std::shared_ptr> &md) +// +// that takes a field associated with x_t and applies +// the matrix A to it and stores the result in y_t. +template +class BiCGSTABSolverStages : public SolverBase { + using FieldTL = typename equations::IndependentVars; + using preconditioner_t = MGSolverStages; + + std::vector sol_fields; + // Name of user defined container that should contain information required to + // calculate the matrix part of the matrix vector product + std::string container_base; + // User defined container in which the solution will reside, only needs to contain sol_fields + // TODO(LFR): Also allow for an initial guess to come in here + std::string container_u; + // User defined container containing the rhs vector, only needs to contain sol_fields + std::string container_rhs; + // Internal containers for solver which create deep copies of sol_fields + std::string container_rhat0, container_v, container_h, container_s; + std::string container_t, container_r, container_p, container_x, container_diag; + + public: + + BiCGSTABSolverStages(const std::string &container_base, + const std::string &container_u, + const std::string &container_rhs, + StateDescriptor *pkg, + BiCGSTABParams params_in, + equations eq_in = equations()) + : preconditioner(container_base, container_u, container_rhs, pkg, params_in.mg_params, eq_in), + container_base(container_base), + container_u(container_u), + container_rhs(container_rhs), + params_(params_in), + iter_counter(0), + eqs_(eq_in) { + FieldTL::IterateTypes([this](auto t){this->sol_fields.push_back(decltype(t)::name());}); + std::string solver_id = "bicgstab"; + container_rhat0 = solver_id + "_rhat0"; + container_v = solver_id + "_v"; + container_h = solver_id + "_h"; + container_s = solver_id + "_s"; + container_t = solver_id + "_t"; + container_r = solver_id + "_r"; + container_p = solver_id + "_p"; + container_x = solver_id + "_x"; + container_diag = solver_id + "_diag"; + } + + TaskID AddSetupTasks(TaskList &tl, TaskID dependence, int partition, Mesh *pmesh) { + if (params_.precondition_type == Preconditioner::Multigrid) { + return 
preconditioner.AddSetupTasks(tl, dependence, partition, pmesh);
+    } else if (params_.precondition_type == Preconditioner::Diagonal) {
+      auto partitions = pmesh->GetDefaultBlockPartitions();
+      auto &md = pmesh->mesh_data.Add(container_base, partitions[partition]);
+      return tl.AddTask(dependence, &equations::template SetDiagonal, &eqs_, md);
+    } else {
+      return dependence;
+    }
+  }
+
+  TaskID AddTasks(TaskList &tl, TaskID dependence, const int partition, Mesh *pmesh) {
+    using namespace StageUtils;
+    TaskID none;
+
+    auto partitions = pmesh->GetDefaultBlockPartitions();
+    // Should contain all fields necessary for applying the matrix to a given state vector,
+    // e.g. diffusion coefficients and diagonal; these will not be modified by the solvers
+    auto &md_base = pmesh->mesh_data.Add(container_base, partitions[partition]);
+    // Container in which the solution is stored and with which the downstream user can
+    // interact. This container only requires the fields in sol_fields
+    auto &md_u = pmesh->mesh_data.Add(container_u, partitions[partition]);
+    // Container of the rhs, only requires fields in sol_fields
+    auto &md_rhs = pmesh->mesh_data.Add(container_rhs, partitions[partition]);
+    // Internal solver containers
+    auto &md_v = pmesh->mesh_data.Add(container_v, md_u, sol_fields);
+    auto &md_h = pmesh->mesh_data.Add(container_h, md_u, sol_fields);
+    auto &md_s = pmesh->mesh_data.Add(container_s, md_u, sol_fields);
+    auto &md_t = pmesh->mesh_data.Add(container_t, md_u, sol_fields);
+    auto &md_r = pmesh->mesh_data.Add(container_r, md_u, sol_fields);
+    auto &md_p = pmesh->mesh_data.Add(container_p, md_u, sol_fields);
+    auto &md_x = pmesh->mesh_data.Add(container_x, md_u, sol_fields);
+    auto &md_diag = pmesh->mesh_data.Add(container_diag, md_u, sol_fields);
+
+    iter_counter = 0;
+    bool multilevel = pmesh->multilevel;
+
+    // Initialization: x <- 0, r <- rhs, rhat0 <- rhs,
+    //                 rhat0r_old <- (rhat0, r), p <- r, u <- 0
+    auto zero_x = tl.AddTask(dependence, TF(SetToZero), md_x);
+    auto zero_u_init = tl.AddTask(dependence, TF(SetToZero), md_u);
+    auto copy_r = tl.AddTask(dependence, TF(CopyData), md_rhs, md_r);
+    auto copy_p = tl.AddTask(dependence, TF(CopyData), md_rhs, md_p);
+    auto copy_rhat0 = tl.AddTask(dependence, TF(CopyData), md_rhs, md_rhat0);
+    auto get_rhat0r_init = DotProduct(dependence, tl, &rhat0r, md_rhat0, md_r);
+    auto get_rhs2 = get_rhat0r_init;
+    if (params_.relative_residual || params_.print_per_step)
+      get_rhs2 = DotProduct(dependence, tl, &rhs2, md_rhs, md_rhs);
+    auto initialize = tl.AddTask(
+        TaskQualifier::once_per_region | TaskQualifier::local_sync,
+        zero_x | zero_u_init | copy_r | copy_p | copy_rhat0 | get_rhat0r_init | get_rhs2,
+        "zero factors",
+        [](BiCGSTABSolverStages *solver) {
+          solver->iter_counter = -1;
+          return TaskStatus::complete;
+        },
+        this);
+    tl.AddTask(
+        TaskQualifier::once_per_region, initialize, "print to screen",
+        [&](BiCGSTABSolverStages *solver, std::shared_ptr res_tol, bool relative_residual,
+            Mesh *pm) {
+          if (Globals::my_rank == 0 && params_.print_per_step) {
+            Real tol = relative_residual
+                           ?
*res_tol * std::sqrt(solver->rhs2.val / pm->GetTotalCells()) + : *res_tol; + printf("# [0] v-cycle\n# [1] rms-residual (tol = %e) \n# [2] rms-error\n", + tol); + printf("0 %e\n", std::sqrt(solver->rhs2.val / pm->GetTotalCells())); + } + return TaskStatus::complete; + }, + this, params_.residual_tolerance, params_.relative_residual, pmesh); + + // BEGIN ITERATIVE TASKS + auto [itl, solver_id] = tl.AddSublist(initialize, {1, params_.max_iters}); + + auto sync = itl.AddTask(TaskQualifier::local_sync, none, + []() { return TaskStatus::complete; }); + auto reset = itl.AddTask( + TaskQualifier::once_per_region, sync, "update values", + [](BiCGSTABSolverStages *solver) { + solver->rhat0r_old = solver->rhat0r.val; + solver->iter_counter++; + return TaskStatus::complete; + }, + this); + + // 1. u <- M p + auto precon1 = reset; + if (params_.precondition_type == Preconditioner::Multigrid) { + auto set_rhs = itl.AddTask(precon1, TF(CopyData), md); + auto zero_u = itl.AddTask(precon1, TF(SetToZero), md); + precon1 = + preconditioner.AddLinearOperatorTasks(itl, set_rhs | zero_u, partition, pmesh); + } else if (params_.precondition_type == Preconditioner::Diagonal) { + precon1 = itl.AddTask(precon1, TF(ADividedByB), md); + } else { + precon1 = itl.AddTask(precon1, TF(CopyData), md); + } + + // 2. v <- A u + auto comm = + AddBoundaryExchangeTasks(precon1, itl, md_comm, multilevel); + auto get_v = eqs_.template Ax(itl, comm, md); + + // 3. rhat0v <- (rhat0, v) + auto get_rhat0v = DotProduct(get_v, itl, &rhat0v, md); + + // 4. h <- x + alpha u (alpha = rhat0r_old / rhat0v) + auto correct_h = itl.AddTask( + get_rhat0v, "h <- x + alpha u", + [](BiCGSTABSolverStages *solver, std::shared_ptr> &md) { + Real alpha = solver->rhat0r_old / solver->rhat0v.val; + return AddFieldsAndStore(md, 1.0, alpha); + }, + this, md); + + // 5. s <- r - alpha v (alpha = rhat0r_old / rhat0v) + auto correct_s = itl.AddTask( + get_rhat0v, "s <- r - alpha v", + [](BiCGSTABSolverStages *solver, std::shared_ptr> &md) { + Real alpha = solver->rhat0r_old / solver->rhat0v.val; + return AddFieldsAndStore(md, 1.0, -alpha); + }, + this, md); + + // Check and print out residual + auto get_res = DotProduct(correct_s, itl, &residual, md); + + auto print = itl.AddTask( + TaskQualifier::once_per_region, get_res, + [&](BiCGSTABSolverStages *solver, Mesh *pmesh) { + Real rms_res = std::sqrt(solver->residual.val / pmesh->GetTotalCells()); + if (Globals::my_rank == 0 && solver->params_.print_per_step) + printf("%i %e\n", solver->iter_counter * 2 + 1, rms_res); + return TaskStatus::complete; + }, + this, pmesh); + + // 6. u <- M s + auto precon2 = correct_s; + if (params_.precondition_type == Preconditioner::Multigrid) { + auto set_rhs = itl.AddTask(precon2, TF(CopyData), md); + auto zero_u = itl.AddTask(precon2, TF(SetToZero), md); + precon2 = + preconditioner.AddLinearOperatorTasks(itl, set_rhs | zero_u, partition, pmesh); + } else if (params_.precondition_type == Preconditioner::Diagonal) { + precon2 = itl.AddTask(precon2, TF(ADividedByB), md); + } else { + precon2 = itl.AddTask(precon2, TF(CopyData), md); + } + + // 7. t <- A u + auto pre_t_comm = + AddBoundaryExchangeTasks(precon2, itl, md_comm, multilevel); + auto get_t = eqs_.template Ax(itl, pre_t_comm, md); + + // 8. omega <- (t,s) / (t,t) + auto get_ts = DotProduct(get_t, itl, &ts, md); + auto get_tt = DotProduct(get_t, itl, &tt, md); + + // 9. 
x <- h + omega u + auto correct_x = itl.AddTask( + get_tt | get_ts, "x <- h + omega u", + [](BiCGSTABSolverStages *solver, std::shared_ptr> &md) { + Real omega = solver->ts.val / solver->tt.val; + return AddFieldsAndStore(md, 1.0, omega); + }, + this, md); + + // 10. r <- s - omega t + auto correct_r = itl.AddTask( + get_tt | get_ts, "r <- s - omega t", + [](BiCGSTABSolverStages *solver, std::shared_ptr> &md) { + Real omega = solver->ts.val / solver->tt.val; + return AddFieldsAndStore(md, 1.0, -omega); + }, + this, md); + + // Check and print out residual + auto get_res2 = DotProduct(correct_r, itl, &residual, md); + + get_res2 = itl.AddTask( + TaskQualifier::once_per_region, get_res2, + [&](BiCGSTABSolverStages *solver, Mesh *pmesh) { + Real rms_err = std::sqrt(solver->residual.val / pmesh->GetTotalCells()); + if (Globals::my_rank == 0 && solver->params_.print_per_step) + printf("%i %e\n", solver->iter_counter * 2 + 2, rms_err); + return TaskStatus::complete; + }, + this, pmesh); + + // 11. rhat0r <- (rhat0, r) + auto get_rhat0r = DotProduct(correct_r, itl, &rhat0r, md); + + // 12. beta <- rhat0r / rhat0r_old * alpha / omega + // 13. p <- r + beta * (p - omega * v) + auto update_p = itl.AddTask( + get_rhat0r | get_res2, "p <- r + beta * (p - omega * v)", + [](BiCGSTABSolverStages *solver, std::shared_ptr> &md) { + Real alpha = solver->rhat0r_old / solver->rhat0v.val; + Real omega = solver->ts.val / solver->tt.val; + Real beta = solver->rhat0r.val / solver->rhat0r_old * alpha / omega; + AddFieldsAndStore(md, 1.0, -omega); + return AddFieldsAndStore(md, 1.0, beta); + return TaskStatus::complete; + }, + this, md); + + // 14. rhat0r_old <- rhat0r, zero all reductions + auto check = itl.AddTask( + TaskQualifier::completion, update_p | correct_x, "rhat0r_old <- rhat0r", + [partition](BiCGSTABSolverStages *solver, Mesh *pmesh, int max_iter, + std::shared_ptr res_tol, bool relative_residual) { + Real rms_res = std::sqrt(solver->residual.val / pmesh->GetTotalCells()); + solver->final_residual = rms_res; + solver->final_iteration = solver->iter_counter; + Real tol = relative_residual + ? 
*res_tol * std::sqrt(solver->rhs2.val / pmesh->GetTotalCells())
+                           : *res_tol;
+          if (rms_res < tol || solver->iter_counter >= max_iter) {
+            solver->final_residual = rms_res;
+            solver->final_iteration = solver->iter_counter;
+            return TaskStatus::complete;
+          }
+          return TaskStatus::iterate;
+        },
+        this, pmesh, params_.max_iters, params_.residual_tolerance,
+        params_.relative_residual);
+
+    return tl.AddTask(solver_id, TF(CopyData), md);
+  }
+
+  Real GetSquaredResidualSum() const { return residual.val; }
+  int GetCurrentIterations() const { return iter_counter; }
+
+  BiCGSTABParams &GetParams() { return params_; }
+
+ protected:
+  preconditioner_t preconditioner;
+  BiCGSTABParams params_;
+  int iter_counter;
+  AllReduce rtr, pAp, rhat0v, rhat0r, ts, tt, residual, rhs2;
+  Real rhat0r_old;
+  equations eqs_;
+  std::string container_;
+};
+
+} // namespace solvers
+
+} // namespace parthenon
+
+#endif // SOLVERS_BICGSTAB_SOLVER_STAGES_HPP_

From 3f9712f133cdcc1b5efa9b415555417c6c3e52ec Mon Sep 17 00:00:00 2001
From: Luke Roberts
Date: Thu, 3 Oct 2024 13:37:32 -0600
Subject: [PATCH 32/62] seemingly working staged bicgstab solver

---
 example/poisson_gmg/poisson_package.cpp | 10 ++-
 src/CMakeLists.txt                      |  1 +
 src/solvers/bicgstab_solver_stages.hpp  | 92 +++++++++++++++----------
 3 files changed, 65 insertions(+), 38 deletions(-)

diff --git a/example/poisson_gmg/poisson_package.cpp b/example/poisson_gmg/poisson_package.cpp
index 04e22d64976b..5efcc7b3e63e 100644
--- a/example/poisson_gmg/poisson_package.cpp
+++ b/example/poisson_gmg/poisson_package.cpp
@@ -24,6 +24,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -111,6 +112,13 @@ std::shared_ptr Initialize(ParameterInput *pin) {
                                     "base", "u", "rhs",
                                     pkg.get(), params,
                                     PoissEqStages(pin, "poisson"));
+  } else if (solver == "BiCGSTABStages") {
+    using PoissEqStages = poisson_package::PoissonEquationStages;
+    parthenon::solvers::BiCGSTABParams params(pin, "poisson/solver_params");
+    psolver = std::make_shared>(
+        "base", "u", "rhs",
+        pkg.get(), params,
+        PoissEqStages(pin, "poisson"));
   } else {
     PARTHENON_FAIL("Unknown solver type.");
   }
@@ -127,7 +135,7 @@ std::shared_ptr Initialize(ParameterInput *pin) {
 
   std::vector flags{Metadata::Cell, Metadata::Independent, Metadata::FillGhost,
                     Metadata::WithFluxes, Metadata::GMGRestrict};
-  if (solver == "CGStages") flags.push_back(Metadata::GMGProlongate);
+  if (solver == "CGStages" || solver == "BiCGSTABStages") flags.push_back(Metadata::GMGProlongate);
   auto mflux_comm = Metadata(flags);
   if (prolong == "Linear") {
     mflux_comm.RegisterRefinementOps();
   } else if (prolong == "Constant") {
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 9ed63a7fe15a..6be89faaa4e8 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -227,6 +227,7 @@ add_library(parthenon
   amr_criteria/refinement_package.hpp
 
   solvers/bicgstab_solver.hpp
+  solvers/bicgstab_solver_stages.hpp
   solvers/cg_solver.hpp
   solvers/cg_solver_stages.hpp
   solvers/mg_solver.hpp
diff --git a/src/solvers/bicgstab_solver_stages.hpp b/src/solvers/bicgstab_solver_stages.hpp
index e2bc10918b24..279615f132a4 100644
--- a/src/solvers/bicgstab_solver_stages.hpp
+++ b/src/solvers/bicgstab_solver_stages.hpp
@@ -24,7 +24,8 @@
 #include "interface/state_descriptor.hpp"
 #include "kokkos_abstraction.hpp"
 #include "solvers/mg_solver.hpp"
-#include "solvers/solver_utils.hpp"
+#include "solvers/mg_solver_stages.hpp"
+#include "solvers/solver_utils_stages.hpp"
 #include "tasks/tasks.hpp"
 #include "utils/type_list.hpp"
@@ -91,7 +92,8 @@ class BiCGSTABSolverStages : public SolverBase {
   } else if
(params_.precondition_type == Preconditioner::Diagonal) { auto partitions = pmesh->GetDefaultBlockPartitions(); auto &md = pmesh->mesh_data.Add(container_base, partitions[partition]); - return tl.AddTask(dependence, &equations::template SetDiagonal, &eqs_, md); + auto &md_diag = pmesh->mesh_data.Add(container_diag, md, sol_fields); + return tl.AddTask(dependence, &equations::template SetDiagonal, &eqs_, md, md_diag); } else { return dependence; } @@ -111,6 +113,7 @@ class BiCGSTABSolverStages : public SolverBase { // Container of the rhs, only requires fields in sol_fields auto &md_rhs = pmesh->mesh_data.Add(container_rhs, partitions[partition]); // Internal solver containers + auto &md_rhat0 = pmesh->mesh_data.Add(container_rhat0, md_u, sol_fields); auto &md_v = pmesh->mesh_data.Add(container_v, md_u, sol_fields); auto &md_h = pmesh->mesh_data.Add(container_h, md_u, sol_fields); auto &md_s = pmesh->mesh_data.Add(container_s, md_u, sol_fields); @@ -176,44 +179,50 @@ class BiCGSTABSolverStages : public SolverBase { // 1. u <- M p auto precon1 = reset; if (params_.precondition_type == Preconditioner::Multigrid) { - auto set_rhs = itl.AddTask(precon1, TF(CopyData), md); - auto zero_u = itl.AddTask(precon1, TF(SetToZero), md); + auto set_rhs = itl.AddTask(precon1, TF(CopyData), md_p, md_rhs); + auto zero_u = itl.AddTask(precon1, TF(SetToZero), md_u); precon1 = preconditioner.AddLinearOperatorTasks(itl, set_rhs | zero_u, partition, pmesh); } else if (params_.precondition_type == Preconditioner::Diagonal) { - precon1 = itl.AddTask(precon1, TF(ADividedByB), md); + precon1 = itl.AddTask(precon1, TF(ADividedByB), md_p, md_diag, md_u); } else { - precon1 = itl.AddTask(precon1, TF(CopyData), md); + precon1 = itl.AddTask(precon1, TF(CopyData), md_p, md_u); } // 2. v <- A u auto comm = - AddBoundaryExchangeTasks(precon1, itl, md_comm, multilevel); - auto get_v = eqs_.template Ax(itl, comm, md); + AddBoundaryExchangeTasks(precon1, itl, md_u, multilevel); + auto get_v = eqs_.template Ax(itl, comm, md_base, md_u, md_v); // 3. rhat0v <- (rhat0, v) - auto get_rhat0v = DotProduct(get_v, itl, &rhat0v, md); + auto get_rhat0v = DotProduct(get_v, itl, &rhat0v, md_rhat0, md_v); // 4. h <- x + alpha u (alpha = rhat0r_old / rhat0v) auto correct_h = itl.AddTask( get_rhat0v, "h <- x + alpha u", - [](BiCGSTABSolverStages *solver, std::shared_ptr> &md) { + [](BiCGSTABSolverStages *solver, + std::shared_ptr> &md_x, + std::shared_ptr> &md_u, + std::shared_ptr> &md_h) { Real alpha = solver->rhat0r_old / solver->rhat0v.val; - return AddFieldsAndStore(md, 1.0, alpha); + return AddFieldsAndStore(md_x, md_u, md_h, 1.0, alpha); }, - this, md); + this, md_x, md_u, md_h); // 5. s <- r - alpha v (alpha = rhat0r_old / rhat0v) auto correct_s = itl.AddTask( get_rhat0v, "s <- r - alpha v", - [](BiCGSTABSolverStages *solver, std::shared_ptr> &md) { + [](BiCGSTABSolverStages *solver, + std::shared_ptr> &md_r, + std::shared_ptr> &md_v, + std::shared_ptr> &md_s) { Real alpha = solver->rhat0r_old / solver->rhat0v.val; - return AddFieldsAndStore(md, 1.0, -alpha); + return AddFieldsAndStore(md_r, md_v, md_s, 1.0, -alpha); }, - this, md); + this, md_r, md_v, md_s); // Check and print out residual - auto get_res = DotProduct(correct_s, itl, &residual, md); + auto get_res = DotProduct(correct_s, itl, &residual, md_s, md_s); auto print = itl.AddTask( TaskQualifier::once_per_region, get_res, @@ -228,45 +237,51 @@ class BiCGSTABSolverStages : public SolverBase { // 6. 
u <- M s auto precon2 = correct_s; if (params_.precondition_type == Preconditioner::Multigrid) { - auto set_rhs = itl.AddTask(precon2, TF(CopyData), md); - auto zero_u = itl.AddTask(precon2, TF(SetToZero), md); + auto set_rhs = itl.AddTask(precon2, TF(CopyData), md_s, md_rhs); + auto zero_u = itl.AddTask(precon2, TF(SetToZero), md_u); precon2 = preconditioner.AddLinearOperatorTasks(itl, set_rhs | zero_u, partition, pmesh); } else if (params_.precondition_type == Preconditioner::Diagonal) { - precon2 = itl.AddTask(precon2, TF(ADividedByB), md); + precon2 = itl.AddTask(precon2, TF(ADividedByB), md_s, md_diag, md_u); } else { - precon2 = itl.AddTask(precon2, TF(CopyData), md); + precon2 = itl.AddTask(precon2, TF(CopyData), md_s, md_u); } // 7. t <- A u auto pre_t_comm = - AddBoundaryExchangeTasks(precon2, itl, md_comm, multilevel); - auto get_t = eqs_.template Ax(itl, pre_t_comm, md); + AddBoundaryExchangeTasks(precon2, itl, md_u, multilevel); + auto get_t = eqs_.template Ax(itl, pre_t_comm, md_base, md_u, md_t); // 8. omega <- (t,s) / (t,t) - auto get_ts = DotProduct(get_t, itl, &ts, md); - auto get_tt = DotProduct(get_t, itl, &tt, md); + auto get_ts = DotProduct(get_t, itl, &ts, md_t, md_s); + auto get_tt = DotProduct(get_t, itl, &tt, md_t, md_t); // 9. x <- h + omega u auto correct_x = itl.AddTask( get_tt | get_ts, "x <- h + omega u", - [](BiCGSTABSolverStages *solver, std::shared_ptr> &md) { + [](BiCGSTABSolverStages *solver, + std::shared_ptr> &md_h, + std::shared_ptr> &md_u, + std::shared_ptr> &md_x) { Real omega = solver->ts.val / solver->tt.val; - return AddFieldsAndStore(md, 1.0, omega); + return AddFieldsAndStore(md_h, md_u, md_x, 1.0, omega); }, - this, md); + this, md_h, md_u, md_x); // 10. r <- s - omega t auto correct_r = itl.AddTask( get_tt | get_ts, "r <- s - omega t", - [](BiCGSTABSolverStages *solver, std::shared_ptr> &md) { + [](BiCGSTABSolverStages *solver, + std::shared_ptr> &md_s, + std::shared_ptr> &md_t, + std::shared_ptr> &md_r) { Real omega = solver->ts.val / solver->tt.val; - return AddFieldsAndStore(md, 1.0, -omega); + return AddFieldsAndStore(md_s, md_t, md_r, 1.0, -omega); }, - this, md); + this, md_s, md_t, md_r); // Check and print out residual - auto get_res2 = DotProduct(correct_r, itl, &residual, md); + auto get_res2 = DotProduct(correct_r, itl, &residual, md_r, md_r); get_res2 = itl.AddTask( TaskQualifier::once_per_region, get_res2, @@ -279,21 +294,24 @@ class BiCGSTABSolverStages : public SolverBase { this, pmesh); // 11. rhat0r <- (rhat0, r) - auto get_rhat0r = DotProduct(correct_r, itl, &rhat0r, md); + auto get_rhat0r = DotProduct(correct_r, itl, &rhat0r, md_rhat0, md_r); // 12. beta <- rhat0r / rhat0r_old * alpha / omega // 13. p <- r + beta * (p - omega * v) auto update_p = itl.AddTask( get_rhat0r | get_res2, "p <- r + beta * (p - omega * v)", - [](BiCGSTABSolverStages *solver, std::shared_ptr> &md) { + [](BiCGSTABSolverStages *solver, + std::shared_ptr> &md_p, + std::shared_ptr> &md_v, + std::shared_ptr> &md_r) { Real alpha = solver->rhat0r_old / solver->rhat0v.val; Real omega = solver->ts.val / solver->tt.val; Real beta = solver->rhat0r.val / solver->rhat0r_old * alpha / omega; - AddFieldsAndStore(md, 1.0, -omega); - return AddFieldsAndStore(md, 1.0, beta); + AddFieldsAndStore(md_p, md_v, md_p, 1.0, -omega); + return AddFieldsAndStore(md_r, md_p, md_p, 1.0, beta); return TaskStatus::complete; }, - this, md); + this, md_p, md_v, md_r); // 14. 
rhat0r_old <- rhat0r, zero all reductions auto check = itl.AddTask( @@ -316,7 +334,7 @@ class BiCGSTABSolverStages : public SolverBase { this, pmesh, params_.max_iters, params_.residual_tolerance, params_.relative_residual); - return tl.AddTask(solver_id, TF(CopyData), md); + return tl.AddTask(solver_id, TF(CopyData), md_x, md_u); } Real GetSquaredResidualSum() const { return residual.val; } From aa7fe690e656ceb12d5b18a7eaef00ce1b0034a7 Mon Sep 17 00:00:00 2001 From: Luke Roberts Date: Thu, 3 Oct 2024 13:58:02 -0600 Subject: [PATCH 33/62] format and lint --- example/poisson_gmg/poisson_driver.cpp | 11 +- .../poisson_gmg/poisson_equation_stages.hpp | 71 ++++---- example/poisson_gmg/poisson_package.cpp | 38 ++--- src/interface/make_pack_descriptor.hpp | 5 +- src/solvers/bicgstab_solver_stages.hpp | 79 ++++----- src/solvers/cg_solver_stages.hpp | 64 ++++---- src/solvers/mg_solver.hpp | 8 +- src/solvers/mg_solver_stages.hpp | 152 +++++++++--------- src/solvers/solver_utils_stages.hpp | 32 ++-- 9 files changed, 224 insertions(+), 236 deletions(-) diff --git a/example/poisson_gmg/poisson_driver.cpp b/example/poisson_gmg/poisson_driver.cpp index 1c872fb667bc..c6f25181380e 100644 --- a/example/poisson_gmg/poisson_driver.cpp +++ b/example/poisson_gmg/poisson_driver.cpp @@ -48,7 +48,8 @@ parthenon::DriverStatus PoissonDriver::Execute() { // After running, retrieve the final residual for checking in tests auto pkg = pmesh->packages.Get("poisson_package"); - auto psolver = pkg->Param>("solver_pointer"); + auto psolver = + pkg->Param>("solver_pointer"); final_rms_residual = psolver->GetFinalResidual(); return DriverStatus::complete; @@ -62,7 +63,8 @@ TaskCollection PoissonDriver::MakeTaskCollection(BlockList_t &blocks) { auto pkg = pmesh->packages.Get("poisson_package"); auto use_exact_rhs = pkg->Param("use_exact_rhs"); - auto psolver = pkg->Param>("solver_pointer"); + auto psolver = + pkg->Param>("solver_pointer"); auto partitions = pmesh->GetDefaultBlockPartitions(); const int num_partitions = partitions.size(); @@ -86,8 +88,9 @@ TaskCollection PoissonDriver::MakeTaskCollection(BlockList_t &blocks) { // Set initial solution guess to zero auto zero_u = tl.AddTask(get_rhs, TF(solvers::utils::SetToZero), md); zero_u = tl.AddTask(zero_u, TF(solvers::utils::SetToZero), md_u); - zero_u = tl.AddTask(zero_u, TF(solvers::StageUtils::CopyData>), md, md_rhs); - zero_u = tl.AddTask(zero_u, TF(solvers::utils::CopyData), md_rhs); + zero_u = tl.AddTask( + zero_u, TF(solvers::StageUtils::CopyData>), md, md_rhs); + zero_u = tl.AddTask(zero_u, TF(solvers::utils::CopyData), md_rhs); auto setup = psolver->AddSetupTasks(tl, zero_u, i, pmesh); auto solve = psolver->AddTasks(tl, setup, i, pmesh); diff --git a/example/poisson_gmg/poisson_equation_stages.hpp b/example/poisson_gmg/poisson_equation_stages.hpp index 74b38b6f07df..0411cdee0f0a 100644 --- a/example/poisson_gmg/poisson_equation_stages.hpp +++ b/example/poisson_gmg/poisson_equation_stages.hpp @@ -40,8 +40,8 @@ class PoissonEquationStages { bool include_flux_dx = false; enum class ProlongationType { Constant, Linear, Kwak }; ProlongationType prolongation_type = ProlongationType::Constant; - - using IndependentVars = parthenon::TypeList; + + using IndependentVars = parthenon::TypeList; PoissonEquationStages(parthenon::ParameterInput *pin, const std::string &label) { do_flux_cor = pin->GetOrAddBoolean(label, "flux_correct", false); @@ -67,14 +67,16 @@ class PoissonEquationStages { std::shared_ptr> &md_in, std::shared_ptr> &md_out) { auto flux_res = 
tl.AddTask(depends_on, CalculateFluxes, md_mat, md_in); - //if (set_flux_boundary) { - // flux_res = tl.AddTask(flux_res, SetFluxBoundaries, md, include_flux_dx); - //} + // if (set_flux_boundary) { + // flux_res = tl.AddTask(flux_res, SetFluxBoundaries, md, include_flux_dx); + // } if (do_flux_cor && !(md_mat->grid.type == parthenon::GridType::two_level_composite)) { auto start_flxcor = tl.AddTask(flux_res, parthenon::StartReceiveFluxCorrections, md_in); - auto send_flxcor = tl.AddTask(flux_res, parthenon::LoadAndSendFluxCorrections, md_in); - auto recv_flxcor = tl.AddTask(start_flxcor, parthenon::ReceiveFluxCorrections, md_in); + auto send_flxcor = + tl.AddTask(flux_res, parthenon::LoadAndSendFluxCorrections, md_in); + auto recv_flxcor = + tl.AddTask(start_flxcor, parthenon::ReceiveFluxCorrections, md_in); flux_res = tl.AddTask(recv_flxcor, parthenon::SetFluxCorrections, md_in); } return tl.AddTask(flux_res, FluxMultiplyMatrix, md_in, md_out); @@ -84,7 +86,7 @@ class PoissonEquationStages { // For a uniform grid or when flux correction is ignored, this diagonal calculation // is exact. Exactness is (probably) not required since it is just used in Jacobi // iterations. - parthenon::TaskStatus SetDiagonal(std::shared_ptr> &md_mat, + parthenon::TaskStatus SetDiagonal(std::shared_ptr> &md_mat, std::shared_ptr> &md_diag) { using namespace parthenon; const int ndim = md_mat->GetMeshPointer()->ndim; @@ -109,21 +111,21 @@ class PoissonEquationStages { const auto &coords = pack_mat.GetCoordinates(b); // Build the unigrid diagonal of the matrix Real dx1 = coords.template Dxc(k, j, i); - Real diag_elem = - -(pack_mat(b, TE::F1, D_t(), k, j, i) + pack_mat(b, TE::F1, D_t(), k, j, i + 1)) / - (dx1 * dx1) - - alpha; + Real diag_elem = -(pack_mat(b, TE::F1, D_t(), k, j, i) + + pack_mat(b, TE::F1, D_t(), k, j, i + 1)) / + (dx1 * dx1) - + alpha; if (ndim > 1) { Real dx2 = coords.template Dxc(k, j, i); - diag_elem -= - (pack_mat(b, TE::F2, D_t(), k, j, i) + pack_mat(b, TE::F2, D_t(), k, j + 1, i)) / - (dx2 * dx2); + diag_elem -= (pack_mat(b, TE::F2, D_t(), k, j, i) + + pack_mat(b, TE::F2, D_t(), k, j + 1, i)) / + (dx2 * dx2); } if (ndim > 2) { Real dx3 = coords.template Dxc(k, j, i); - diag_elem -= - (pack_mat(b, TE::F3, D_t(), k, j, i) + pack_mat(b, TE::F3, D_t(), k + 1, j, i)) / - (dx3 * dx3); + diag_elem -= (pack_mat(b, TE::F3, D_t(), k, j, i) + + pack_mat(b, TE::F3, D_t(), k + 1, j, i)) / + (dx3 * dx3); } pack_diag(b, te, var_t(), k, j, i) = diag_elem; }); @@ -131,7 +133,8 @@ class PoissonEquationStages { } static parthenon::TaskStatus - CalculateFluxes(std::shared_ptr> &md_mat, std::shared_ptr> &md) { + CalculateFluxes(std::shared_ptr> &md_mat, + std::shared_ptr> &md) { using namespace parthenon; const int ndim = md->GetMeshPointer()->ndim; using TE = parthenon::TopologicalElement; @@ -145,11 +148,9 @@ class PoissonEquationStages { int nblocks = md->NumBlocks(); std::vector include_block(nblocks, true); - auto desc = - parthenon::MakePackDescriptor(md.get(), {}, {PDOpt::WithFluxes}); + auto desc = parthenon::MakePackDescriptor(md.get(), {}, {PDOpt::WithFluxes}); auto pack = desc.GetPack(md.get(), include_block); - auto desc_mat = - parthenon::MakePackDescriptor(md_mat.get(), {}); + auto desc_mat = parthenon::MakePackDescriptor(md_mat.get(), {}); auto pack_mat = desc_mat.GetPack(md_mat.get(), include_block); parthenon::par_for( "CaclulateFluxes", 0, pack.GetNBlocks() - 1, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e, @@ -408,8 +409,7 @@ class PoissonEquationStages { static auto desc = 
parthenon::MakePackDescriptor(md.get(), {}, {PDOpt::WithFluxes}); - static auto desc_out = - parthenon::MakePackDescriptor(md_out.get()); + static auto desc_out = parthenon::MakePackDescriptor(md_out.get()); auto pack = desc.GetPack(md.get(), include_block); auto pack_out = desc_out.GetPack(md_out.get(), include_block); parthenon::par_for( @@ -418,22 +418,25 @@ class PoissonEquationStages { const auto &coords = pack.GetCoordinates(b); Real dx1 = coords.template Dxc(k, j, i); pack_out(b, te, var_t(), k, j, i) = -alpha * pack(b, te, var_t(), k, j, i); - pack_out(b, te, var_t(), k, j, i) += (pack.flux(b, X1DIR, var_t(), k, j, i) - - pack.flux(b, X1DIR, var_t(), k, j, i + 1)) / - dx1; + pack_out(b, te, var_t(), k, j, i) += + (pack.flux(b, X1DIR, var_t(), k, j, i) - + pack.flux(b, X1DIR, var_t(), k, j, i + 1)) / + dx1; if (ndim > 1) { Real dx2 = coords.template Dxc(k, j, i); - pack_out(b, te, var_t(), k, j, i) += (pack.flux(b, X2DIR, var_t(), k, j, i) - - pack.flux(b, X2DIR, var_t(), k, j + 1, i)) / - dx2; + pack_out(b, te, var_t(), k, j, i) += + (pack.flux(b, X2DIR, var_t(), k, j, i) - + pack.flux(b, X2DIR, var_t(), k, j + 1, i)) / + dx2; } if (ndim > 2) { Real dx3 = coords.template Dxc(k, j, i); - pack_out(b, te, var_t(), k, j, i) += (pack.flux(b, X3DIR, var_t(), k, j, i) - - pack.flux(b, X3DIR, var_t(), k + 1, j, i)) / - dx3; + pack_out(b, te, var_t(), k, j, i) += + (pack.flux(b, X3DIR, var_t(), k, j, i) - + pack.flux(b, X3DIR, var_t(), k + 1, j, i)) / + dx3; } }); return TaskStatus::complete; diff --git a/example/poisson_gmg/poisson_package.cpp b/example/poisson_gmg/poisson_package.cpp index 5efcc7b3e63e..ca7a7964660d 100644 --- a/example/poisson_gmg/poisson_package.cpp +++ b/example/poisson_gmg/poisson_package.cpp @@ -94,36 +94,36 @@ std::shared_ptr Initialize(ParameterInput *pin) { PoissonEquation eq(pin, "poisson"); pkg->AddParam<>("poisson_equation", eq, parthenon::Params::Mutability::Mutable); - + std::shared_ptr psolver; if (solver == "MG") { parthenon::solvers::MGParams params(pin, "poisson/solver_params"); - psolver = std::make_shared>(pkg.get(), params, eq); - } else if (solver == "BiCGSTAB") { + psolver = std::make_shared>( + pkg.get(), params, eq); + } else if (solver == "BiCGSTAB") { parthenon::solvers::BiCGSTABParams params(pin, "poisson/solver_params"); - psolver = std::make_shared>(pkg.get(), params, eq); + psolver = + std::make_shared>( + pkg.get(), params, eq); } else if (solver == "CG") { parthenon::solvers::CGParams params(pin, "poisson/solver_params"); - psolver = std::make_shared>(pkg.get(), params, eq); + psolver = std::make_shared>( + pkg.get(), params, eq); } else if (solver == "CGStages") { using PoissEqStages = poisson_package::PoissonEquationStages; parthenon::solvers::CGParams params(pin, "poisson/solver_params"); psolver = std::make_shared>( - "base", "u", "rhs", - pkg.get(), params, - PoissEqStages(pin, "poisson")); + "base", "u", "rhs", pkg.get(), params, PoissEqStages(pin, "poisson")); } else if (solver == "BiCGSTABStages") { using PoissEqStages = poisson_package::PoissonEquationStages; parthenon::solvers::BiCGSTABParams params(pin, "poisson/solver_params"); psolver = std::make_shared>( - "base", "u", "rhs", - pkg.get(), params, - PoissEqStages(pin, "poisson")); - } else { + "base", "u", "rhs", pkg.get(), params, PoissEqStages(pin, "poisson")); + } else { PARTHENON_FAIL("Unknown solver type."); } - pkg->AddParam<>("solver_pointer", psolver); - + pkg->AddParam<>("solver_pointer", psolver); + using namespace parthenon::refinement_ops; auto mD = Metadata( 
{Metadata::Independent, Metadata::OneCopy, Metadata::Face, Metadata::GMGRestrict}); @@ -132,10 +132,12 @@ std::shared_ptr Initialize(ParameterInput *pin) { // Holds the discretized version of D in \nabla \cdot D(\vec{x}) \nabla u = rhs. D = 1 // for the standard Poisson equation. pkg->AddField(D::name(), mD); - - std::vector flags{Metadata::Cell, Metadata::Independent, Metadata::FillGhost, - Metadata::WithFluxes, Metadata::GMGRestrict}; - if (solver == "CGStages" || solver == "BiCGSTABStages") flags.push_back(Metadata::GMGProlongate); + + std::vector flags{Metadata::Cell, Metadata::Independent, + Metadata::FillGhost, Metadata::WithFluxes, + Metadata::GMGRestrict}; + if (solver == "CGStages" || solver == "BiCGSTABStages") + flags.push_back(Metadata::GMGProlongate); auto mflux_comm = Metadata(flags); if (prolong == "Linear") { mflux_comm.RegisterRefinementOps(); diff --git a/src/interface/make_pack_descriptor.hpp b/src/interface/make_pack_descriptor.hpp index 1805f84ba678..407ddfca8f83 100644 --- a/src/interface/make_pack_descriptor.hpp +++ b/src/interface/make_pack_descriptor.hpp @@ -86,8 +86,9 @@ inline auto MakePackDescriptor(StateDescriptor *psd, const std::vector -inline auto MakePackDescriptor(MeshData *pmd, Args&&...args) { - return MakePackDescriptor(pmd->GetMeshPointer()->resolved_packages.get(), std::forward(args)...); +inline auto MakePackDescriptor(MeshData *pmd, Args &&...args) { + return MakePackDescriptor(pmd->GetMeshPointer()->resolved_packages.get(), + std::forward(args)...); } template diff --git a/src/solvers/bicgstab_solver_stages.hpp b/src/solvers/bicgstab_solver_stages.hpp index 279615f132a4..2ba81187d515 100644 --- a/src/solvers/bicgstab_solver_stages.hpp +++ b/src/solvers/bicgstab_solver_stages.hpp @@ -46,12 +46,13 @@ class BiCGSTABSolverStages : public SolverBase { using preconditioner_t = MGSolverStages; std::vector sol_fields; - // Name of user defined container that should contain information required to + // Name of user defined container that should contain information required to // calculate the matrix part of the matrix vector product - std::string container_base; - // User defined container in which the solution will reside, only needs to contain sol_fields + std::string container_base; + // User defined container in which the solution will reside, only needs to contain + // sol_fields // TODO(LFR): Also allow for an initial guess to come in here - std::string container_u; + std::string container_u; // User defined container containing the rhs vector, only needs to contain sol_fields std::string container_rhs; // Internal containers for solver which create deep copies of sol_fields @@ -59,22 +60,16 @@ class BiCGSTABSolverStages : public SolverBase { std::string container_t, container_r, container_p, container_x, container_diag; public: - - BiCGSTABSolverStages(const std::string &container_base, - const std::string &container_u, - const std::string &container_rhs, - StateDescriptor *pkg, - BiCGSTABParams params_in, - equations eq_in = equations()) - : preconditioner(container_base, container_u, container_rhs, pkg, params_in.mg_params, eq_in), - container_base(container_base), - container_u(container_u), - container_rhs(container_rhs), - params_(params_in), - iter_counter(0), - eqs_(eq_in) { - FieldTL::IterateTypes([this](auto t){this->sol_fields.push_back(decltype(t)::name());}); - std::string solver_id = "bicgstab"; + BiCGSTABSolverStages(const std::string &container_base, const std::string &container_u, + const std::string &container_rhs, StateDescriptor 
*pkg, + BiCGSTABParams params_in, equations eq_in = equations()) + : preconditioner(container_base, container_u, container_rhs, pkg, + params_in.mg_params, eq_in), + container_base(container_base), container_u(container_u), + container_rhs(container_rhs), params_(params_in), iter_counter(0), eqs_(eq_in) { + FieldTL::IterateTypes( + [this](auto t) { this->sol_fields.push_back(decltype(t)::name()); }); + std::string solver_id = "bicgstab"; container_rhat0 = solver_id + "_rhat0"; container_v = solver_id + "_v"; container_h = solver_id + "_h"; @@ -102,13 +97,13 @@ class BiCGSTABSolverStages : public SolverBase { TaskID AddTasks(TaskList &tl, TaskID dependence, const int partition, Mesh *pmesh) { using namespace StageUtils; TaskID none; - + auto partitions = pmesh->GetDefaultBlockPartitions(); - // Should contain all fields necessary for applying the matrix to a give state vector, + // Should contain all fields necessary for applying the matrix to a give state vector, // e.g. diffusion coefficients and diagonal, these will not be modified by the solvers auto &md_base = pmesh->mesh_data.Add(container_base, partitions[partition]); - // Container in which the solution is stored and with which the downstream user can - // interact. This container only requires the fields in sol_fields + // Container in which the solution is stored and with which the downstream user can + // interact. This container only requires the fields in sol_fields auto &md_u = pmesh->mesh_data.Add(container_u, partitions[partition]); // Container of the rhs, only requires fields in sol_fields auto &md_rhs = pmesh->mesh_data.Add(container_rhs, partitions[partition]); @@ -148,8 +143,8 @@ class BiCGSTABSolverStages : public SolverBase { this); tl.AddTask( TaskQualifier::once_per_region, initialize, "print to screen", - [&](BiCGSTABSolverStages *solver, std::shared_ptr res_tol, bool relative_residual, - Mesh *pm) { + [&](BiCGSTABSolverStages *solver, std::shared_ptr res_tol, + bool relative_residual, Mesh *pm) { if (Globals::my_rank == 0 && params_.print_per_step) { Real tol = relative_residual ? *res_tol * std::sqrt(solver->rhs2.val / pm->GetTotalCells()) @@ -200,10 +195,8 @@ class BiCGSTABSolverStages : public SolverBase { // 4. h <- x + alpha u (alpha = rhat0r_old / rhat0v) auto correct_h = itl.AddTask( get_rhat0v, "h <- x + alpha u", - [](BiCGSTABSolverStages *solver, - std::shared_ptr> &md_x, - std::shared_ptr> &md_u, - std::shared_ptr> &md_h) { + [](BiCGSTABSolverStages *solver, std::shared_ptr> &md_x, + std::shared_ptr> &md_u, std::shared_ptr> &md_h) { Real alpha = solver->rhat0r_old / solver->rhat0v.val; return AddFieldsAndStore(md_x, md_u, md_h, 1.0, alpha); }, @@ -212,10 +205,8 @@ class BiCGSTABSolverStages : public SolverBase { // 5. s <- r - alpha v (alpha = rhat0r_old / rhat0v) auto correct_s = itl.AddTask( get_rhat0v, "s <- r - alpha v", - [](BiCGSTABSolverStages *solver, - std::shared_ptr> &md_r, - std::shared_ptr> &md_v, - std::shared_ptr> &md_s) { + [](BiCGSTABSolverStages *solver, std::shared_ptr> &md_r, + std::shared_ptr> &md_v, std::shared_ptr> &md_s) { Real alpha = solver->rhat0r_old / solver->rhat0v.val; return AddFieldsAndStore(md_r, md_v, md_s, 1.0, -alpha); }, @@ -259,10 +250,8 @@ class BiCGSTABSolverStages : public SolverBase { // 9. 
x <- h + omega u auto correct_x = itl.AddTask( get_tt | get_ts, "x <- h + omega u", - [](BiCGSTABSolverStages *solver, - std::shared_ptr> &md_h, - std::shared_ptr> &md_u, - std::shared_ptr> &md_x) { + [](BiCGSTABSolverStages *solver, std::shared_ptr> &md_h, + std::shared_ptr> &md_u, std::shared_ptr> &md_x) { Real omega = solver->ts.val / solver->tt.val; return AddFieldsAndStore(md_h, md_u, md_x, 1.0, omega); }, @@ -271,10 +260,8 @@ class BiCGSTABSolverStages : public SolverBase { // 10. r <- s - omega t auto correct_r = itl.AddTask( get_tt | get_ts, "r <- s - omega t", - [](BiCGSTABSolverStages *solver, - std::shared_ptr> &md_s, - std::shared_ptr> &md_t, - std::shared_ptr> &md_r) { + [](BiCGSTABSolverStages *solver, std::shared_ptr> &md_s, + std::shared_ptr> &md_t, std::shared_ptr> &md_r) { Real omega = solver->ts.val / solver->tt.val; return AddFieldsAndStore(md_s, md_t, md_r, 1.0, -omega); }, @@ -300,10 +287,8 @@ class BiCGSTABSolverStages : public SolverBase { // 13. p <- r + beta * (p - omega * v) auto update_p = itl.AddTask( get_rhat0r | get_res2, "p <- r + beta * (p - omega * v)", - [](BiCGSTABSolverStages *solver, - std::shared_ptr> &md_p, - std::shared_ptr> &md_v, - std::shared_ptr> &md_r) { + [](BiCGSTABSolverStages *solver, std::shared_ptr> &md_p, + std::shared_ptr> &md_v, std::shared_ptr> &md_r) { Real alpha = solver->rhat0r_old / solver->rhat0v.val; Real omega = solver->ts.val / solver->tt.val; Real beta = solver->rhat0r.val / solver->rhat0r_old * alpha / omega; @@ -356,4 +341,4 @@ class BiCGSTABSolverStages : public SolverBase { } // namespace parthenon -#endif // SOLVERS_BICGSTAB_SOLVER_HPP_ +#endif // SOLVERS_BICGSTAB_SOLVER_STAGES_HPP_ diff --git a/src/solvers/cg_solver_stages.hpp b/src/solvers/cg_solver_stages.hpp index c3e1f22a586a..8de9baf73576 100644 --- a/src/solvers/cg_solver_stages.hpp +++ b/src/solvers/cg_solver_stages.hpp @@ -24,9 +24,9 @@ #include "interface/meshblock_data.hpp" #include "interface/state_descriptor.hpp" #include "kokkos_abstraction.hpp" +#include "solvers/cg_solver.hpp" #include "solvers/mg_solver.hpp" #include "solvers/mg_solver_stages.hpp" -#include "solvers/cg_solver.hpp" #include "solvers/solver_utils.hpp" #include "solvers/solver_utils_stages.hpp" #include "tasks/tasks.hpp" @@ -45,38 +45,32 @@ namespace solvers { // the matrix A to it and stores the result in y_t. 
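// For the staged solvers specifically, the matrix-vector product reads its
// matrix data from a separate stage, as in the PoissonEquationStages example
// earlier in this series. A minimal sketch of such an equations class follows,
// purely for illustration; the names MyEquations and my_var are hypothetical
// and not part of this patch:
//
//   struct MyEquations {
//     // Fields the solver stages must carry
//     using IndependentVars = parthenon::TypeList<my_var>;
//
//     // Queue tasks computing md_out <- A md_in, with matrix data (e.g.
//     // diffusion coefficients) taken from md_mat
//     template <class TL_t>
//     TaskID Ax(TL_t &tl, TaskID depends_on,
//               std::shared_ptr<MeshData<Real>> &md_mat,
//               std::shared_ptr<MeshData<Real>> &md_in,
//               std::shared_ptr<MeshData<Real>> &md_out);
//
//     // Fill md_diag with (an approximation of) diag(A); used by the Jacobi
//     // smoother and the diagonal preconditioner
//     TaskStatus SetDiagonal(std::shared_ptr<MeshData<Real>> &md_mat,
//                            std::shared_ptr<MeshData<Real>> &md_diag);
//   };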
template class CGSolverStages : public SolverBase { - using FieldTL = typename equations::IndependentVars; using preconditioner_t = MGSolverStages; std::vector sol_fields; - // Name of user defined container that should contain information required to + // Name of user defined container that should contain information required to // calculate the matrix part of the matrix vector product - std::string container_base; - // User defined container in which the solution will reside, only needs to contain sol_fields + std::string container_base; + // User defined container in which the solution will reside, only needs to contain + // sol_fields // TODO(LFR): Also allow for an initial guess to come in here - std::string container_u; + std::string container_u; // User defined container containing the rhs vector, only needs to contain sol_fields std::string container_rhs; // Internal containers for solver which create deep copies of sol_fields std::string container_x, container_r, container_v, container_p; - - public: - CGSolverStages(const std::string &container_base, - const std::string &container_u, - const std::string &container_rhs, - StateDescriptor *pkg, - CGParams params_in, - const equations &eq_in = equations()) - : preconditioner(container_base, container_u, container_rhs, pkg, params_in.mg_params, eq_in), - container_base(container_base), - container_u(container_u), - container_rhs(container_rhs), - params_(params_in), - iter_counter(0), - eqs_(eq_in) { - FieldTL::IterateTypes([this](auto t){this->sol_fields.push_back(decltype(t)::name());}); + public: + CGSolverStages(const std::string &container_base, const std::string &container_u, + const std::string &container_rhs, StateDescriptor *pkg, + CGParams params_in, const equations &eq_in = equations()) + : preconditioner(container_base, container_u, container_rhs, pkg, + params_in.mg_params, eq_in), + container_base(container_base), container_u(container_u), + container_rhs(container_rhs), params_(params_in), iter_counter(0), eqs_(eq_in) { + FieldTL::IterateTypes( + [this](auto t) { this->sol_fields.push_back(decltype(t)::name()); }); std::string solver_id = "cg"; container_x = solver_id + "_x"; container_r = solver_id + "_r"; @@ -92,18 +86,19 @@ class CGSolverStages : public SolverBase { using namespace StageUtils; TaskID none; auto partitions = pmesh->GetDefaultBlockPartitions(); - // Should contain all fields necessary for applying the matrix to a give state vector, + // Should contain all fields necessary for applying the matrix to a give state vector, // e.g. diffusion coefficients and diagonal, these will not be modified by the solvers auto &md_base = pmesh->mesh_data.Add(container_base, partitions[partition]); - // Container in which the solution is stored and with which the downstream user can - // interact. This container only requires the fields in sol_fields + // Container in which the solution is stored and with which the downstream user can + // interact. 
This container only requires the fields in sol_fields auto &md_u = pmesh->mesh_data.Add(container_u, partitions[partition]); // Container of the rhs, only requires fields in sol_fields auto &md_rhs = pmesh->mesh_data.Add(container_rhs, partitions[partition]); // Internal solver containers auto &md_x = pmesh->mesh_data.Add(container_x, md_u, sol_fields); auto &md_r = pmesh->mesh_data.Add(container_r, md_u, sol_fields); - // TODO(LFR): The v container can probably be removed and the u container used in its stead + // TODO(LFR): The v container can probably be removed and the u container used in its + // stead auto &md_v = pmesh->mesh_data.Add(container_v, md_u, sol_fields); auto &md_p = pmesh->mesh_data.Add(container_p, md_u, sol_fields); @@ -132,8 +127,8 @@ class CGSolverStages : public SolverBase { if (params_.print_per_step && Globals::my_rank == 0) { initialize = tl.AddTask( TaskQualifier::once_per_region, initialize, "print to screen", - [&](CGSolverStages *solver, std::shared_ptr res_tol, bool relative_residual, - Mesh *pm) { + [&](CGSolverStages *solver, std::shared_ptr res_tol, + bool relative_residual, Mesh *pm) { Real tol = relative_residual ? *res_tol * std::sqrt(solver->rhs2.val / pm->GetTotalCells()) : *res_tol; @@ -176,7 +171,8 @@ class CGSolverStages : public SolverBase { // 3. p <- u + beta p auto correct_p = itl.AddTask( get_ru, "p <- u + beta p", - [](CGSolverStages *solver, std::shared_ptr> &md_u, std::shared_ptr> &md_p) { + [](CGSolverStages *solver, std::shared_ptr> &md_u, + std::shared_ptr> &md_p) { Real beta = solver->iter_counter > 0 ? solver->ru.val / solver->ru_old : 0.0; return AddFieldsAndStore(md_u, md_p, md_p, 1.0, beta); }, @@ -193,8 +189,7 @@ class CGSolverStages : public SolverBase { // 6. x <- x + alpha p auto correct_x = itl.AddTask( get_pAp, "x <- x + alpha p", - [](CGSolverStages *solver, - std::shared_ptr> &md_x, + [](CGSolverStages *solver, std::shared_ptr> &md_x, std::shared_ptr> &md_p) { Real alpha = solver->ru.val / solver->pAp.val; return AddFieldsAndStore(md_x, md_p, md_x, 1.0, alpha); @@ -204,8 +199,7 @@ class CGSolverStages : public SolverBase { // 6. 
r <- r - alpha A p auto correct_r = itl.AddTask( get_pAp, "r <- r - alpha A p", - [](CGSolverStages *solver, - std::shared_ptr> &md_r, + [](CGSolverStages *solver, std::shared_ptr> &md_r, std::shared_ptr> &md_v) { Real alpha = solver->ru.val / solver->pAp.val; return AddFieldsAndStore(md_r, md_v, md_r, 1.0, -alpha); @@ -227,8 +221,8 @@ class CGSolverStages : public SolverBase { auto check = itl.AddTask( TaskQualifier::completion, get_res | correct_x, "completion", - [](CGSolverStages *solver, Mesh *pmesh, int max_iter, std::shared_ptr res_tol, - bool relative_residual) { + [](CGSolverStages *solver, Mesh *pmesh, int max_iter, + std::shared_ptr res_tol, bool relative_residual) { Real rms_res = std::sqrt(solver->residual.val / pmesh->GetTotalCells()); solver->final_residual = rms_res; solver->final_iteration = solver->iter_counter; diff --git a/src/solvers/mg_solver.hpp b/src/solvers/mg_solver.hpp index 28dbc74dc5dd..1e92473e0282 100644 --- a/src/solvers/mg_solver.hpp +++ b/src/solvers/mg_solver.hpp @@ -60,16 +60,16 @@ struct MGParams { class SolverBase { public: - virtual ~SolverBase(){} + virtual ~SolverBase() {} virtual TaskID AddSetupTasks(TaskList &tl, TaskID dependence, int partition, - Mesh *pmesh) = 0; + Mesh *pmesh) = 0; virtual TaskID AddTasks(TaskList &tl, TaskID dependence, int partition, - Mesh *pmesh) = 0; + Mesh *pmesh) = 0; Real GetFinalResidual() const { return final_residual; } int GetFinalIterations() const { return final_iteration; } - + protected: Real final_residual; int final_iteration; diff --git a/src/solvers/mg_solver_stages.hpp b/src/solvers/mg_solver_stages.hpp index 9805920cbced..be0dec6e09de 100644 --- a/src/solvers/mg_solver_stages.hpp +++ b/src/solvers/mg_solver_stages.hpp @@ -55,31 +55,26 @@ class MGSolverStages : public SolverBase { using FieldTL = typename equations::IndependentVars; std::vector sol_fields; - - // Name of user defined container that should contain information required to + + // Name of user defined container that should contain information required to // calculate the matrix part of the matrix vector product - std::string container_base; - // User defined container in which the solution will reside, only needs to contain sol_fields + std::string container_base; + // User defined container in which the solution will reside, only needs to contain + // sol_fields // TODO(LFR): Also allow for an initial guess to come in here - std::string container_u; + std::string container_u; // User defined container containing the rhs vector, only needs to contain sol_fields std::string container_rhs; // Internal containers for solver which create deep copies of sol_fields std::string container_res_err, container_temp, container_u0, container_diag; - MGSolverStages(const std::string &container_base, - const std::string &container_u, - const std::string &container_rhs, - StateDescriptor *pkg, - MGParams params_in, - equations eq_in = equations()) - : container_base(container_base), - container_u(container_u), - container_rhs(container_rhs), - params_(params_in), - iter_counter(0), - eqs_(eq_in) { - FieldTL::IterateTypes([this](auto t){this->sol_fields.push_back(decltype(t)::name());}); + MGSolverStages(const std::string &container_base, const std::string &container_u, + const std::string &container_rhs, StateDescriptor *pkg, + MGParams params_in, equations eq_in = equations()) + : container_base(container_base), container_u(container_u), + container_rhs(container_rhs), params_(params_in), iter_counter(0), eqs_(eq_in) { + FieldTL::IterateTypes( + [this](auto 
t) { this->sol_fields.push_back(decltype(t)::name()); }); std::string solver_id = "mg"; container_res_err = solver_id + "_res_err"; container_temp = solver_id + "_temp"; @@ -113,10 +108,11 @@ class MGSolverStages : public SolverBase { auto comm = AddBoundaryExchangeTasks(mg_finest, itl, md_u, pmesh->multilevel); auto calc_pointwise_res = eqs_.template Ax(itl, comm, md, md_u, md_res_err); - calc_pointwise_res = itl.AddTask( - calc_pointwise_res, TF(AddFieldsAndStoreInteriorSelect), - md_rhs, md_res_err, md_res_err, 1.0, -1.0, false); - auto get_res = DotProduct(calc_pointwise_res, itl, &residual, md_res_err, md_res_err); + calc_pointwise_res = + itl.AddTask(calc_pointwise_res, TF(AddFieldsAndStoreInteriorSelect), + md_rhs, md_res_err, md_res_err, 1.0, -1.0, false); + auto get_res = + DotProduct(calc_pointwise_res, itl, &residual, md_res_err, md_res_err); auto check = itl.AddTask( TaskQualifier::completion, get_res, "Check residual", @@ -188,12 +184,11 @@ class MGSolverStages : public SolverBase { // These functions apparently have to be public to compile with cuda since // they contain device side lambdas public: - TaskStatus Jacobi(std::shared_ptr> &md_rhs, + TaskStatus Jacobi(std::shared_ptr> &md_rhs, std::shared_ptr> &md_Ax, std::shared_ptr> &md_diag, std::shared_ptr> &md_xold, - std::shared_ptr> &md_xnew, - double weight) { + std::shared_ptr> &md_xnew, double weight) { using namespace parthenon; const int ndim = md_rhs->GetMeshPointer()->ndim; using TE = parthenon::TopologicalElement; @@ -211,8 +206,7 @@ class MGSolverStages : public SolverBase { md_rhs->GetBlockData(b)->GetBlockPointer()->loc.level() == current_level; } } - static auto desc = - parthenon::MakePackDescriptorFromTypeList(md_rhs.get()); + static auto desc = parthenon::MakePackDescriptorFromTypeList(md_rhs.get()); auto pack_rhs = desc.GetPack(md_rhs.get(), include_block); auto pack_Ax = desc.GetPack(md_Ax.get(), include_block); auto pack_diag = desc.GetPack(md_diag.get(), include_block); @@ -221,11 +215,10 @@ class MGSolverStages : public SolverBase { const int scratch_size = 0; const int scratch_level = 0; parthenon::par_for_outer( - DEFAULT_OUTER_LOOP_PATTERN, "Jacobi", DevExecSpace(), scratch_size, - scratch_level, 0, pack_rhs.GetNBlocks() - 1, kb.s, kb.e, + DEFAULT_OUTER_LOOP_PATTERN, "Jacobi", DevExecSpace(), scratch_size, scratch_level, + 0, pack_rhs.GetNBlocks() - 1, kb.s, kb.e, KOKKOS_LAMBDA(parthenon::team_mbr_t member, const int b, const int k) { - const int nvars = - pack_rhs.GetUpperBound(b) - pack_rhs.GetLowerBound(b) + 1; + const int nvars = pack_rhs.GetUpperBound(b) - pack_rhs.GetLowerBound(b) + 1; for (int c = 0; c < nvars; ++c) { Real *Ax = &pack_Ax(b, te, c, k, jb.s, ib.s); Real *diag = &pack_diag(b, te, c, k, jb.s, ib.s); @@ -263,8 +256,8 @@ class MGSolverStages : public SolverBase { auto comm = AddBoundaryExchangeTasks(depends_on, tl, md_in, multilevel); auto mat_mult = eqs_.template Ax(tl, comm, md_base, md_in, md_out); - return tl.AddTask(mat_mult, TF(&MGSolverStages::Jacobi), this, - md_rhs, md_out, md_diag, md_in, md_out, omega); + return tl.AddTask(mat_mult, TF(&MGSolverStages::Jacobi), this, md_rhs, md_out, + md_diag, md_in, md_out, omega); } template @@ -272,7 +265,7 @@ class MGSolverStages : public SolverBase { int partition, int level, Mesh *pmesh) { using namespace StageUtils; - const int ndim = pmesh->ndim; + const int ndim = pmesh->ndim; auto partitions = pmesh->GetDefaultBlockPartitions(GridIdentifier::two_level_composite(level)); auto &md_base = pmesh->mesh_data.Add(container_base, 
partitions[partition]); @@ -325,7 +318,8 @@ class MGSolverStages : public SolverBase { tl.AddTask(task_out, TF(ReceiveBoundBufs), md); task_out = tl.AddTask(task_out, TF(SetBounds), md); } - task_out = tl.AddTask(task_out, BTF(&equations::template SetDiagonal), &eqs_, md, md_diag); + task_out = + tl.AddTask(task_out, BTF(&equations::template SetDiagonal), &eqs_, md, md_diag); // If we are finer than the coarsest level: if (level > min_level) { task_out = @@ -384,20 +378,23 @@ class MGSolverStages : public SolverBase { auto set_from_finer = dependence; if (level < max_level) { // Fill fields with restricted values - // TODO: ARGH, WTF this may not be fixable since we need to communicate on two stages concurrently + // TODO(LFR): Need to make sure that this communication pattern is ok, since we are + // trying to concurrently communicate on two stages auto recv_from_finer = tl.AddTask( dependence, TF(ReceiveBoundBufs), md_u); - set_from_finer = tl.AddTask( - recv_from_finer, BTF(SetBounds), md_u); - recv_from_finer = tl.AddTask( - set_from_finer, TF(ReceiveBoundBufs), md_res_err); + set_from_finer = tl.AddTask(recv_from_finer, + BTF(SetBounds), md_u); + recv_from_finer = + tl.AddTask(set_from_finer, + TF(ReceiveBoundBufs), md_res_err); set_from_finer = tl.AddTask( recv_from_finer, BTF(SetBounds), md_res_err); // 1. Copy residual from dual purpose communication field to the rhs, should be // actual RHS for finest level if (!do_FAS) { auto zero_u = tl.AddTask(set_from_finer, BTF(SetToZero), md_u); - auto copy_rhs = tl.AddTask(set_from_finer, BTF(CopyData), md_res_err, md_rhs); + auto copy_rhs = + tl.AddTask(set_from_finer, BTF(CopyData), md_res_err, md_rhs); set_from_finer = zero_u | copy_rhs; } else { // TODO(LFR): Determine if this boundary exchange task is required, I think it is @@ -406,72 +403,76 @@ class MGSolverStages : public SolverBase { // didn't seem to impact the solution. set_from_finer = AddBoundaryExchangeTasks( set_from_finer, tl, md_u, multilevel); - set_from_finer = tl.AddTask(set_from_finer, BTF(CopyData), md_u, md_u0); + set_from_finer = + tl.AddTask(set_from_finer, BTF(CopyData), md_u, md_u0); // This should set the rhs only in blocks that correspond to interior nodes, the // RHS of leaf blocks that are on this GMG level should have already been set on // entry into multigrid set_from_finer = eqs_.template Ax(tl, set_from_finer, md, md_u, md_temp); - set_from_finer = - tl.AddTask(set_from_finer, - BTF(AddFieldsAndStoreInteriorSelect), - md_temp, md_res_err, md_rhs, 1.0, 1.0, true); + set_from_finer = tl.AddTask(set_from_finer, + BTF(AddFieldsAndStoreInteriorSelect), + md_temp, md_res_err, md_rhs, 1.0, 1.0, true); } } else { - set_from_finer = tl.AddTask(set_from_finer, BTF(CopyData), md_u, md_u0); + set_from_finer = + tl.AddTask(set_from_finer, BTF(CopyData), md_u, md_u0); } // 2. Do pre-smooth and fill solution on this level - //set_from_finer = - // tl.AddTask(set_from_finer, BTF(&equations::template SetDiagonal), &eqs_, md, md_diag); + // set_from_finer = + // tl.AddTask(set_from_finer, BTF(&equations::template SetDiagonal), &eqs_, md, + // md_diag); auto pre_smooth = AddSRJIteration( tl, set_from_finer, pre_stages, multilevel, partition, level, pmesh); // If we are finer than the coarsest level: auto post_smooth = pre_smooth; if (level > min_level) { // 3. 
Communicate same level boundaries so that u is up to date everywhere - auto comm_u = AddBoundaryExchangeTasks(pre_smooth, tl, - md_u, multilevel); + auto comm_u = AddBoundaryExchangeTasks(pre_smooth, tl, md_u, + multilevel); // 4. Caclulate residual and store in communication field auto residual = eqs_.template Ax(tl, comm_u, md, md_u, md_temp); - residual = tl.AddTask( - residual, BTF(AddFieldsAndStoreInteriorSelect), md_rhs, md_temp, md_res_err, - 1.0, -1.0, false); + residual = tl.AddTask(residual, BTF(AddFieldsAndStoreInteriorSelect), + md_rhs, md_temp, md_res_err, 1.0, -1.0, false); // 5. Restrict communication field and send to next level - // TODO: ARGH, this also needs to get fixed, possibly - auto communicate_to_coarse = tl.AddTask( - residual, BTF(SendBoundBufs), md_u); - communicate_to_coarse = tl.AddTask( - communicate_to_coarse, BTF(SendBoundBufs), md_res_err); + // TODO(LFR): Other place where we are receiving two stage communication + auto communicate_to_coarse = + tl.AddTask(residual, BTF(SendBoundBufs), md_u); + communicate_to_coarse = + tl.AddTask(communicate_to_coarse, + BTF(SendBoundBufs), md_res_err); // 6. Receive error field into communication field and prolongate auto recv_from_coarser = tl.AddTask(communicate_to_coarse, TF(ReceiveBoundBufs), md_res_err); - auto set_from_coarser = tl.AddTask( - recv_from_coarser, BTF(SetBounds), md_res_err); + auto set_from_coarser = + tl.AddTask(recv_from_coarser, BTF(SetBounds), + md_res_err); auto prolongate = set_from_coarser; if (params_.prolongation == "User") { - //prolongate = eqs_.template Prolongate(tl, set_from_coarser, md_res_err); + // prolongate = eqs_.template Prolongate(tl, set_from_coarser, md_res_err); PARTHENON_FAIL("Not implemented."); } else { - prolongate = - tl.AddTask(set_from_coarser, - BTF(ProlongateBounds), md_res_err); + prolongate = tl.AddTask(set_from_coarser, + BTF(ProlongateBounds), + md_res_err); } // 7. Correct solution on this level with res_err field and store in // communication field - auto update_sol = tl.AddTask( - prolongate, BTF(AddFieldsAndStore), md_u, md_res_err, md_u, 1.0, 1.0); + auto update_sol = tl.AddTask(prolongate, BTF(AddFieldsAndStore), + md_u, md_res_err, md_u, 1.0, 1.0); // 8. Post smooth using communication field and stored RHS - post_smooth = AddSRJIteration(tl, update_sol, post_stages, - multilevel, partition, level, pmesh); + post_smooth = AddSRJIteration( + tl, update_sol, post_stages, multilevel, partition, level, pmesh); } else { - post_smooth = tl.AddTask(pre_smooth, BTF(CopyData), md_u, md_res_err); + post_smooth = + tl.AddTask(pre_smooth, BTF(CopyData), md_u, md_res_err); } // 9. 
Send communication field to next finer level (should be error field for that @@ -480,18 +481,19 @@ class MGSolverStages : public SolverBase { if (level < max_level) { auto copy_over = post_smooth; if (!do_FAS) { - copy_over = tl.AddTask(post_smooth, BTF(CopyData), md_u, md_res_err); + copy_over = + tl.AddTask(post_smooth, BTF(CopyData), md_u, md_res_err); } else { - auto calc_err = tl.AddTask( - post_smooth, BTF(AddFieldsAndStore), md_u, md_u0, md_res_err, 1.0, -1.0); + auto calc_err = tl.AddTask(post_smooth, BTF(AddFieldsAndStore), + md_u, md_u0, md_res_err, 1.0, -1.0); copy_over = calc_err; } // This is required to make sure boundaries of res_err are up to date before // prolongation auto boundary = AddBoundaryExchangeTasks( copy_over, tl, md_res_err, multilevel); - last_task = tl.AddTask(boundary, - BTF(SendBoundBufs), md_res_err); + last_task = tl.AddTask( + boundary, BTF(SendBoundBufs), md_res_err); } // The boundaries are not up to date on return return last_task; diff --git a/src/solvers/solver_utils_stages.hpp b/src/solvers/solver_utils_stages.hpp index fa7196349cc1..613bce728e07 100644 --- a/src/solvers/solver_utils_stages.hpp +++ b/src/solvers/solver_utils_stages.hpp @@ -46,9 +46,9 @@ TaskStatus CopyData(const std::shared_ptr> &md_in, const int npoints_inner = (kb.e - kb.s + 1) * (jb.e - jb.s + 1) * (ib.e - ib.s + 1); parthenon::par_for_outer( DEFAULT_OUTER_LOOP_PATTERN, "CopyData", DevExecSpace(), scratch_size, scratch_level, - 0, pack_in.GetNBlocks() - 1, KOKKOS_LAMBDA(parthenon::team_mbr_t member, const int b) { - const int nvars = - pack_in.GetUpperBound(b) - pack_in.GetLowerBound(b) + 1; + 0, pack_in.GetNBlocks() - 1, + KOKKOS_LAMBDA(parthenon::team_mbr_t member, const int b) { + const int nvars = pack_in.GetUpperBound(b) - pack_in.GetLowerBound(b) + 1; for (int c = 0; c < nvars; ++c) { Real *in = &pack_in(b, te, c, kb.s, jb.s, ib.s); Real *out = &pack_out(b, te, c, kb.s, jb.s, ib.s); @@ -79,7 +79,7 @@ TaskStatus AddFieldsAndStoreInteriorSelect(const std::shared_ptr> for (int b = 0; b < nblocks; ++b) include_block[b] = md_a->GetBlockData(b)->GetBlockPointer()->neighbors.size() == 0; } - + static auto desc = parthenon::MakePackDescriptorFromTypeList(md_a.get()); auto pack_a = desc.GetPack(md_a.get(), include_block, only_fine_on_composite); auto pack_b = desc.GetPack(md_b.get(), include_block, only_fine_on_composite); @@ -108,10 +108,10 @@ TaskStatus AddFieldsAndStoreInteriorSelect(const std::shared_ptr> template TaskStatus AddFieldsAndStore(const std::shared_ptr> &md_a, const std::shared_ptr> &md_b, - const std::shared_ptr> &md_out, - Real wa = 1.0, Real wb = 1.0) { - return AddFieldsAndStoreInteriorSelect( - md_a, md_b, md_out, wa, wb, false); + const std::shared_ptr> &md_out, Real wa = 1.0, + Real wb = 1.0) { + return AddFieldsAndStoreInteriorSelect(md_a, md_b, md_out, + wa, wb, false); } template @@ -137,8 +137,7 @@ TaskStatus SetToZero(const std::shared_ptr> &md) { for (int c = 0; c < nvars; ++c) { parthenon::par_for_inner( parthenon::inner_loop_pattern_simdfor_tag, member, kb.s, kb.e, jb.s, jb.e, - ib.s, ib.e, - [&](int k, int j, int i) { pack(b, te, c, k, j, i) = 0.0; }); + ib.s, ib.e, [&](int k, int j, int i) { pack(b, te, c, k, j, i) = 0.0; }); } }); return TaskStatus::complete; @@ -157,13 +156,12 @@ TaskStatus ADividedByB(const std::shared_ptr> &md_a, auto pack_b = desc.GetPack(md_b.get()); auto pack_out = desc.GetPack(md_out.get()); parthenon::par_for( - DEFAULT_LOOP_PATTERN, "DotProduct", DevExecSpace(), 0, pack_a.GetNBlocks() - 1, kb.s, - kb.e, jb.s, jb.e, ib.s, 
ib.e, + DEFAULT_LOOP_PATTERN, "DotProduct", DevExecSpace(), 0, pack_a.GetNBlocks() - 1, + kb.s, kb.e, jb.s, jb.e, ib.s, ib.e, KOKKOS_LAMBDA(const int b, const int k, const int j, const int i) { const int nvars = pack_a.GetUpperBound(b) - pack_a.GetLowerBound(b) + 1; for (int c = 0; c < nvars; ++c) - pack_out(b, c, k, j, i) = - pack_a(b, c, k, j, i) / pack_b(b, c, k, j, i); + pack_out(b, c, k, j, i) = pack_a(b, c, k, j, i) / pack_b(b, c, k, j, i); }); return TaskStatus::complete; } @@ -210,8 +208,8 @@ TaskID DotProduct(TaskID dependency_in, TaskList &tl, AllReduce *adotb, return TaskStatus::complete; }, adotb); - auto get_adotb = tl.AddTask(TaskQualifier::local_sync, zero_adotb, - DotProductLocal, md_a, md_b, adotb); + auto get_adotb = tl.AddTask(TaskQualifier::local_sync, zero_adotb, DotProductLocal, + md_a, md_b, adotb); auto start_global_adotb = tl.AddTask(TaskQualifier::once_per_region, get_adotb, &AllReduce::StartReduce, adotb, MPI_SUM); auto finish_global_adotb = @@ -220,7 +218,7 @@ TaskID DotProduct(TaskID dependency_in, TaskList &tl, AllReduce *adotb, return finish_global_adotb; } -} // namespace utils +} // namespace StageUtils } // namespace solvers From 548dc7f0740a972e95bd9389bd13b2f98b5f74e3 Mon Sep 17 00:00:00 2001 From: Luke Roberts Date: Thu, 3 Oct 2024 14:36:48 -0600 Subject: [PATCH 34/62] Allow user defined prolongation in stage based solvers --- .../poisson_gmg/poisson_equation_stages.hpp | 28 ++++++++++--------- src/interface/make_pack_descriptor.hpp | 6 ---- src/solvers/mg_solver_stages.hpp | 5 +--- 3 files changed, 16 insertions(+), 23 deletions(-) diff --git a/example/poisson_gmg/poisson_equation_stages.hpp b/example/poisson_gmg/poisson_equation_stages.hpp index 0411cdee0f0a..b3e6aa36d3aa 100644 --- a/example/poisson_gmg/poisson_equation_stages.hpp +++ b/example/poisson_gmg/poisson_equation_stages.hpp @@ -67,9 +67,9 @@ class PoissonEquationStages { std::shared_ptr> &md_in, std::shared_ptr> &md_out) { auto flux_res = tl.AddTask(depends_on, CalculateFluxes, md_mat, md_in); - // if (set_flux_boundary) { - // flux_res = tl.AddTask(flux_res, SetFluxBoundaries, md, include_flux_dx); - // } + if (set_flux_boundary) { + flux_res = tl.AddTask(flux_res, SetFluxBoundaries, md_mat, md_in, include_flux_dx); + } if (do_flux_cor && !(md_mat->grid.type == parthenon::GridType::two_level_composite)) { auto start_flxcor = tl.AddTask(flux_res, parthenon::StartReceiveFluxCorrections, md_in); @@ -192,17 +192,17 @@ class PoissonEquationStages { return TaskStatus::complete; } - template + template parthenon::TaskID Prolongate(parthenon::TaskList &tl, parthenon::TaskID depends_on, std::shared_ptr> &md) { if (prolongation_type == ProlongationType::Constant) { - return tl.AddTask(depends_on, ProlongateImpl, + return tl.AddTask(depends_on, ProlongateImpl, md); } else if (prolongation_type == ProlongationType::Linear) { - return tl.AddTask(depends_on, ProlongateImpl, + return tl.AddTask(depends_on, ProlongateImpl, md); } else if (prolongation_type == ProlongationType::Kwak) { - return tl.AddTask(depends_on, ProlongateImpl, + return tl.AddTask(depends_on, ProlongateImpl, md); } return depends_on; @@ -227,7 +227,7 @@ class PoissonEquationStages { return 0.0; } - template + template static parthenon::TaskStatus ProlongateImpl(std::shared_ptr> &md) { using namespace parthenon; @@ -247,9 +247,9 @@ class PoissonEquationStages { include_block[b] = md->grid.logical_level == md->GetBlockData(b)->GetBlockPointer()->loc.level(); } - const auto desc = parthenon::MakePackDescriptor(md.get()); + const auto 
desc = parthenon::MakePackDescriptorFromTypeList(md.get()); const auto desc_coarse = - parthenon::MakePackDescriptor(md.get(), {}, {PDOpt::Coarse}); + parthenon::MakePackDescriptorFromTypeList(md.get(), std::vector{}, std::set{PDOpt::Coarse}); auto pack = desc.GetPack(md.get(), include_block); auto pack_coarse = desc_coarse.GetPack(md.get(), include_block); @@ -314,7 +314,7 @@ class PoissonEquationStages { } static parthenon::TaskStatus - SetFluxBoundaries(std::shared_ptr> &md, bool do_flux_dx) { + SetFluxBoundaries(std::shared_ptr> &md_mat, std::shared_ptr> &md, bool do_flux_dx) { using namespace parthenon; const int ndim = md->GetMeshPointer()->ndim; IndexRange ib = md->GetBoundsI(IndexDomain::interior); @@ -327,8 +327,10 @@ class PoissonEquationStages { std::vector include_block(nblocks, true); auto desc = - parthenon::MakePackDescriptor(md.get(), {}, {PDOpt::WithFluxes}); + parthenon::MakePackDescriptor(md.get(), {}, {PDOpt::WithFluxes}); + auto desc_mat = parthenon::MakePackDescriptor(md.get()); auto pack = desc.GetPack(md.get(), include_block); + auto pack_mat = desc_mat.GetPack(md_mat.get(), include_block); const std::size_t scratch_size_in_bytes = 0; const std::size_t scratch_level = 1; @@ -369,7 +371,7 @@ class PoissonEquationStages { [&](const int idx) { const auto [k, j, i] = idxer(idx); pack.flux(b, dir, var_t(), k, j, i) = - sign * pack(b, te, D_t(), k, j, i) * + sign * pack_mat(b, te, D_t(), k, j, i) * pack(b, var_t(), k + koff, j + joff, i + ioff) / (0.5 * dx); }); } diff --git a/src/interface/make_pack_descriptor.hpp b/src/interface/make_pack_descriptor.hpp index 407ddfca8f83..2a604338cdb7 100644 --- a/src/interface/make_pack_descriptor.hpp +++ b/src/interface/make_pack_descriptor.hpp @@ -85,12 +85,6 @@ inline auto MakePackDescriptor(StateDescriptor *psd, const std::vector -inline auto MakePackDescriptor(MeshData *pmd, Args &&...args) { - return MakePackDescriptor(pmd->GetMeshPointer()->resolved_packages.get(), - std::forward(args)...); -} - template inline auto MakePackDescriptor(MeshBlockData *pmbd, const std::vector &flags = {}, diff --git a/src/solvers/mg_solver_stages.hpp b/src/solvers/mg_solver_stages.hpp index be0dec6e09de..1c39fb2cc944 100644 --- a/src/solvers/mg_solver_stages.hpp +++ b/src/solvers/mg_solver_stages.hpp @@ -419,9 +419,6 @@ class MGSolverStages : public SolverBase { } // 2. 
Do pre-smooth and fill solution on this level - // set_from_finer = - // tl.AddTask(set_from_finer, BTF(&equations::template SetDiagonal), &eqs_, md, - // md_diag); auto pre_smooth = AddSRJIteration( tl, set_from_finer, pre_stages, multilevel, partition, level, pmesh); // If we are finer than the coarsest level: @@ -453,7 +450,7 @@ class MGSolverStages : public SolverBase { md_res_err); auto prolongate = set_from_coarser; if (params_.prolongation == "User") { - // prolongate = eqs_.template Prolongate(tl, set_from_coarser, md_res_err); + prolongate = eqs_.template Prolongate(tl, set_from_coarser, md_res_err); PARTHENON_FAIL("Not implemented."); } else { prolongate = tl.AddTask(set_from_coarser, From c090262678cb5a4066eea12022aac321eafab3a5 Mon Sep 17 00:00:00 2001 From: Luke Roberts Date: Thu, 3 Oct 2024 14:49:55 -0600 Subject: [PATCH 35/62] small --- example/poisson_gmg/poisson_equation_stages.hpp | 17 ++++++++--------- src/solvers/mg_solver_stages.hpp | 1 - 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/example/poisson_gmg/poisson_equation_stages.hpp b/example/poisson_gmg/poisson_equation_stages.hpp index b3e6aa36d3aa..46189fc40560 100644 --- a/example/poisson_gmg/poisson_equation_stages.hpp +++ b/example/poisson_gmg/poisson_equation_stages.hpp @@ -14,6 +14,7 @@ #define EXAMPLE_POISSON_GMG_POISSON_EQUATION_STAGES_HPP_ #include +#include #include #include #include @@ -199,11 +200,9 @@ class PoissonEquationStages { return tl.AddTask(depends_on, ProlongateImpl, md); } else if (prolongation_type == ProlongationType::Linear) { - return tl.AddTask(depends_on, ProlongateImpl, - md); + return tl.AddTask(depends_on, ProlongateImpl, md); } else if (prolongation_type == ProlongationType::Kwak) { - return tl.AddTask(depends_on, ProlongateImpl, - md); + return tl.AddTask(depends_on, ProlongateImpl, md); } return depends_on; } @@ -248,8 +247,8 @@ class PoissonEquationStages { md->grid.logical_level == md->GetBlockData(b)->GetBlockPointer()->loc.level(); } const auto desc = parthenon::MakePackDescriptorFromTypeList(md.get()); - const auto desc_coarse = - parthenon::MakePackDescriptorFromTypeList(md.get(), std::vector{}, std::set{PDOpt::Coarse}); + const auto desc_coarse = parthenon::MakePackDescriptorFromTypeList( + md.get(), std::vector{}, std::set{PDOpt::Coarse}); auto pack = desc.GetPack(md.get(), include_block); auto pack_coarse = desc_coarse.GetPack(md.get(), include_block); @@ -314,7 +313,8 @@ class PoissonEquationStages { } static parthenon::TaskStatus - SetFluxBoundaries(std::shared_ptr> &md_mat, std::shared_ptr> &md, bool do_flux_dx) { + SetFluxBoundaries(std::shared_ptr> &md_mat, + std::shared_ptr> &md, bool do_flux_dx) { using namespace parthenon; const int ndim = md->GetMeshPointer()->ndim; IndexRange ib = md->GetBoundsI(IndexDomain::interior); @@ -326,8 +326,7 @@ class PoissonEquationStages { int nblocks = md->NumBlocks(); std::vector include_block(nblocks, true); - auto desc = - parthenon::MakePackDescriptor(md.get(), {}, {PDOpt::WithFluxes}); + auto desc = parthenon::MakePackDescriptor(md.get(), {}, {PDOpt::WithFluxes}); auto desc_mat = parthenon::MakePackDescriptor(md.get()); auto pack = desc.GetPack(md.get(), include_block); auto pack_mat = desc_mat.GetPack(md_mat.get(), include_block); diff --git a/src/solvers/mg_solver_stages.hpp b/src/solvers/mg_solver_stages.hpp index 1c39fb2cc944..40e12f8665e3 100644 --- a/src/solvers/mg_solver_stages.hpp +++ b/src/solvers/mg_solver_stages.hpp @@ -451,7 +451,6 @@ class MGSolverStages : public SolverBase { auto prolongate = 
set_from_coarser; if (params_.prolongation == "User") { prolongate = eqs_.template Prolongate(tl, set_from_coarser, md_res_err); - PARTHENON_FAIL("Not implemented."); } else { prolongate = tl.AddTask(set_from_coarser, BTF(ProlongateBounds), From 8dd7962895cc140ccafffdbab4b17ea049e32928 Mon Sep 17 00:00:00 2001 From: Luke Roberts Date: Thu, 3 Oct 2024 15:13:58 -0600 Subject: [PATCH 36/62] make base class separate file --- src/CMakeLists.txt | 1 + src/solvers/bicgstab_solver.hpp | 1 + src/solvers/bicgstab_solver_stages.hpp | 1 + src/solvers/cg_solver.hpp | 1 + src/solvers/cg_solver_stages.hpp | 2 +- src/solvers/mg_solver.hpp | 18 +-------- src/solvers/mg_solver_stages.hpp | 2 +- src/solvers/solver_base.hpp | 53 ++++++++++++++++++++++++++ 8 files changed, 60 insertions(+), 19 deletions(-) create mode 100644 src/solvers/solver_base.hpp diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 6be89faaa4e8..165c7c0c357b 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -232,6 +232,7 @@ add_library(parthenon solvers/cg_solver_stages.hpp solvers/mg_solver.hpp solvers/mg_solver_stages.hpp + solvers/solver_base.hpp solvers/solver_utils.hpp solvers/solver_utils_stages.hpp diff --git a/src/solvers/bicgstab_solver.hpp b/src/solvers/bicgstab_solver.hpp index 7a14982097b1..94a5824f47aa 100644 --- a/src/solvers/bicgstab_solver.hpp +++ b/src/solvers/bicgstab_solver.hpp @@ -24,6 +24,7 @@ #include "interface/state_descriptor.hpp" #include "kokkos_abstraction.hpp" #include "solvers/mg_solver.hpp" +#include "solvers/solver_base.hpp" #include "solvers/solver_utils.hpp" #include "tasks/tasks.hpp" #include "utils/type_list.hpp" diff --git a/src/solvers/bicgstab_solver_stages.hpp b/src/solvers/bicgstab_solver_stages.hpp index 2ba81187d515..5c3ac98d89a5 100644 --- a/src/solvers/bicgstab_solver_stages.hpp +++ b/src/solvers/bicgstab_solver_stages.hpp @@ -25,6 +25,7 @@ #include "kokkos_abstraction.hpp" #include "solvers/mg_solver.hpp" #include "solvers/mg_solver_stages.hpp" +#include "solvers/solver_base.hpp" #include "solvers/solver_utils_stages.hpp" #include "tasks/tasks.hpp" #include "utils/type_list.hpp" diff --git a/src/solvers/cg_solver.hpp b/src/solvers/cg_solver.hpp index 85a789423c91..307fb9cff157 100644 --- a/src/solvers/cg_solver.hpp +++ b/src/solvers/cg_solver.hpp @@ -25,6 +25,7 @@ #include "interface/state_descriptor.hpp" #include "kokkos_abstraction.hpp" #include "solvers/mg_solver.hpp" +#include "solvers/solver_base.hpp" #include "solvers/solver_utils.hpp" #include "tasks/tasks.hpp" #include "utils/type_list.hpp" diff --git a/src/solvers/cg_solver_stages.hpp b/src/solvers/cg_solver_stages.hpp index 8de9baf73576..e02a469f2398 100644 --- a/src/solvers/cg_solver_stages.hpp +++ b/src/solvers/cg_solver_stages.hpp @@ -27,7 +27,7 @@ #include "solvers/cg_solver.hpp" #include "solvers/mg_solver.hpp" #include "solvers/mg_solver_stages.hpp" -#include "solvers/solver_utils.hpp" +#include "solvers/solver_base.hpp" #include "solvers/solver_utils_stages.hpp" #include "tasks/tasks.hpp" #include "utils/type_list.hpp" diff --git a/src/solvers/mg_solver.hpp b/src/solvers/mg_solver.hpp index 1e92473e0282..d6b72bbe06e9 100644 --- a/src/solvers/mg_solver.hpp +++ b/src/solvers/mg_solver.hpp @@ -25,6 +25,7 @@ #include "interface/meshblock_data.hpp" #include "interface/state_descriptor.hpp" #include "kokkos_abstraction.hpp" +#include "solvers/solver_base.hpp" #include "solvers/solver_utils.hpp" #include "tasks/tasks.hpp" #include "utils/robust.hpp" @@ -58,23 +59,6 @@ struct MGParams { } }; -class SolverBase { - 
public: - virtual ~SolverBase() {} - - virtual TaskID AddSetupTasks(TaskList &tl, TaskID dependence, int partition, - Mesh *pmesh) = 0; - virtual TaskID AddTasks(TaskList &tl, TaskID dependence, int partition, - Mesh *pmesh) = 0; - - Real GetFinalResidual() const { return final_residual; } - int GetFinalIterations() const { return final_iteration; } - - protected: - Real final_residual; - int final_iteration; -}; - // The equations class must include a template method // // template diff --git a/src/solvers/mg_solver_stages.hpp b/src/solvers/mg_solver_stages.hpp index 40e12f8665e3..bc22dd776a00 100644 --- a/src/solvers/mg_solver_stages.hpp +++ b/src/solvers/mg_solver_stages.hpp @@ -25,7 +25,7 @@ #include "interface/meshblock_data.hpp" #include "interface/state_descriptor.hpp" #include "kokkos_abstraction.hpp" -#include "solvers/solver_utils.hpp" +#include "solvers/solver_base.hpp" #include "solvers/solver_utils_stages.hpp" #include "tasks/tasks.hpp" #include "utils/robust.hpp" diff --git a/src/solvers/solver_base.hpp b/src/solvers/solver_base.hpp new file mode 100644 index 000000000000..c5fc1653a2e5 --- /dev/null +++ b/src/solvers/solver_base.hpp @@ -0,0 +1,53 @@ +//======================================================================================== +// (C) (or copyright) 2023-2024. Triad National Security, LLC. All rights reserved. +// +// This program was produced under U.S. Government contract 89233218CNA000001 for Los +// Alamos National Laboratory (LANL), which is operated by Triad National Security, LLC +// for the U.S. Department of Energy/National Nuclear Security Administration. All rights +// in the program are reserved by Triad National Security, LLC, and the U.S. Department +// of Energy/National Nuclear Security Administration. The Government is granted for +// itself and others acting on its behalf a nonexclusive, paid-up, irrevocable worldwide +// license in this material to reproduce, prepare derivative works, distribute copies to +// the public, perform publicly and display publicly, and to permit others to do so. 
+//======================================================================================== +#ifndef SOLVERS_SOLVER_BASE_HPP_ +#define SOLVERS_SOLVER_BASE_HPP_ + +#include +#include +#include +#include +#include +#include +#include + +#include "interface/mesh_data.hpp" +#include "interface/meshblock_data.hpp" +#include "tasks/tasks.hpp" + +namespace parthenon { + +namespace solvers { + +class SolverBase { + public: + virtual ~SolverBase() {} + + virtual TaskID AddSetupTasks(TaskList &tl, TaskID dependence, int partition, + Mesh *pmesh) = 0; + virtual TaskID AddTasks(TaskList &tl, TaskID dependence, int partition, + Mesh *pmesh) = 0; + + Real GetFinalResidual() const { return final_residual; } + int GetFinalIterations() const { return final_iteration; } + + protected: + Real final_residual; + int final_iteration; +}; + +} // namespace solvers + +} // namespace parthenon + +#endif // SOLVERS_SOLVER_BASE_HPP_ From dcd7bef3d86b65731ad61cc17d9a8c6a84a35640 Mon Sep 17 00:00:00 2001 From: Luke Roberts Date: Thu, 3 Oct 2024 15:19:15 -0600 Subject: [PATCH 37/62] format --- src/basic_types.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/basic_types.hpp b/src/basic_types.hpp index 401aa90f3b9d..4f90a833a2fa 100644 --- a/src/basic_types.hpp +++ b/src/basic_types.hpp @@ -42,7 +42,7 @@ using Real = double; struct IndexRange { int s = 0; /// Starting Index (inclusive) int e = 0; /// Ending Index (inclusive) - int size() const { return e - s + 1;} + int size() const { return e - s + 1; } operator std::pair() const { return {s, e}; } }; From c6784497b304490b8802836bb0f6343ce52f3a46 Mon Sep 17 00:00:00 2001 From: Luke Roberts Date: Tue, 8 Oct 2024 15:07:03 -0600 Subject: [PATCH 38/62] moving toward separate def of interior prolongation operators --- example/poisson_gmg/poisson_package.cpp | 4 +- src/CMakeLists.txt | 1 + src/solvers/bicgstab_solver_stages.hpp | 4 +- src/solvers/cg_solver_stages.hpp | 4 +- src/solvers/internal_prolongation.hpp | 194 ++++++++++++++++++++++++ src/solvers/mg_solver_stages.hpp | 42 +++-- 6 files changed, 232 insertions(+), 17 deletions(-) create mode 100644 src/solvers/internal_prolongation.hpp diff --git a/example/poisson_gmg/poisson_package.cpp b/example/poisson_gmg/poisson_package.cpp index ca7a7964660d..01ffc98f70dd 100644 --- a/example/poisson_gmg/poisson_package.cpp +++ b/example/poisson_gmg/poisson_package.cpp @@ -113,12 +113,12 @@ std::shared_ptr Initialize(ParameterInput *pin) { using PoissEqStages = poisson_package::PoissonEquationStages; parthenon::solvers::CGParams params(pin, "poisson/solver_params"); psolver = std::make_shared>( - "base", "u", "rhs", pkg.get(), params, PoissEqStages(pin, "poisson")); + "base", "u", "rhs", params, PoissEqStages(pin, "poisson")); } else if (solver == "BiCGSTABStages") { using PoissEqStages = poisson_package::PoissonEquationStages; parthenon::solvers::BiCGSTABParams params(pin, "poisson/solver_params"); psolver = std::make_shared>( - "base", "u", "rhs", pkg.get(), params, PoissEqStages(pin, "poisson")); + "base", "u", "rhs", params, PoissEqStages(pin, "poisson")); } else { PARTHENON_FAIL("Unknown solver type."); } diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 165c7c0c357b..15653fc48032 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -230,6 +230,7 @@ add_library(parthenon solvers/bicgstab_solver_stages.hpp solvers/cg_solver.hpp solvers/cg_solver_stages.hpp + solvers/internal_prolongation.hpp solvers/mg_solver.hpp solvers/mg_solver_stages.hpp solvers/solver_base.hpp diff --git 
a/src/solvers/bicgstab_solver_stages.hpp b/src/solvers/bicgstab_solver_stages.hpp index 5c3ac98d89a5..4a316318f1c3 100644 --- a/src/solvers/bicgstab_solver_stages.hpp +++ b/src/solvers/bicgstab_solver_stages.hpp @@ -62,9 +62,9 @@ class BiCGSTABSolverStages : public SolverBase { public: BiCGSTABSolverStages(const std::string &container_base, const std::string &container_u, - const std::string &container_rhs, StateDescriptor *pkg, + const std::string &container_rhs, BiCGSTABParams params_in, equations eq_in = equations()) - : preconditioner(container_base, container_u, container_rhs, pkg, + : preconditioner(container_base, container_u, container_rhs, params_in.mg_params, eq_in), container_base(container_base), container_u(container_u), container_rhs(container_rhs), params_(params_in), iter_counter(0), eqs_(eq_in) { diff --git a/src/solvers/cg_solver_stages.hpp b/src/solvers/cg_solver_stages.hpp index e02a469f2398..e1b40942c6e9 100644 --- a/src/solvers/cg_solver_stages.hpp +++ b/src/solvers/cg_solver_stages.hpp @@ -63,9 +63,9 @@ class CGSolverStages : public SolverBase { public: CGSolverStages(const std::string &container_base, const std::string &container_u, - const std::string &container_rhs, StateDescriptor *pkg, + const std::string &container_rhs, CGParams params_in, const equations &eq_in = equations()) - : preconditioner(container_base, container_u, container_rhs, pkg, + : preconditioner(container_base, container_u, container_rhs, params_in.mg_params, eq_in), container_base(container_base), container_u(container_u), container_rhs(container_rhs), params_(params_in), iter_counter(0), eqs_(eq_in) { diff --git a/src/solvers/internal_prolongation.hpp b/src/solvers/internal_prolongation.hpp new file mode 100644 index 000000000000..18ceeca4b78b --- /dev/null +++ b/src/solvers/internal_prolongation.hpp @@ -0,0 +1,194 @@ +//======================================================================================== +// (C) (or copyright) 2021-2024. Triad National Security, LLC. All rights reserved. +// +// This program was produced under U.S. Government contract 89233218CNA000001 for Los +// Alamos National Laboratory (LANL), which is operated by Triad National Security, LLC +// for the U.S. Department of Energy/National Nuclear Security Administration. All rights +// in the program are reserved by Triad National Security, LLC, and the U.S. Department +// of Energy/National Nuclear Security Administration. The Government is granted for +// itself and others acting on its behalf a nonexclusive, paid-up, irrevocable worldwide +// license in this material to reproduce, prepare derivative works, distribute copies to +// the public, perform publicly and display publicly, and to permit others to do so. 
+//======================================================================================== +#ifndef SOLVERS_INTERNAL_PROLONGATION_HPP_ +#define SOLVERS_INTERNAL_PROLONGATION_HPP_ + +#include +#include +#include +#include +#include +#include + +#include "kokkos_abstraction.hpp" + +namespace parthenon { + +namespace solvers { +// This uses the prolongation operator set in the fields metadata when doing prolongation +// in the interior of a block during multigrid +class ProlongationBlockInteriorDefault { + public: + ProlongationBlockInteriorDefault() = default; + ProlongationBlockInteriorDefault(parthenon::ParameterInput *pin, const std::string &label) { + auto pro_int = pin->GetOrAddString(label, "block_interior_prolongation", "MetadataDefault"); + PARTHENON_REQUIRE(pro_int == "MetadataDefault", "Parameter input specifies an unsupported block interior prolongation type."); + } + + template + parthenon::TaskID Prolongate(parthenon::TaskList &tl, parthenon::TaskID depends_on, + std::shared_ptr> &md) { + return tl.AddTask(depends_on, + TF(ProlongateBounds), + md); + } +}; + +// Using this class overrides the prolongation operator set in a fields metadata when +// doing prolongation over the interior of a block during multigrid +class ProlongationBlockInteriorZeroDirichlet { + public: + + enum class ProlongationType { Constant, Linear, Kwak }; + ProlongationType prolongation_type = ProlongationType::Linear; + + ProlongationBlockInteriorZeroDirichlet() = default; + ProlongationBlockInteriorZeroDirichlet(parthenon::ParameterInput *pin, const std::string &label) { + auto pro_int = pin->GetOrAddString(label, "block_interior_prolongation", "Linear"); + if (pro_int == "Constant") { + prolongation_type = ProlongationType::Constant; + } else if (pro_int == "Linear") { + prolongation_type = ProlongationType::Linear; + } else if (pro_int == "Kwak") { + prolongation_type = ProlongationType::Kwak; + } else { + PARTHENON_FAIL("Invalid zero Dirichlet prolongation type."); + } + } + + template + parthenon::TaskID Prolongate(parthenon::TaskList &tl, parthenon::TaskID depends_on, + std::shared_ptr> &md) { + if (prolongation_type == ProlongationType::Constant) { + return tl.AddTask(depends_on, ProlongateImpl, + md); + } else if (prolongation_type == ProlongationType::Linear) { + return tl.AddTask(depends_on, ProlongateImpl, + md); + } else if (prolongation_type == ProlongationType::Kwak) { + return tl.AddTask(depends_on, ProlongateImpl, + md); + } + return depends_on; + } + + KOKKOS_FORCEINLINE_FUNCTION + static Real LinearFactor(int d, bool lo_bound, bool up_bound) { + if (d == 0) return 1.0; // Indicates this dimension is not included + if (d == 1) return (2.0 + !up_bound) / 4.0; + if (d == -1) return (2.0 + !lo_bound) / 4.0; + if (d == 3) return !up_bound / 4.0; + if (d == -3) return !lo_bound / 4.0; + return 0.0; + } + + KOKKOS_FORCEINLINE_FUNCTION + static Real QuadraticFactor(int d) { + if (d == 0) return 1.0; // Indicates this dimension is not included + if (d == 1 || d == -1) return 30.0 / 32.0; + if (d == 3 || d == -3) return 5.0 / 32.0; + if (d == 5 || d == -5) return -3.0 / 32.0; + return 0.0; + } + + template + static parthenon::TaskStatus + ProlongateImpl(std::shared_ptr> &md) { + using namespace parthenon; + const int ndim = md->GetMeshPointer()->ndim; + IndexRange ib = md->GetBoundsI(IndexDomain::interior); + IndexRange jb = md->GetBoundsJ(IndexDomain::interior); + IndexRange kb = md->GetBoundsK(IndexDomain::interior); + IndexRange cib = md->GetBoundsI(CellLevel::coarse, IndexDomain::interior); + 
IndexRange cjb = md->GetBoundsJ(CellLevel::coarse, IndexDomain::interior); + IndexRange ckb = md->GetBoundsK(CellLevel::coarse, IndexDomain::interior); + + using TE = parthenon::TopologicalElement; + + int nblocks = md->NumBlocks(); + std::vector include_block(nblocks, true); + for (int b = 0; b < nblocks; ++b) { + include_block[b] = + md->grid.logical_level == md->GetBlockData(b)->GetBlockPointer()->loc.level(); + } + const auto desc = parthenon::MakePackDescriptorFromTypeList(md.get()); + const auto desc_coarse = + parthenon::MakePackDescriptorFromTypeList(md.get(), {}, {PDOpt::Coarse}); + auto pack = desc.GetPack(md.get(), include_block); + auto pack_coarse = desc_coarse.GetPack(md.get(), include_block); + + parthenon::par_for( + "Prolongate", 0, pack.GetNBlocks() - 1, pack.GetLowerBoundHost(0), + pack.GetUpperBoundHost(0), kb.s, kb.e, jb.s, jb.e, ib.s, ib.e, + KOKKOS_LAMBDA(const int b, const int n, const int fk, const int fj, + const int fi) { + const int ck = (ndim > 2) ? (fk - kb.s) / 2 + ckb.s : ckb.s; + const int cj = (ndim > 1) ? (fj - jb.s) / 2 + cjb.s : cjb.s; + const int ci = (ndim > 0) ? (fi - ib.s) / 2 + cib.s : cib.s; + const int fok = (fk - kb.s) % 2; + const int foj = (fj - jb.s) % 2; + const int foi = (fi - ib.s) % 2; + const bool bound[6]{pack.IsPhysicalBoundary(b, 0, 0, -1) && (ib.s == fi), + pack.IsPhysicalBoundary(b, 0, 0, 1) && (ib.e == fi), + pack.IsPhysicalBoundary(b, 0, -1, 0) && (jb.s == fj), + pack.IsPhysicalBoundary(b, 0, 1, 0) && (jb.e == fj), + pack.IsPhysicalBoundary(b, -1, 0, 0) && (kb.s == fk), + pack.IsPhysicalBoundary(b, 1, 0, 0) && (kb.e == fk)}; + // Use both pack and pack_coarse outside of the constexpr if + // statements to prevent compilation errors in some CUDA compilers + pack(b, n, fk, fj, fi) = pack_coarse(b, n, ck, cj, ci); + if constexpr (ProlongationType::Constant == prolongation_type) { + pack(b, n, fk, fj, fi) = pack_coarse(b, n, ck, cj, ci); + } else if constexpr (ProlongationType::Linear == prolongation_type) { + pack(b, n, fk, fj, fi) = 0.0; + for (int ok = -(ndim > 2); ok < 1 + (ndim > 2); ++ok) { + for (int oj = -(ndim > 1); oj < 1 + (ndim > 1); ++oj) { + for (int oi = -(ndim > 0); oi < 1 + (ndim > 0); ++oi) { + const int dx3 = (ndim > 2) ? 4 * ok - (2 * fok - 1) : 0; + const int dx2 = (ndim > 1) ? 
4 * oj - (2 * foj - 1) : 0; + const int dx1 = 4 * oi - (2 * foi - 1); + pack(b, n, fk, fj, fi) += LinearFactor(dx1, bound[0], bound[1]) * + LinearFactor(dx2, bound[2], bound[3]) * + LinearFactor(dx3, bound[4], bound[5]) * + pack_coarse(b, n, ck + ok, cj + oj, ci + oi); + } + } + } + } else if constexpr (ProlongationType::Kwak == prolongation_type) { + pack(b, n, fk, fj, fi) = 0.0; + if (ndim > 2 && !bound[4 + fok]) { + for (int ok = fok - 1; ok <= fok; ++ok) { + pack(b, n, fk, fj, fi) += pack_coarse(b, n, ck + ok, cj, ci); + } + } + if (ndim > 1 && !bound[2 + foj]) { + for (int oj = foj - 1; oj <= foj; ++oj) { + pack(b, n, fk, fj, fi) += pack_coarse(b, n, ck, cj + oj, ci); + } + } + if (ndim > 0 && !bound[foi]) { + for (int oi = foi - 1; oi <= foi; ++oi) { + pack(b, n, fk, fj, fi) += pack_coarse(b, n, ck, cj, ci + oi); + } + } + pack(b, n, fk, fj, fi) /= 2.0 * ndim; + } + }); + return TaskStatus::complete; + } +}; +} // namespace solvers + +} // namespace parthenon + +#endif // SOLVERS_SOLVER_UTILS_STAGES_HPP_ diff --git a/src/solvers/mg_solver_stages.hpp b/src/solvers/mg_solver_stages.hpp index bc22dd776a00..abbc96beef28 100644 --- a/src/solvers/mg_solver_stages.hpp +++ b/src/solvers/mg_solver_stages.hpp @@ -25,6 +25,7 @@ #include "interface/meshblock_data.hpp" #include "interface/state_descriptor.hpp" #include "kokkos_abstraction.hpp" +#include "solvers/internal_prolongation.hpp" #include "solvers/solver_base.hpp" #include "solvers/solver_utils_stages.hpp" #include "tasks/tasks.hpp" @@ -35,7 +36,7 @@ namespace parthenon { namespace solvers { -// The equations class must include a template method +// The equations_t class must include a template method // // template // TaskID Ax(TL_t &tl, TaskID depends_on, std::shared_ptr> &md) @@ -49,10 +50,10 @@ namespace solvers { // // That stores the (possibly approximate) diagonal of matrix A in the field // associated with the type diag_t. This is used for Jacobi iteration. 
-template +template class MGSolverStages : public SolverBase { public: - using FieldTL = typename equations::IndependentVars; + using FieldTL = typename equations_t::IndependentVars; std::vector sol_fields; @@ -67,12 +68,30 @@ class MGSolverStages : public SolverBase { std::string container_rhs; // Internal containers for solver which create deep copies of sol_fields std::string container_res_err, container_temp, container_u0, container_diag; - - MGSolverStages(const std::string &container_base, const std::string &container_u, - const std::string &container_rhs, StateDescriptor *pkg, - MGParams params_in, equations eq_in = equations()) - : container_base(container_base), container_u(container_u), - container_rhs(container_rhs), params_(params_in), iter_counter(0), eqs_(eq_in) { + + MGSolverStages(const std::string &container_base, + const std::string &container_u, + const std::string &container_rhs, + ParameterInput *pin, + const std::string &input_block, + equations_t eq_in = equations_t()) + : MGSolverStages(container_base, container_u, container_rhs, + MGParams(pin, input_block), eq_in, + prolongator_t(pin, input_block)) {} + + MGSolverStages(const std::string &container_base, + const std::string &container_u, + const std::string &container_rhs, + MGParams params_in, + equations_t eq_in = equations_t(), + prolongator_t prol_in = prolongator_t()) + : container_base(container_base), + container_u(container_u), + container_rhs(container_rhs), + params_(params_in), + iter_counter(0), + eqs_(eq_in), + prolongator_(prol_in) { FieldTL::IterateTypes( [this](auto t) { this->sol_fields.push_back(decltype(t)::name()); }); std::string solver_id = "mg"; @@ -178,7 +197,8 @@ class MGSolverStages : public SolverBase { MGParams params_; int iter_counter; AllReduce residual; - equations eqs_; + equations_t eqs_; + prolongator_t prolongator_; std::string container_; // These functions apparently have to be public to compile with cuda since @@ -319,7 +339,7 @@ class MGSolverStages : public SolverBase { task_out = tl.AddTask(task_out, TF(SetBounds), md); } task_out = - tl.AddTask(task_out, BTF(&equations::template SetDiagonal), &eqs_, md, md_diag); + tl.AddTask(task_out, BTF(&equations_t::template SetDiagonal), &eqs_, md, md_diag); // If we are finer than the coarsest level: if (level > min_level) { task_out = From 75ef64c532d1430542f0edf951ee97aca2f04e19 Mon Sep 17 00:00:00 2001 From: Luke Roberts Date: Tue, 8 Oct 2024 16:18:09 -0600 Subject: [PATCH 39/62] refactor user definable block interior prolongation --- .../poisson_gmg/poisson_equation_stages.hpp | 119 ------------------ example/poisson_gmg/poisson_package.cpp | 18 ++- src/solvers/bicgstab_solver_stages.hpp | 14 +-- src/solvers/cg_solver_stages.hpp | 14 +-- src/solvers/internal_prolongation.hpp | 45 ++++--- src/solvers/mg_solver_stages.hpp | 35 ++---- 6 files changed, 63 insertions(+), 182 deletions(-) diff --git a/example/poisson_gmg/poisson_equation_stages.hpp b/example/poisson_gmg/poisson_equation_stages.hpp index 46189fc40560..813e4f9e8501 100644 --- a/example/poisson_gmg/poisson_equation_stages.hpp +++ b/example/poisson_gmg/poisson_equation_stages.hpp @@ -193,125 +193,6 @@ class PoissonEquationStages { return TaskStatus::complete; } - template - parthenon::TaskID Prolongate(parthenon::TaskList &tl, parthenon::TaskID depends_on, - std::shared_ptr> &md) { - if (prolongation_type == ProlongationType::Constant) { - return tl.AddTask(depends_on, ProlongateImpl, - md); - } else if (prolongation_type == ProlongationType::Linear) { - return 
tl.AddTask(depends_on, ProlongateImpl, md); - } else if (prolongation_type == ProlongationType::Kwak) { - return tl.AddTask(depends_on, ProlongateImpl, md); - } - return depends_on; - } - - KOKKOS_FORCEINLINE_FUNCTION - static Real LinearFactor(int d, bool lo_bound, bool up_bound) { - if (d == 0) return 1.0; // Indicates this dimension is not included - if (d == 1) return (2.0 + !up_bound) / 4.0; - if (d == -1) return (2.0 + !lo_bound) / 4.0; - if (d == 3) return !up_bound / 4.0; - if (d == -3) return !lo_bound / 4.0; - return 0.0; - } - - KOKKOS_FORCEINLINE_FUNCTION - static Real QuadraticFactor(int d) { - if (d == 0) return 1.0; // Indicates this dimension is not included - if (d == 1 || d == -1) return 30.0 / 32.0; - if (d == 3 || d == -3) return 5.0 / 32.0; - if (d == 5 || d == -5) return -3.0 / 32.0; - return 0.0; - } - - template - static parthenon::TaskStatus - ProlongateImpl(std::shared_ptr> &md) { - using namespace parthenon; - const int ndim = md->GetMeshPointer()->ndim; - IndexRange ib = md->GetBoundsI(IndexDomain::interior); - IndexRange jb = md->GetBoundsJ(IndexDomain::interior); - IndexRange kb = md->GetBoundsK(IndexDomain::interior); - IndexRange cib = md->GetBoundsI(CellLevel::coarse, IndexDomain::interior); - IndexRange cjb = md->GetBoundsJ(CellLevel::coarse, IndexDomain::interior); - IndexRange ckb = md->GetBoundsK(CellLevel::coarse, IndexDomain::interior); - - using TE = parthenon::TopologicalElement; - - int nblocks = md->NumBlocks(); - std::vector include_block(nblocks, true); - for (int b = 0; b < nblocks; ++b) { - include_block[b] = - md->grid.logical_level == md->GetBlockData(b)->GetBlockPointer()->loc.level(); - } - const auto desc = parthenon::MakePackDescriptorFromTypeList(md.get()); - const auto desc_coarse = parthenon::MakePackDescriptorFromTypeList( - md.get(), std::vector{}, std::set{PDOpt::Coarse}); - auto pack = desc.GetPack(md.get(), include_block); - auto pack_coarse = desc_coarse.GetPack(md.get(), include_block); - - parthenon::par_for( - "Prolongate", 0, pack.GetNBlocks() - 1, pack.GetLowerBoundHost(0), - pack.GetUpperBoundHost(0), kb.s, kb.e, jb.s, jb.e, ib.s, ib.e, - KOKKOS_LAMBDA(const int b, const int n, const int fk, const int fj, - const int fi) { - const int ck = (ndim > 2) ? (fk - kb.s) / 2 + ckb.s : ckb.s; - const int cj = (ndim > 1) ? (fj - jb.s) / 2 + cjb.s : cjb.s; - const int ci = (ndim > 0) ? (fi - ib.s) / 2 + cib.s : cib.s; - const int fok = (fk - kb.s) % 2; - const int foj = (fj - jb.s) % 2; - const int foi = (fi - ib.s) % 2; - const bool bound[6]{pack.IsPhysicalBoundary(b, 0, 0, -1) && (ib.s == fi), - pack.IsPhysicalBoundary(b, 0, 0, 1) && (ib.e == fi), - pack.IsPhysicalBoundary(b, 0, -1, 0) && (jb.s == fj), - pack.IsPhysicalBoundary(b, 0, 1, 0) && (jb.e == fj), - pack.IsPhysicalBoundary(b, -1, 0, 0) && (kb.s == fk), - pack.IsPhysicalBoundary(b, 1, 0, 0) && (kb.e == fk)}; - // Use both pack and pack_coarse outside of the constexpr if - // statements to prevent compilation errors in some CUDA compilers - pack(b, n, fk, fj, fi) = pack_coarse(b, n, ck, cj, ci); - if constexpr (ProlongationType::Constant == prolongation_type) { - pack(b, n, fk, fj, fi) = pack_coarse(b, n, ck, cj, ci); - } else if constexpr (ProlongationType::Linear == prolongation_type) { - pack(b, n, fk, fj, fi) = 0.0; - for (int ok = -(ndim > 2); ok < 1 + (ndim > 2); ++ok) { - for (int oj = -(ndim > 1); oj < 1 + (ndim > 1); ++oj) { - for (int oi = -(ndim > 0); oi < 1 + (ndim > 0); ++oi) { - const int dx3 = (ndim > 2) ? 
4 * ok - (2 * fok - 1) : 0; - const int dx2 = (ndim > 1) ? 4 * oj - (2 * foj - 1) : 0; - const int dx1 = 4 * oi - (2 * foi - 1); - pack(b, n, fk, fj, fi) += LinearFactor(dx1, bound[0], bound[1]) * - LinearFactor(dx2, bound[2], bound[3]) * - LinearFactor(dx3, bound[4], bound[5]) * - pack_coarse(b, n, ck + ok, cj + oj, ci + oi); - } - } - } - } else if constexpr (ProlongationType::Kwak == prolongation_type) { - pack(b, n, fk, fj, fi) = 0.0; - if (ndim > 2 && !bound[4 + fok]) { - for (int ok = fok - 1; ok <= fok; ++ok) { - pack(b, n, fk, fj, fi) += pack_coarse(b, n, ck + ok, cj, ci); - } - } - if (ndim > 1 && !bound[2 + foj]) { - for (int oj = foj - 1; oj <= foj; ++oj) { - pack(b, n, fk, fj, fi) += pack_coarse(b, n, ck, cj + oj, ci); - } - } - if (ndim > 0 && !bound[foi]) { - for (int oi = foi - 1; oi <= foi; ++oi) { - pack(b, n, fk, fj, fi) += pack_coarse(b, n, ck, cj, ci + oi); - } - } - pack(b, n, fk, fj, fi) /= 2.0 * ndim; - } - }); - return TaskStatus::complete; - } - static parthenon::TaskStatus SetFluxBoundaries(std::shared_ptr> &md_mat, std::shared_ptr> &md, bool do_flux_dx) { diff --git a/example/poisson_gmg/poisson_package.cpp b/example/poisson_gmg/poisson_package.cpp index 01ffc98f70dd..25dcf4c9f682 100644 --- a/example/poisson_gmg/poisson_package.cpp +++ b/example/poisson_gmg/poisson_package.cpp @@ -111,14 +111,20 @@ std::shared_ptr Initialize(ParameterInput *pin) { pkg.get(), params, eq); } else if (solver == "CGStages") { using PoissEqStages = poisson_package::PoissonEquationStages; - parthenon::solvers::CGParams params(pin, "poisson/solver_params"); - psolver = std::make_shared>( - "base", "u", "rhs", params, PoissEqStages(pin, "poisson")); + using prolongator_t = parthenon::solvers::ProlongationBlockInteriorZeroDirichlet; + using preconditioner_t = + parthenon::solvers::MGSolverStages; + psolver = std::make_shared< + parthenon::solvers::CGSolverStages>( + "base", "u", "rhs", pin, "poisson/solver_params", PoissEqStages(pin, "poisson")); } else if (solver == "BiCGSTABStages") { using PoissEqStages = poisson_package::PoissonEquationStages; - parthenon::solvers::BiCGSTABParams params(pin, "poisson/solver_params"); - psolver = std::make_shared>( - "base", "u", "rhs", params, PoissEqStages(pin, "poisson")); + using prolongator_t = parthenon::solvers::ProlongationBlockInteriorZeroDirichlet; + using preconditioner_t = + parthenon::solvers::MGSolverStages; + psolver = std::make_shared< + parthenon::solvers::BiCGSTABSolverStages>( + "base", "u", "rhs", pin, "poisson/solver_params", PoissEqStages(pin, "poisson")); } else { PARTHENON_FAIL("Unknown solver type."); } diff --git a/src/solvers/bicgstab_solver_stages.hpp b/src/solvers/bicgstab_solver_stages.hpp index 4a316318f1c3..dd08c5ae97b3 100644 --- a/src/solvers/bicgstab_solver_stages.hpp +++ b/src/solvers/bicgstab_solver_stages.hpp @@ -41,10 +41,9 @@ namespace solvers { // // that takes a field associated with x_t and applies // the matrix A to it and stores the result in y_t. 
-template +template > class BiCGSTABSolverStages : public SolverBase { using FieldTL = typename equations::IndependentVars; - using preconditioner_t = MGSolverStages; std::vector sol_fields; // Name of user defined container that should contain information required to @@ -62,12 +61,13 @@ class BiCGSTABSolverStages : public SolverBase { public: BiCGSTABSolverStages(const std::string &container_base, const std::string &container_u, - const std::string &container_rhs, - BiCGSTABParams params_in, equations eq_in = equations()) - : preconditioner(container_base, container_u, container_rhs, - params_in.mg_params, eq_in), + const std::string &container_rhs, ParameterInput *pin, + const std::string &input_block, equations eq_in = equations()) + : preconditioner(container_base, container_u, container_rhs, pin, input_block, + eq_in), container_base(container_base), container_u(container_u), - container_rhs(container_rhs), params_(params_in), iter_counter(0), eqs_(eq_in) { + container_rhs(container_rhs), params_(pin, input_block), iter_counter(0), + eqs_(eq_in) { FieldTL::IterateTypes( [this](auto t) { this->sol_fields.push_back(decltype(t)::name()); }); std::string solver_id = "bicgstab"; diff --git a/src/solvers/cg_solver_stages.hpp b/src/solvers/cg_solver_stages.hpp index e1b40942c6e9..6819fa9f1504 100644 --- a/src/solvers/cg_solver_stages.hpp +++ b/src/solvers/cg_solver_stages.hpp @@ -43,10 +43,9 @@ namespace solvers { // // that takes a field associated with x_t and applies // the matrix A to it and stores the result in y_t. -template +template > class CGSolverStages : public SolverBase { using FieldTL = typename equations::IndependentVars; - using preconditioner_t = MGSolverStages; std::vector sol_fields; // Name of user defined container that should contain information required to @@ -63,12 +62,13 @@ class CGSolverStages : public SolverBase { public: CGSolverStages(const std::string &container_base, const std::string &container_u, - const std::string &container_rhs, - CGParams params_in, const equations &eq_in = equations()) - : preconditioner(container_base, container_u, container_rhs, - params_in.mg_params, eq_in), + const std::string &container_rhs, ParameterInput *pin, + const std::string &input_block, const equations &eq_in = equations()) + : preconditioner(container_base, container_u, container_rhs, pin, input_block, + eq_in), container_base(container_base), container_u(container_u), - container_rhs(container_rhs), params_(params_in), iter_counter(0), eqs_(eq_in) { + container_rhs(container_rhs), params_(pin, input_block), iter_counter(0), + eqs_(eq_in) { FieldTL::IterateTypes( [this](auto t) { this->sol_fields.push_back(decltype(t)::name()); }); std::string solver_id = "cg"; diff --git a/src/solvers/internal_prolongation.hpp b/src/solvers/internal_prolongation.hpp index 18ceeca4b78b..676d0733b163 100644 --- a/src/solvers/internal_prolongation.hpp +++ b/src/solvers/internal_prolongation.hpp @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -25,35 +26,38 @@ namespace parthenon { namespace solvers { -// This uses the prolongation operator set in the fields metadata when doing prolongation +// This uses the prolongation operator set in the fields metadata when doing prolongation // in the interior of a block during multigrid class ProlongationBlockInteriorDefault { public: ProlongationBlockInteriorDefault() = default; - ProlongationBlockInteriorDefault(parthenon::ParameterInput *pin, const std::string &label) { - auto pro_int = pin->GetOrAddString(label, 
"block_interior_prolongation", "MetadataDefault"); - PARTHENON_REQUIRE(pro_int == "MetadataDefault", "Parameter input specifies an unsupported block interior prolongation type."); + ProlongationBlockInteriorDefault(parthenon::ParameterInput *pin, + const std::string &label) { + auto pro_int = + pin->GetOrAddString(label, "block_interior_prolongation", "MetadataDefault"); + PARTHENON_REQUIRE( + pro_int == "MetadataDefault", + "Parameter input specifies an unsupported block interior prolongation type."); } template parthenon::TaskID Prolongate(parthenon::TaskList &tl, parthenon::TaskID depends_on, std::shared_ptr> &md) { - return tl.AddTask(depends_on, - TF(ProlongateBounds), - md); + return tl.AddTask(depends_on, TF(ProlongateBounds), + md); } }; -// Using this class overrides the prolongation operator set in a fields metadata when +// Using this class overrides the prolongation operator set in a fields metadata when // doing prolongation over the interior of a block during multigrid class ProlongationBlockInteriorZeroDirichlet { public: - - enum class ProlongationType { Constant, Linear, Kwak }; + enum class ProlongationType { MetadataDefault, Constant, Linear, Kwak }; ProlongationType prolongation_type = ProlongationType::Linear; - + ProlongationBlockInteriorZeroDirichlet() = default; - ProlongationBlockInteriorZeroDirichlet(parthenon::ParameterInput *pin, const std::string &label) { + ProlongationBlockInteriorZeroDirichlet(parthenon::ParameterInput *pin, + const std::string &label) { auto pro_int = pin->GetOrAddString(label, "block_interior_prolongation", "Linear"); if (pro_int == "Constant") { prolongation_type = ProlongationType::Constant; @@ -61,6 +65,8 @@ class ProlongationBlockInteriorZeroDirichlet { prolongation_type = ProlongationType::Linear; } else if (pro_int == "Kwak") { prolongation_type = ProlongationType::Kwak; + } else if (pro_int == "MetadataDefault") { + prolongation_type = ProlongationType::MetadataDefault; } else { PARTHENON_FAIL("Invalid zero Dirichlet prolongation type."); } @@ -70,14 +76,17 @@ class ProlongationBlockInteriorZeroDirichlet { parthenon::TaskID Prolongate(parthenon::TaskList &tl, parthenon::TaskID depends_on, std::shared_ptr> &md) { if (prolongation_type == ProlongationType::Constant) { - return tl.AddTask(depends_on, ProlongateImpl, + return tl.AddTask(depends_on, TF(ProlongateImpl), md); } else if (prolongation_type == ProlongationType::Linear) { - return tl.AddTask(depends_on, ProlongateImpl, + return tl.AddTask(depends_on, TF(ProlongateImpl), md); } else if (prolongation_type == ProlongationType::Kwak) { - return tl.AddTask(depends_on, ProlongateImpl, + return tl.AddTask(depends_on, TF(ProlongateImpl), md); + } else if (prolongation_type == ProlongationType::MetadataDefault) { + return tl.AddTask(depends_on, + TF(ProlongateBounds), md); } return depends_on; } @@ -122,8 +131,8 @@ class ProlongationBlockInteriorZeroDirichlet { md->grid.logical_level == md->GetBlockData(b)->GetBlockPointer()->loc.level(); } const auto desc = parthenon::MakePackDescriptorFromTypeList(md.get()); - const auto desc_coarse = - parthenon::MakePackDescriptorFromTypeList(md.get(), {}, {PDOpt::Coarse}); + const auto desc_coarse = parthenon::MakePackDescriptorFromTypeList( + md.get(), std::vector{}, std::set{PDOpt::Coarse}); auto pack = desc.GetPack(md.get(), include_block); auto pack_coarse = desc_coarse.GetPack(md.get(), include_block); @@ -191,4 +200,4 @@ class ProlongationBlockInteriorZeroDirichlet { } // namespace parthenon -#endif // SOLVERS_SOLVER_UTILS_STAGES_HPP_ 
+#endif // SOLVERS_INTERNAL_PROLONGATION_HPP_ diff --git a/src/solvers/mg_solver_stages.hpp b/src/solvers/mg_solver_stages.hpp index abbc96beef28..e08b6f4b6da9 100644 --- a/src/solvers/mg_solver_stages.hpp +++ b/src/solvers/mg_solver_stages.hpp @@ -68,29 +68,20 @@ class MGSolverStages : public SolverBase { std::string container_rhs; // Internal containers for solver which create deep copies of sol_fields std::string container_res_err, container_temp, container_u0, container_diag; - - MGSolverStages(const std::string &container_base, - const std::string &container_u, - const std::string &container_rhs, - ParameterInput *pin, - const std::string &input_block, - equations_t eq_in = equations_t()) + + MGSolverStages(const std::string &container_base, const std::string &container_u, + const std::string &container_rhs, ParameterInput *pin, + const std::string &input_block, equations_t eq_in = equations_t()) : MGSolverStages(container_base, container_u, container_rhs, MGParams(pin, input_block), eq_in, prolongator_t(pin, input_block)) {} - MGSolverStages(const std::string &container_base, - const std::string &container_u, - const std::string &container_rhs, - MGParams params_in, + MGSolverStages(const std::string &container_base, const std::string &container_u, + const std::string &container_rhs, MGParams params_in, equations_t eq_in = equations_t(), prolongator_t prol_in = prolongator_t()) - : container_base(container_base), - container_u(container_u), - container_rhs(container_rhs), - params_(params_in), - iter_counter(0), - eqs_(eq_in), + : container_base(container_base), container_u(container_u), + container_rhs(container_rhs), params_(params_in), iter_counter(0), eqs_(eq_in), prolongator_(prol_in) { FieldTL::IterateTypes( [this](auto t) { this->sol_fields.push_back(decltype(t)::name()); }); @@ -468,14 +459,8 @@ class MGSolverStages : public SolverBase { auto set_from_coarser = tl.AddTask(recv_from_coarser, BTF(SetBounds), md_res_err); - auto prolongate = set_from_coarser; - if (params_.prolongation == "User") { - prolongate = eqs_.template Prolongate(tl, set_from_coarser, md_res_err); - } else { - prolongate = tl.AddTask(set_from_coarser, - BTF(ProlongateBounds), - md_res_err); - } + auto prolongate = + prolongator_.template Prolongate(tl, set_from_coarser, md_res_err); // 7. 
Correct solution on this level with res_err field and store in // communication field From 6b10143418fbdd187a74e9912e4bc0f6f1fa8eae Mon Sep 17 00:00:00 2001 From: Luke Roberts Date: Tue, 8 Oct 2024 16:25:03 -0600 Subject: [PATCH 40/62] Remove some more stuff --- example/poisson_gmg/poisson_equation_stages.hpp | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/example/poisson_gmg/poisson_equation_stages.hpp b/example/poisson_gmg/poisson_equation_stages.hpp index 813e4f9e8501..8c935fe8c8b4 100644 --- a/example/poisson_gmg/poisson_equation_stages.hpp +++ b/example/poisson_gmg/poisson_equation_stages.hpp @@ -39,8 +39,6 @@ class PoissonEquationStages { bool do_flux_cor = false; bool set_flux_boundary = false; bool include_flux_dx = false; - enum class ProlongationType { Constant, Linear, Kwak }; - ProlongationType prolongation_type = ProlongationType::Constant; using IndependentVars = parthenon::TypeList; @@ -49,16 +47,6 @@ class PoissonEquationStages { set_flux_boundary = pin->GetOrAddBoolean(label, "set_flux_boundary", false); include_flux_dx = (pin->GetOrAddString(label, "boundary_prolongation", "Linear") == "Constant"); - auto pro_int = pin->GetOrAddString(label, "interior_prolongation", "Linear"); - if (pro_int == "Constant") { - prolongation_type = ProlongationType::Constant; - } else if (pro_int == "Linear") { - prolongation_type = ProlongationType::Linear; - } else if (pro_int == "Kwak") { - prolongation_type = ProlongationType::Kwak; - } else { - PARTHENON_FAIL("Invalid user prolongation type."); - } } // Add tasks to calculate the result of the matrix A (which is implicitly defined by From 96ae149c9b4072ce25be01f2ebbc15b12264e1d0 Mon Sep 17 00:00:00 2001 From: Luke Roberts Date: Tue, 8 Oct 2024 16:26:18 -0600 Subject: [PATCH 41/62] format --- src/solvers/internal_prolongation.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/solvers/internal_prolongation.hpp b/src/solvers/internal_prolongation.hpp index 676d0733b163..f351d3329044 100644 --- a/src/solvers/internal_prolongation.hpp +++ b/src/solvers/internal_prolongation.hpp @@ -200,4 +200,4 @@ class ProlongationBlockInteriorZeroDirichlet { } // namespace parthenon -#endif // SOLVERS_INTERNAL_PROLONGATION_HPP_ +#endif // SOLVERS_INTERNAL_PROLONGATION_HPP_ From 01917a2d6b09004ccc60422a3af5354a9801a794 Mon Sep 17 00:00:00 2001 From: Luke Roberts Date: Tue, 8 Oct 2024 16:32:29 -0600 Subject: [PATCH 42/62] possibly fix non-gcc compilation errors --- src/solvers/bicgstab_solver_stages.hpp | 6 +++--- src/solvers/cg_solver_stages.hpp | 2 +- src/solvers/mg_solver_stages.hpp | 11 +++++------ 3 files changed, 9 insertions(+), 10 deletions(-) diff --git a/src/solvers/bicgstab_solver_stages.hpp b/src/solvers/bicgstab_solver_stages.hpp index dd08c5ae97b3..6ee96b121777 100644 --- a/src/solvers/bicgstab_solver_stages.hpp +++ b/src/solvers/bicgstab_solver_stages.hpp @@ -89,7 +89,7 @@ class BiCGSTABSolverStages : public SolverBase { auto partitions = pmesh->GetDefaultBlockPartitions(); auto &md = pmesh->mesh_data.Add(container_base, partitions[partition]); auto &md_diag = pmesh->mesh_data.Add(container_diag, md, sol_fields); - return tl.AddTask(dependence, &equations::template SetDiagonal, &eqs_, md, md_diag); + return tl.AddTask(dependence, &equations::SetDiagonal, &eqs_, md, md_diag); } else { return dependence; } @@ -188,7 +188,7 @@ class BiCGSTABSolverStages : public SolverBase { // 2. 
v <- A u auto comm = AddBoundaryExchangeTasks(precon1, itl, md_u, multilevel); - auto get_v = eqs_.template Ax(itl, comm, md_base, md_u, md_v); + auto get_v = eqs_.Ax(itl, comm, md_base, md_u, md_v); // 3. rhat0v <- (rhat0, v) auto get_rhat0v = DotProduct(get_v, itl, &rhat0v, md_rhat0, md_v); @@ -242,7 +242,7 @@ class BiCGSTABSolverStages : public SolverBase { // 7. t <- A u auto pre_t_comm = AddBoundaryExchangeTasks(precon2, itl, md_u, multilevel); - auto get_t = eqs_.template Ax(itl, pre_t_comm, md_base, md_u, md_t); + auto get_t = eqs_.Ax(itl, pre_t_comm, md_base, md_u, md_t); // 8. omega <- (t,s) / (t,t) auto get_ts = DotProduct(get_t, itl, &ts, md_t, md_s); diff --git a/src/solvers/cg_solver_stages.hpp b/src/solvers/cg_solver_stages.hpp index 6819fa9f1504..2c3e13fbc07b 100644 --- a/src/solvers/cg_solver_stages.hpp +++ b/src/solvers/cg_solver_stages.hpp @@ -181,7 +181,7 @@ class CGSolverStages : public SolverBase { // 4. v <- A p auto comm = AddBoundaryExchangeTasks(correct_p, itl, md_p, multilevel); - auto get_v = eqs_.template Ax(itl, comm, md_base, md_p, md_v); + auto get_v = eqs_.Ax(itl, comm, md_base, md_p, md_v); // 5. alpha <- r dot u / p dot v (calculate denominator) auto get_pAp = DotProduct(get_v, itl, &pAp, md_p, md_v); diff --git a/src/solvers/mg_solver_stages.hpp b/src/solvers/mg_solver_stages.hpp index e08b6f4b6da9..519a02ef1068 100644 --- a/src/solvers/mg_solver_stages.hpp +++ b/src/solvers/mg_solver_stages.hpp @@ -117,7 +117,7 @@ class MGSolverStages : public SolverBase { auto &md_rhs = pmesh->mesh_data.Add(container_rhs, md, sol_fields); auto comm = AddBoundaryExchangeTasks(mg_finest, itl, md_u, pmesh->multilevel); - auto calc_pointwise_res = eqs_.template Ax(itl, comm, md, md_u, md_res_err); + auto calc_pointwise_res = eqs_.Ax(itl, comm, md, md_u, md_res_err); calc_pointwise_res = itl.AddTask(calc_pointwise_res, TF(AddFieldsAndStoreInteriorSelect), md_rhs, md_res_err, md_res_err, 1.0, -1.0, false); @@ -266,7 +266,7 @@ class MGSolverStages : public SolverBase { auto comm = AddBoundaryExchangeTasks(depends_on, tl, md_in, multilevel); - auto mat_mult = eqs_.template Ax(tl, comm, md_base, md_in, md_out); + auto mat_mult = eqs_.Ax(tl, comm, md_base, md_in, md_out); return tl.AddTask(mat_mult, TF(&MGSolverStages::Jacobi), this, md_rhs, md_out, md_diag, md_in, md_out, omega); } @@ -329,8 +329,7 @@ class MGSolverStages : public SolverBase { tl.AddTask(task_out, TF(ReceiveBoundBufs), md); task_out = tl.AddTask(task_out, TF(SetBounds), md); } - task_out = - tl.AddTask(task_out, BTF(&equations_t::template SetDiagonal), &eqs_, md, md_diag); + task_out = tl.AddTask(task_out, BTF(&equations_t::SetDiagonal), &eqs_, md, md_diag); // If we are finer than the coarsest level: if (level > min_level) { task_out = @@ -419,7 +418,7 @@ class MGSolverStages : public SolverBase { // This should set the rhs only in blocks that correspond to interior nodes, the // RHS of leaf blocks that are on this GMG level should have already been set on // entry into multigrid - set_from_finer = eqs_.template Ax(tl, set_from_finer, md, md_u, md_temp); + set_from_finer = eqs_.Ax(tl, set_from_finer, md, md_u, md_temp); set_from_finer = tl.AddTask(set_from_finer, BTF(AddFieldsAndStoreInteriorSelect), md_temp, md_res_err, md_rhs, 1.0, 1.0, true); @@ -440,7 +439,7 @@ class MGSolverStages : public SolverBase { multilevel); // 4. 
Calculate residual and store in communication field - auto residual = eqs_.template Ax(tl, comm_u, md, md_u, md_temp); + auto residual = eqs_.Ax(tl, comm_u, md, md_u, md_temp); residual = tl.AddTask(residual, BTF(AddFieldsAndStoreInteriorSelect), md_rhs, md_temp, md_res_err, 1.0, -1.0, false); From c176abb4ab4304da6cbf56e342b9f6daed294053 Mon Sep 17 00:00:00 2001 From: Luke Roberts Date: Tue, 8 Oct 2024 17:15:31 -0600 Subject: [PATCH 43/62] update docs --- doc/sphinx/src/solvers.rst | 93 ++++++++++++++++++++++++++++++++------ 1 file changed, 78 insertions(+), 15 deletions(-) diff --git a/doc/sphinx/src/solvers.rst b/doc/sphinx/src/solvers.rst index cadd20862d05..a7dbb3a15911 100644 --- a/doc/sphinx/src/solvers.rst +++ b/doc/sphinx/src/solvers.rst @@ -3,23 +3,86 @@ Solvers ======= -Parthenon does not yet provide an exhaustive set of plug and play solvers. -Nevertheless, the building blocks required for implementing Krylov subspace -methods (i.e. global reductions for vector dot products) like CG, BiCGStab, -and GMRES are available. An example of a Parthenon based implementation of -BiCGStab can be found in ``examples/poisson_gmg``. Additionally, the -infrastructure required for implementing multigrid solvers is also -included in Parthenon. The requisite hierarchy of grids is produced if -``parthenon/mesh/multigrid=true`` is set in the parameter input. An example -of a multi-grid based linear solver in Parthenon is also given in -``examples/poisson_gmg`` (and also an example of using multi-grid as a -preconditioner for BiCGStab). We plan to build wrappers that simplify the -use of these methods in down stream codes in the future. Note that the -example code does not currently rely on the Stencil and SparseMatrixAccessor -code described below. +Parthenon provides a number of linear solvers, including a geometric multigrid +solver, a CG solver, a BiCGSTAB solver, and multigrid preconditioned versions +of the latter two solvers. + +Solvers are templated on a type defining the system of equations they are solving. +The type defining the system of equations must provide two methods and a ``TypeList`` +of all of the fields that make up the vector space: +.. code:: c++ + class MySystemOfEquations { + using IndependentVars = parthenon::TypeList<...>; + + TaskID Ax(TaskList &tl, TaskID depends_on, + std::shared_ptr<MeshData<Real>> &md_mat, + std::shared_ptr<MeshData<Real>> &md_in, + std::shared_ptr<MeshData<Real>> &md_out); + + TaskStatus SetDiagonal(std::shared_ptr<MeshData<Real>> &md_mat, + std::shared_ptr<MeshData<Real>> &md_diag); + }; + +The routine ``Ax`` must calculate the matrix-vector product ``y <- A.x``. It takes a container +``md_mat``, which contains all of the fields required to reconstruct the matrix ``A`` associated +with the system of linear equations, a container ``md_in``, which stores the vector ``x`` +in the fields of the typelist ``IndependentVars``, and a container ``md_out``, which will hold the result ``y``. + +The routine ``SetDiagonal`` takes the same container ``md_mat`` as ``Ax`` and returns the +(approximate) diagonal of ``A`` in the container ``md_diag``. This only needs to be approximate +since it is only used in preconditioners/smoothers. + +With such a class defining a linear system of equations, one can then define and use a solver with +code along the lines of: +..
code:: c++ + std::string base_cont_name = "base"; + std::string u_cont_name = "u"; + std::string rhs_cont_name = "rhs"; + + MySystemOfEquations eqs(....); + std::shared_ptr<parthenon::solvers::SolverBase> psolver = std::make_shared<parthenon::solvers::CGSolverStages<MySystemOfEquations>>( + base_cont_name, u_cont_name, rhs_cont_name, pin, "location/of/solver_params", eqs); + + ... + + auto partitions = pmesh->GetDefaultBlockPartitions(); + const int num_partitions = partitions.size(); + TaskRegion &region = tc.AddRegion(num_partitions); + for (int partition = 0; partition < num_partitions; ++partition) { + TaskList &tl = region[partition]; + auto &md = pmesh->mesh_data.Add(base_cont_name, partitions[partition]); + auto &md_u = pmesh->mesh_data.Add(u_cont_name, md); + auto &md_rhs = pmesh->mesh_data.Add(rhs_cont_name, md); + + // Fill the base container with the information necessary to define A, + // if it was not already filled during initialization + + // Fill the rhs container + + auto setup = psolver->AddSetupTasks(tl, dependence, partition, pmesh); + auto solve = psolver->AddTasks(tl, setup, partition, pmesh); + + // Use the solution stored in md_u + } + +Some notes: +- All solvers inherit from ``SolverBase``, so the best practice is to stash a shared pointer to a + ``SolverBase`` object in params during initialization and pull this solver out while building a + task list. This should make switching between solvers trivial. +- For any solver involving geometric multigrid, the input parameter + ``parthenon/mesh/multigrid`` must be set to ``true``. This tells the ``Mesh`` + to build the coarsened blocks associated with the multi-grid hierarchy. +- For geometric multigrid based solvers, it is possible to define block interior prolongation + operators that are separate from the standard prolongation machinery in Parthenon. This allows + for defining boundary aware prolongation operators and having different prolongation operators + in the ghost cells of blocks from the prolongation operators used in their interiors. Users can + easily define their own prolongation operators. An example of using these interior prolongation + operators is contained in the ``poisson_gmg`` example. +- Note that none of the example code currently relies on the Stencil and SparseMatrixAccessor + code described below, which persists from an older version of the solvers. Some implementation notes about geometric multi-grid can be found in -:ref:`these notes `. +:ref:`these notes `. Stencil ------- From a31f34f331d56fca219f57df84a35cf5c5702e98 Mon Sep 17 00:00:00 2001 From: Luke Roberts Date: Wed, 9 Oct 2024 17:35:59 -0600 Subject: [PATCH 44/62] small change to doc --- doc/sphinx/src/solvers.rst | 2 -- 1 file changed, 2 deletions(-) diff --git a/doc/sphinx/src/solvers.rst b/doc/sphinx/src/solvers.rst index a7dbb3a15911..51a4ee6c7b9b 100644 --- a/doc/sphinx/src/solvers.rst +++ b/doc/sphinx/src/solvers.rst @@ -78,8 +78,6 @@ Some notes: in the ghost cells of blocks from the prolongation operators used in their interiors. Users can easily define their own prolongation operators. An example of using these interior prolongation operators is contained in the ``poisson_gmg`` example. -- Note that none of the example code currently relies on the Stencil and SparseMatrixAccessor - code described below, which persists from an older version of the solvers. Some implementation notes about geometric multi-grid can be found in :ref:`these notes `.
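To make the equations interface documented in the patch above concrete, the following is a minimal sketch of a conforming class for a constant-coefficient Laplacian with unit grid spacing. It is illustrative only and not part of this patch series: the field type ``phi`` and the task functions ``ApplyLaplacian`` and ``FillDiagonal`` are hypothetical user-provided pieces, and the ``TF`` task-wrapping macro and container-based ``Ax`` signature are the ones introduced earlier in this series. A full working version of this pattern is the ``PoissonEquationStages`` class exercised by the ``poisson_gmg`` example.

.. code:: c++

    class MyLaplaceEquations {
     public:
      // phi is a hypothetical field type making up the solution vector.
      using IndependentVars = parthenon::TypeList<phi>;

      // y <- A.x for A = -Laplacian. md_mat is unused here because this
      // operator needs no matrix data beyond the mesh itself.
      template <class TL_t>
      parthenon::TaskID Ax(TL_t &tl, parthenon::TaskID depends_on,
                           std::shared_ptr<parthenon::MeshData<parthenon::Real>> &md_mat,
                           std::shared_ptr<parthenon::MeshData<parthenon::Real>> &md_in,
                           std::shared_ptr<parthenon::MeshData<parthenon::Real>> &md_out) {
        // ApplyLaplacian is a user-written task applying the second-order
        // stencil to the IndependentVars fields of md_in, writing into md_out.
        return tl.AddTask(depends_on, TF(ApplyLaplacian), md_in, md_out);
      }

      // Approximate diagonal of A, used only by the smoother. For the
      // unit-spacing Laplacian every diagonal entry is just 2 * ndim.
      parthenon::TaskStatus
      SetDiagonal(std::shared_ptr<parthenon::MeshData<parthenon::Real>> &md_mat,
                  std::shared_ptr<parthenon::MeshData<parthenon::Real>> &md_diag) {
        // FillDiagonal is a user-written task setting every cell of the
        // IndependentVars fields in md_diag to the diagonal value.
        return FillDiagonal(md_diag);
      }
    };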
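The string ``"location/of/solver_params"`` passed to the constructor above names the input block from which the solver reads its parameters; the same block is read by the block-interior prolongation classes added in ``internal_prolongation.hpp`` earlier in this series. A hypothetical input fragment, assuming Parthenon's usual parameter-input block syntax and using the ``poisson/solver_params`` block name from the ``poisson_gmg`` example, might read (``Linear`` is one of ``MetadataDefault``, ``Constant``, ``Linear``, ``Kwak``):

.. code::

    <poisson/solver_params>
    block_interior_prolongation = Linear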
From 9b0a9ac3a72cfa40b9614d3940978138855406ed Mon Sep 17 00:00:00 2001 From: Luke Roberts Date: Wed, 9 Oct 2024 20:32:51 -0600 Subject: [PATCH 45/62] Only include solver fields in md_u --- example/poisson_gmg/poisson_driver.cpp | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/example/poisson_gmg/poisson_driver.cpp b/example/poisson_gmg/poisson_driver.cpp index c6f25181380e..d593265d27f0 100644 --- a/example/poisson_gmg/poisson_driver.cpp +++ b/example/poisson_gmg/poisson_driver.cpp @@ -72,8 +72,8 @@ TaskCollection PoissonDriver::MakeTaskCollection(BlockList_t &blocks) { for (int i = 0; i < num_partitions; ++i) { TaskList &tl = region[i]; auto &md = pmesh->mesh_data.Add("base", partitions[i]); - auto &md_u = pmesh->mesh_data.Add("u", md); - auto &md_rhs = pmesh->mesh_data.Add("rhs", md); + auto &md_u = pmesh->mesh_data.Add("u", md, {u::name()}); + auto &md_rhs = pmesh->mesh_data.Add("rhs", md, {u::name()}); // Possibly set rhs <- A.u_exact for a given u_exact so that the exact solution is // known when we solve A.u = rhs @@ -85,12 +85,14 @@ TaskCollection PoissonDriver::MakeTaskCollection(BlockList_t &blocks) { get_rhs = eqs->Ax(tl, comm, md); } + // Move the rhs variable into the rhs stage for stage based solver + auto copy_rhs = tl.AddTask(get_rhs, TF(solvers::utils::CopyData), md); + copy_rhs = tl.AddTask( + copy_rhs, TF(solvers::StageUtils::CopyData>), md, md_rhs); + // Set initial solution guess to zero - auto zero_u = tl.AddTask(get_rhs, TF(solvers::utils::SetToZero), md); + auto zero_u = tl.AddTask(copy_rhs, TF(solvers::utils::SetToZero), md); zero_u = tl.AddTask(zero_u, TF(solvers::utils::SetToZero), md_u); - zero_u = tl.AddTask( - zero_u, TF(solvers::StageUtils::CopyData>), md, md_rhs); - zero_u = tl.AddTask(zero_u, TF(solvers::utils::CopyData), md_rhs); auto setup = psolver->AddSetupTasks(tl, zero_u, i, pmesh); auto solve = psolver->AddTasks(tl, setup, i, pmesh); From 93e08b1ea52cd781b295621ca947c689b3cdbfb3 Mon Sep 17 00:00:00 2001 From: Luke Roberts Date: Wed, 9 Oct 2024 20:40:14 -0600 Subject: [PATCH 46/62] explicitly only include the solution fields --- src/solvers/bicgstab_solver_stages.hpp | 4 ++-- src/solvers/cg_solver_stages.hpp | 4 ++-- src/solvers/mg_solver_stages.hpp | 6 +++--- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/solvers/bicgstab_solver_stages.hpp b/src/solvers/bicgstab_solver_stages.hpp index 6ee96b121777..0ebd2875ca9c 100644 --- a/src/solvers/bicgstab_solver_stages.hpp +++ b/src/solvers/bicgstab_solver_stages.hpp @@ -105,9 +105,9 @@ class BiCGSTABSolverStages : public SolverBase { auto &md_base = pmesh->mesh_data.Add(container_base, partitions[partition]); // Container in which the solution is stored and with which the downstream user can // interact. 
This container only requires the fields in sol_fields - auto &md_u = pmesh->mesh_data.Add(container_u, partitions[partition]); + auto &md_u = pmesh->mesh_data.Add(container_u, partitions[partition], sol_fields); // Container of the rhs, only requires fields in sol_fields - auto &md_rhs = pmesh->mesh_data.Add(container_rhs, partitions[partition]); + auto &md_rhs = pmesh->mesh_data.Add(container_rhs, partitions[partition], sol_fields); // Internal solver containers auto &md_rhat0 = pmesh->mesh_data.Add(container_rhat0, md_u, sol_fields); auto &md_v = pmesh->mesh_data.Add(container_v, md_u, sol_fields); diff --git a/src/solvers/cg_solver_stages.hpp b/src/solvers/cg_solver_stages.hpp index 2c3e13fbc07b..e3b8aab58f15 100644 --- a/src/solvers/cg_solver_stages.hpp +++ b/src/solvers/cg_solver_stages.hpp @@ -91,9 +91,9 @@ class CGSolverStages : public SolverBase { auto &md_base = pmesh->mesh_data.Add(container_base, partitions[partition]); // Container in which the solution is stored and with which the downstream user can // interact. This container only requires the fields in sol_fields - auto &md_u = pmesh->mesh_data.Add(container_u, partitions[partition]); + auto &md_u = pmesh->mesh_data.Add(container_u, partitions[partition], sol_fields); // Container of the rhs, only requires fields in sol_fields - auto &md_rhs = pmesh->mesh_data.Add(container_rhs, partitions[partition]); + auto &md_rhs = pmesh->mesh_data.Add(container_rhs, partitions[partition], sol_fields); // Internal solver containers auto &md_x = pmesh->mesh_data.Add(container_x, md_u, sol_fields); auto &md_r = pmesh->mesh_data.Add(container_r, md_u, sol_fields); diff --git a/src/solvers/mg_solver_stages.hpp b/src/solvers/mg_solver_stages.hpp index 519a02ef1068..cb4afed43831 100644 --- a/src/solvers/mg_solver_stages.hpp +++ b/src/solvers/mg_solver_stages.hpp @@ -261,7 +261,7 @@ class MGSolverStages : public SolverBase { auto partitions = pmesh->GetDefaultBlockPartitions(GridIdentifier::two_level_composite(level)); auto &md_base = pmesh->mesh_data.Add(container_base, partitions[partition]); - auto &md_rhs = pmesh->mesh_data.Add(container_rhs, partitions[partition]); + auto &md_rhs = pmesh->mesh_data.Add(container_rhs, partitions[partition], sol_fields); auto &md_diag = pmesh->mesh_data.Add(container_diag, md_base, sol_fields); auto comm = @@ -377,8 +377,8 @@ class MGSolverStages : public SolverBase { pmesh->GetDefaultBlockPartitions(GridIdentifier::two_level_composite(level)); if (partition >= partitions.size()) return dependence; auto &md = pmesh->mesh_data.Add(container_base, partitions[partition]); - auto &md_u = pmesh->mesh_data.Add(container_u, partitions[partition]); - auto &md_rhs = pmesh->mesh_data.Add(container_rhs, partitions[partition]); + auto &md_u = pmesh->mesh_data.Add(container_u, partitions[partition], sol_fields); + auto &md_rhs = pmesh->mesh_data.Add(container_rhs, partitions[partition], sol_fields); auto &md_res_err = pmesh->mesh_data.Add(container_res_err, md, sol_fields); auto &md_temp = pmesh->mesh_data.Add(container_temp, md, sol_fields); auto &md_u0 = pmesh->mesh_data.Add(container_u0, md, sol_fields); From 6fb49a429300387879030aff20c9efe4f45a0c09 Mon Sep 17 00:00:00 2001 From: Luke Roberts Date: Thu, 10 Oct 2024 13:31:16 -0600 Subject: [PATCH 47/62] format --- example/poisson_gmg/poisson_driver.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/example/poisson_gmg/poisson_driver.cpp b/example/poisson_gmg/poisson_driver.cpp index d593265d27f0..1b77534ace20 100644 --- 
a/example/poisson_gmg/poisson_driver.cpp +++ b/example/poisson_gmg/poisson_driver.cpp @@ -89,7 +89,7 @@ TaskCollection PoissonDriver::MakeTaskCollection(BlockList_t &blocks) { auto copy_rhs = tl.AddTask(get_rhs, TF(solvers::utils::CopyData), md); copy_rhs = tl.AddTask( copy_rhs, TF(solvers::StageUtils::CopyData>), md, md_rhs); - + // Set initial solution guess to zero auto zero_u = tl.AddTask(copy_rhs, TF(solvers::utils::SetToZero), md); zero_u = tl.AddTask(zero_u, TF(solvers::utils::SetToZero), md_u); From 5b737e987fb8991676088a7b8657c57cb09a1164 Mon Sep 17 00:00:00 2001 From: Luke Roberts Date: Tue, 19 Nov 2024 18:06:38 -0700 Subject: [PATCH 48/62] add unique ids to staged solvers --- src/solvers/bicgstab_solver_stages.hpp | 5 +++-- src/solvers/cg_solver_stages.hpp | 3 ++- src/solvers/mg_solver_stages.hpp | 3 ++- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/solvers/bicgstab_solver_stages.hpp b/src/solvers/bicgstab_solver_stages.hpp index 0ebd2875ca9c..58f759925b8c 100644 --- a/src/solvers/bicgstab_solver_stages.hpp +++ b/src/solvers/bicgstab_solver_stages.hpp @@ -58,7 +58,8 @@ class BiCGSTABSolverStages : public SolverBase { // Internal containers for solver which create deep copies of sol_fields std::string container_rhat0, container_v, container_h, container_s; std::string container_t, container_r, container_p, container_x, container_diag; - + + static inline std::size_t id{0}; public: BiCGSTABSolverStages(const std::string &container_base, const std::string &container_u, const std::string &container_rhs, ParameterInput *pin, @@ -70,7 +71,7 @@ class BiCGSTABSolverStages : public SolverBase { eqs_(eq_in) { FieldTL::IterateTypes( [this](auto t) { this->sol_fields.push_back(decltype(t)::name()); }); - std::string solver_id = "bicgstab"; + std::string solver_id = "bicgstab" + std::to_string(id++); container_rhat0 = solver_id + "_rhat0"; container_v = solver_id + "_v"; container_h = solver_id + "_h"; diff --git a/src/solvers/cg_solver_stages.hpp b/src/solvers/cg_solver_stages.hpp index e3b8aab58f15..e76b1e63a0a6 100644 --- a/src/solvers/cg_solver_stages.hpp +++ b/src/solvers/cg_solver_stages.hpp @@ -60,6 +60,7 @@ class CGSolverStages : public SolverBase { // Internal containers for solver which create deep copies of sol_fields std::string container_x, container_r, container_v, container_p; + static inline std::size_t id{0}; public: CGSolverStages(const std::string &container_base, const std::string &container_u, const std::string &container_rhs, ParameterInput *pin, @@ -71,7 +72,7 @@ class CGSolverStages : public SolverBase { eqs_(eq_in) { FieldTL::IterateTypes( [this](auto t) { this->sol_fields.push_back(decltype(t)::name()); }); - std::string solver_id = "cg"; + std::string solver_id = "cg" + std::to_string(id++); container_x = solver_id + "_x"; container_r = solver_id + "_r"; container_v = solver_id + "_v"; diff --git a/src/solvers/mg_solver_stages.hpp b/src/solvers/mg_solver_stages.hpp index cb4afed43831..25e4efb87473 100644 --- a/src/solvers/mg_solver_stages.hpp +++ b/src/solvers/mg_solver_stages.hpp @@ -52,6 +52,7 @@ namespace solvers { // associated with the type diag_t. This is used for Jacobi iteration. 
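// As a hedged illustration (not part of this patch): the name ExampleEquations
// and the exact template parameters below are assumptions, sketched from how
// the stages solvers call into equations_t elsewhere in this series
// (IndependentVars, and SetDiagonal taking a base and a diagonal container).
//
//   struct ExampleEquations {
//     using IndependentVars = parthenon::TypeList<u>;
//     // Add tasks that apply A to the fields of md_in and store the result in
//     // md_out; md_mat carries any data defining A (assumed signature).
//     template <class TL_t>
//     parthenon::TaskID Ax(TL_t &tl, parthenon::TaskID depends_on,
//                          std::shared_ptr<parthenon::MeshData<Real>> &md_mat,
//                          std::shared_ptr<parthenon::MeshData<Real>> &md_in,
//                          std::shared_ptr<parthenon::MeshData<Real>> &md_out);
//     // Store the (possibly approximate) diagonal of A in md_diag, as used by
//     // the Jacobi smoother.
//     parthenon::TaskStatus
//     SetDiagonal(std::shared_ptr<parthenon::MeshData<Real>> &md,
//                 std::shared_ptr<parthenon::MeshData<Real>> &md_diag);
//   };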
template class MGSolverStages : public SolverBase { + static inline std::size_t id{0}; public: using FieldTL = typename equations_t::IndependentVars; @@ -85,7 +86,7 @@ class MGSolverStages : public SolverBase { prolongator_(prol_in) { FieldTL::IterateTypes( [this](auto t) { this->sol_fields.push_back(decltype(t)::name()); }); - std::string solver_id = "mg"; + std::string solver_id = "mg" + std::to_string(id++); container_res_err = solver_id + "_res_err"; container_temp = solver_id + "_temp"; container_u0 = solver_id + "_u0"; From 83151d204023ba4eae5918efb7916ed020431791 Mon Sep 17 00:00:00 2001 From: Luke Roberts Date: Wed, 20 Nov 2024 11:40:16 -0700 Subject: [PATCH 49/62] small --- doc/sphinx/src/solvers.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/sphinx/src/solvers.rst b/doc/sphinx/src/solvers.rst index 51a4ee6c7b9b..e3cbc7e2baa2 100644 --- a/doc/sphinx/src/solvers.rst +++ b/doc/sphinx/src/solvers.rst @@ -76,7 +76,8 @@ Some notes: operators that are separate from the standard prolongation machinery in Parthenon. This allows for defining boundary aware prolongation operators and having different prolongation operators in the ghost cells of blocks from the prolongation operators used in their interiors. Users can - easily define their own prolongation operators. An example of using these interior prolongation + easily define their own prolongation operators. The prolongation functor is passed as a template + argument to the multi-grid solver class. An example of using these interior prolongation operators is contained in the ``poisson_gmg`` example. Some implementation notes about geometric multi-grid can be found in From 5f866e7d06a7c68ca7f1306d089b4cea9e747ca5 Mon Sep 17 00:00:00 2001 From: Luke Roberts Date: Thu, 21 Nov 2024 12:07:23 -0700 Subject: [PATCH 50/62] clean up macros --- src/solvers/mg_solver.hpp | 1 + src/solvers/mg_solver_stages.hpp | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/solvers/mg_solver.hpp b/src/solvers/mg_solver.hpp index d6b72bbe06e9..8e03f7fa0320 100644 --- a/src/solvers/mg_solver.hpp +++ b/src/solvers/mg_solver.hpp @@ -527,6 +527,7 @@ class MGSolver : public SolverBase { } // The boundaries are not up to date on return return last_task; +#undef BTF } }; diff --git a/src/solvers/mg_solver_stages.hpp b/src/solvers/mg_solver_stages.hpp index 25e4efb87473..4c9c4fb1594c 100644 --- a/src/solvers/mg_solver_stages.hpp +++ b/src/solvers/mg_solver_stages.hpp @@ -330,7 +330,7 @@ class MGSolverStages : public SolverBase { tl.AddTask(task_out, TF(ReceiveBoundBufs), md); task_out = tl.AddTask(task_out, TF(SetBounds), md); } - task_out = tl.AddTask(task_out, BTF(&equations_t::SetDiagonal), &eqs_, md, md_diag); + task_out = tl.AddTask(task_out, TF(&equations_t::SetDiagonal), &eqs_, md, md_diag); // If we are finer than the coarsest level: if (level > min_level) { task_out = @@ -498,6 +498,7 @@ class MGSolverStages : public SolverBase { } // The boundaries are not up to date on return return last_task; +#undef BTF } }; From bb49af87997d61a62dfad6be1ea149e8bdf52064 Mon Sep 17 00:00:00 2001 From: Luke Roberts Date: Thu, 21 Nov 2024 12:08:38 -0700 Subject: [PATCH 51/62] add staged GMG for testing --- example/poisson_gmg/poisson_package.cpp | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/example/poisson_gmg/poisson_package.cpp b/example/poisson_gmg/poisson_package.cpp index 25dcf4c9f682..04078caadd31 100644 --- a/example/poisson_gmg/poisson_package.cpp +++ 
b/example/poisson_gmg/poisson_package.cpp @@ -96,6 +96,10 @@ std::shared_ptr Initialize(ParameterInput *pin) { pkg->AddParam<>("poisson_equation", eq, parthenon::Params::Mutability::Mutable); std::shared_ptr psolver; + using PoissEqStages = poisson_package::PoissonEquationStages; + using prolongator_t = parthenon::solvers::ProlongationBlockInteriorDefault; + using preconditioner_t = + parthenon::solvers::MGSolverStages; if (solver == "MG") { parthenon::solvers::MGParams params(pin, "poisson/solver_params"); psolver = std::make_shared>( @@ -109,19 +113,15 @@ std::shared_ptr Initialize(ParameterInput *pin) { parthenon::solvers::CGParams params(pin, "poisson/solver_params"); psolver = std::make_shared>( pkg.get(), params, eq); + } else if (solver == "MGStages") { + psolver = std::make_shared< + parthenon::solvers::MGSolverStages>( + "base", "u", "rhs", pin, "poisson/solver_params", PoissEqStages(pin, "poisson")); } else if (solver == "CGStages") { - using PoissEqStages = poisson_package::PoissonEquationStages; - using prolongator_t = parthenon::solvers::ProlongationBlockInteriorZeroDirichlet; - using preconditioner_t = - parthenon::solvers::MGSolverStages; psolver = std::make_shared< parthenon::solvers::CGSolverStages>( "base", "u", "rhs", pin, "poisson/solver_params", PoissEqStages(pin, "poisson")); } else if (solver == "BiCGSTABStages") { - using PoissEqStages = poisson_package::PoissonEquationStages; - using prolongator_t = parthenon::solvers::ProlongationBlockInteriorZeroDirichlet; - using preconditioner_t = - parthenon::solvers::MGSolverStages; psolver = std::make_shared< parthenon::solvers::BiCGSTABSolverStages>( "base", "u", "rhs", pin, "poisson/solver_params", PoissEqStages(pin, "poisson")); @@ -142,7 +142,7 @@ std::shared_ptr Initialize(ParameterInput *pin) { std::vector flags{Metadata::Cell, Metadata::Independent, Metadata::FillGhost, Metadata::WithFluxes, Metadata::GMGRestrict}; - if (solver == "CGStages" || solver == "BiCGSTABStages") + if (solver == "CGStages" || solver == "BiCGSTABStages" || solver == "MGStages") flags.push_back(Metadata::GMGProlongate); auto mflux_comm = Metadata(flags); if (prolong == "Linear") { From d00c887f9ef3b7b886abcacae022342c8053d8ce Mon Sep 17 00:00:00 2001 From: Luke Roberts Date: Thu, 21 Nov 2024 14:37:17 -0700 Subject: [PATCH 52/62] remove type based solvers --- example/poisson_gmg/poisson_driver.cpp | 6 +- example/poisson_gmg/poisson_package.cpp | 22 +- src/CMakeLists.txt | 3 - src/solvers/bicgstab_solver.hpp | 360 ---------------- src/solvers/bicgstab_solver_stages.hpp | 31 +- src/solvers/cg_solver.hpp | 271 ------------ src/solvers/cg_solver_stages.hpp | 22 +- src/solvers/mg_solver.hpp | 538 ------------------------ src/solvers/mg_solver_stages.hpp | 24 ++ 9 files changed, 79 insertions(+), 1198 deletions(-) delete mode 100644 src/solvers/bicgstab_solver.hpp delete mode 100644 src/solvers/cg_solver.hpp delete mode 100644 src/solvers/mg_solver.hpp diff --git a/example/poisson_gmg/poisson_driver.cpp b/example/poisson_gmg/poisson_driver.cpp index 1b77534ace20..b9a919d59100 100644 --- a/example/poisson_gmg/poisson_driver.cpp +++ b/example/poisson_gmg/poisson_driver.cpp @@ -29,10 +29,10 @@ #include "poisson_equation_stages.hpp" #include "poisson_package.hpp" #include "prolong_restrict/prolong_restrict.hpp" -#include "solvers/bicgstab_solver.hpp" -#include "solvers/cg_solver.hpp" +#include "solvers/bicgstab_solver_stages.hpp" #include "solvers/cg_solver_stages.hpp" -#include "solvers/mg_solver.hpp" +#include "solvers/mg_solver_stages.hpp" +#include 
"solvers/solver_utils.hpp" using namespace parthenon::driver::prelude; diff --git a/example/poisson_gmg/poisson_package.cpp b/example/poisson_gmg/poisson_package.cpp index 04078caadd31..19dc7d7c7479 100644 --- a/example/poisson_gmg/poisson_package.cpp +++ b/example/poisson_gmg/poisson_package.cpp @@ -23,11 +23,8 @@ #include #include #include -#include #include -#include #include -#include #include #include "defs.hpp" @@ -100,20 +97,7 @@ std::shared_ptr Initialize(ParameterInput *pin) { using prolongator_t = parthenon::solvers::ProlongationBlockInteriorDefault; using preconditioner_t = parthenon::solvers::MGSolverStages; - if (solver == "MG") { - parthenon::solvers::MGParams params(pin, "poisson/solver_params"); - psolver = std::make_shared>( - pkg.get(), params, eq); - } else if (solver == "BiCGSTAB") { - parthenon::solvers::BiCGSTABParams params(pin, "poisson/solver_params"); - psolver = - std::make_shared>( - pkg.get(), params, eq); - } else if (solver == "CG") { - parthenon::solvers::CGParams params(pin, "poisson/solver_params"); - psolver = std::make_shared>( - pkg.get(), params, eq); - } else if (solver == "MGStages") { + if (solver == "MGStages") { psolver = std::make_shared< parthenon::solvers::MGSolverStages>( "base", "u", "rhs", pin, "poisson/solver_params", PoissEqStages(pin, "poisson")); @@ -141,9 +125,7 @@ std::shared_ptr Initialize(ParameterInput *pin) { std::vector flags{Metadata::Cell, Metadata::Independent, Metadata::FillGhost, Metadata::WithFluxes, - Metadata::GMGRestrict}; - if (solver == "CGStages" || solver == "BiCGSTABStages" || solver == "MGStages") - flags.push_back(Metadata::GMGProlongate); + Metadata::GMGRestrict, Metadata::GMGProlongate}; auto mflux_comm = Metadata(flags); if (prolong == "Linear") { mflux_comm.RegisterRefinementOps(); diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 7f99aebe1fa0..7ca142d809a5 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -229,12 +229,9 @@ add_library(parthenon amr_criteria/refinement_package.cpp amr_criteria/refinement_package.hpp - solvers/bicgstab_solver.hpp solvers/bicgstab_solver_stages.hpp - solvers/cg_solver.hpp solvers/cg_solver_stages.hpp solvers/internal_prolongation.hpp - solvers/mg_solver.hpp solvers/mg_solver_stages.hpp solvers/solver_base.hpp solvers/solver_utils.hpp diff --git a/src/solvers/bicgstab_solver.hpp b/src/solvers/bicgstab_solver.hpp deleted file mode 100644 index 94a5824f47aa..000000000000 --- a/src/solvers/bicgstab_solver.hpp +++ /dev/null @@ -1,360 +0,0 @@ -//======================================================================================== -// (C) (or copyright) 2023-2024. Triad National Security, LLC. All rights reserved. -// -// This program was produced under U.S. Government contract 89233218CNA000001 for Los -// Alamos National Laboratory (LANL), which is operated by Triad National Security, LLC -// for the U.S. Department of Energy/National Nuclear Security Administration. All rights -// in the program are reserved by Triad National Security, LLC, and the U.S. Department -// of Energy/National Nuclear Security Administration. The Government is granted for -// itself and others acting on its behalf a nonexclusive, paid-up, irrevocable worldwide -// license in this material to reproduce, prepare derivative works, distribute copies to -// the public, perform publicly and display publicly, and to permit others to do so. 
-//======================================================================================== -#ifndef SOLVERS_BICGSTAB_SOLVER_HPP_ -#define SOLVERS_BICGSTAB_SOLVER_HPP_ - -#include -#include -#include -#include -#include - -#include "interface/mesh_data.hpp" -#include "interface/meshblock_data.hpp" -#include "interface/state_descriptor.hpp" -#include "kokkos_abstraction.hpp" -#include "solvers/mg_solver.hpp" -#include "solvers/solver_base.hpp" -#include "solvers/solver_utils.hpp" -#include "tasks/tasks.hpp" -#include "utils/type_list.hpp" - -namespace parthenon { - -namespace solvers { - -enum class Preconditioner { None, Diagonal, Multigrid }; -struct BiCGSTABParams { - MGParams mg_params; - int max_iters = 1000; - std::shared_ptr residual_tolerance = std::make_shared(1.e-12); - Preconditioner precondition_type = Preconditioner::Multigrid; - bool print_per_step = false; - bool relative_residual = false; - BiCGSTABParams() = default; - BiCGSTABParams(ParameterInput *pin, const std::string &input_block) { - max_iters = pin->GetOrAddInteger(input_block, "max_iterations", max_iters); - *residual_tolerance = - pin->GetOrAddReal(input_block, "residual_tolerance", *residual_tolerance); - bool precondition = pin->GetOrAddBoolean(input_block, "precondition", true); - std::string precondition_str = - pin->GetOrAddString(input_block, "preconditioner", "Multigrid"); - if (precondition && precondition_str == "Multigrid") { - precondition_type = Preconditioner::Multigrid; - } else if (precondition && precondition_str == "Diagonal") { - precondition_type = Preconditioner::Diagonal; - } else { - precondition_type = Preconditioner::None; - } - print_per_step = pin->GetOrAddBoolean(input_block, "print_per_step", print_per_step); - mg_params = MGParams(pin, input_block); - relative_residual = - pin->GetOrAddBoolean(input_block, "relative_residual", relative_residual); - } -}; - -// The equations class must include a template method -// -// template -// TaskID Ax(TL_t &tl, TaskID depends_on, std::shared_ptr> &md) -// -// that takes a field associated with x_t and applies -// the matrix A to it and stores the result in y_t. 
-template -class BiCGSTABSolver : public SolverBase { - public: - PARTHENON_INTERNALSOLVERVARIABLE(u, rhat0); - PARTHENON_INTERNALSOLVERVARIABLE(u, v); - PARTHENON_INTERNALSOLVERVARIABLE(u, h); - PARTHENON_INTERNALSOLVERVARIABLE(u, s); - PARTHENON_INTERNALSOLVERVARIABLE(u, t); - PARTHENON_INTERNALSOLVERVARIABLE(u, r); - PARTHENON_INTERNALSOLVERVARIABLE(u, p); - PARTHENON_INTERNALSOLVERVARIABLE(u, x); - PARTHENON_INTERNALSOLVERVARIABLE(u, diag); - - using internal_types_tl = TypeList; - using preconditioner_t = MGSolver; - using all_internal_types_tl = - concatenate_type_lists_t; - - std::vector GetInternalVariableNames() const { - std::vector names; - if (params_.precondition_type == Preconditioner::Multigrid) { - all_internal_types_tl::IterateTypes( - [&names](auto t) { names.push_back(decltype(t)::name()); }); - } else { - internal_types_tl::IterateTypes( - [&names](auto t) { names.push_back(decltype(t)::name()); }); - } - return names; - } - - BiCGSTABSolver(StateDescriptor *pkg, BiCGSTABParams params_in, - equations eq_in = equations(), std::vector shape = {}, - const std::string &container = "base") - : preconditioner(pkg, params_in.mg_params, eq_in, shape, container), - params_(params_in), iter_counter(0), eqs_(eq_in), container_(container) { - using namespace refinement_ops; - auto m_no_ghost = - Metadata({Metadata::Cell, Metadata::Derived, Metadata::OneCopy}, shape); - pkg->AddField(rhat0::name(), m_no_ghost); - pkg->AddField(v::name(), m_no_ghost); - pkg->AddField(h::name(), m_no_ghost); - pkg->AddField(s::name(), m_no_ghost); - pkg->AddField(t::name(), m_no_ghost); - pkg->AddField(r::name(), m_no_ghost); - pkg->AddField(p::name(), m_no_ghost); - pkg->AddField(x::name(), m_no_ghost); - pkg->AddField(diag::name(), m_no_ghost); - } - - TaskID AddSetupTasks(TaskList &tl, TaskID dependence, int partition, Mesh *pmesh) { - if (params_.precondition_type == Preconditioner::Multigrid) { - return preconditioner.AddSetupTasks(tl, dependence, partition, pmesh); - } else if (params_.precondition_type == Preconditioner::Diagonal) { - auto partitions = pmesh->GetDefaultBlockPartitions(); - auto &md = pmesh->mesh_data.Add(container_, partitions[partition]); - return tl.AddTask(dependence, &equations::template SetDiagonal, &eqs_, md); - } else { - return dependence; - } - } - - TaskID AddTasks(TaskList &tl, TaskID dependence, const int partition, Mesh *pmesh) { - using namespace utils; - TaskID none; - auto &md = pmesh->mesh_data.GetOrAdd(container_, partition); - std::string label = container_ + "bicg_comm_" + std::to_string(partition); - auto &md_comm = - pmesh->mesh_data.AddShallow(label, md, std::vector{u::name()}); - iter_counter = 0; - bool multilevel = pmesh->multilevel; - - // Initialization: x <- 0, r <- rhs, rhat0 <- rhs, - // rhat0r_old <- (rhat0, r), p <- r, u <- 0 - auto zero_x = tl.AddTask(dependence, TF(SetToZero), md); - auto zero_u_init = tl.AddTask(dependence, TF(SetToZero), md); - auto copy_r = tl.AddTask(dependence, TF(CopyData), md); - auto copy_p = tl.AddTask(dependence, TF(CopyData), md); - auto copy_rhat0 = tl.AddTask(dependence, TF(CopyData), md); - auto get_rhat0r_init = DotProduct(dependence, tl, &rhat0r, md); - auto get_rhs2 = get_rhat0r_init; - if (params_.relative_residual || params_.print_per_step) - get_rhs2 = DotProduct(dependence, tl, &rhs2, md); - auto initialize = tl.AddTask( - TaskQualifier::once_per_region | TaskQualifier::local_sync, - zero_x | zero_u_init | copy_r | copy_p | copy_rhat0 | get_rhat0r_init | get_rhs2, - "zero factors", - [](BiCGSTABSolver 
*solver) { - solver->iter_counter = -1; - return TaskStatus::complete; - }, - this); - tl.AddTask( - TaskQualifier::once_per_region, initialize, "print to screen", - [&](BiCGSTABSolver *solver, std::shared_ptr res_tol, bool relative_residual, - Mesh *pm) { - if (Globals::my_rank == 0 && params_.print_per_step) { - Real tol = relative_residual - ? *res_tol * std::sqrt(solver->rhs2.val / pm->GetTotalCells()) - : *res_tol; - printf("# [0] v-cycle\n# [1] rms-residual (tol = %e) \n# [2] rms-error\n", - tol); - printf("0 %e\n", std::sqrt(solver->rhs2.val / pm->GetTotalCells())); - } - return TaskStatus::complete; - }, - this, params_.residual_tolerance, params_.relative_residual, pmesh); - - // BEGIN ITERATIVE TASKS - auto [itl, solver_id] = tl.AddSublist(initialize, {1, params_.max_iters}); - - auto sync = itl.AddTask(TaskQualifier::local_sync, none, - []() { return TaskStatus::complete; }); - auto reset = itl.AddTask( - TaskQualifier::once_per_region, sync, "update values", - [](BiCGSTABSolver *solver) { - solver->rhat0r_old = solver->rhat0r.val; - solver->iter_counter++; - return TaskStatus::complete; - }, - this); - - // 1. u <- M p - auto precon1 = reset; - if (params_.precondition_type == Preconditioner::Multigrid) { - auto set_rhs = itl.AddTask(precon1, TF(CopyData), md); - auto zero_u = itl.AddTask(precon1, TF(SetToZero), md); - precon1 = - preconditioner.AddLinearOperatorTasks(itl, set_rhs | zero_u, partition, pmesh); - } else if (params_.precondition_type == Preconditioner::Diagonal) { - precon1 = itl.AddTask(precon1, TF(ADividedByB), md); - } else { - precon1 = itl.AddTask(precon1, TF(CopyData), md); - } - - // 2. v <- A u - auto comm = - AddBoundaryExchangeTasks(precon1, itl, md_comm, multilevel); - auto get_v = eqs_.template Ax(itl, comm, md); - - // 3. rhat0v <- (rhat0, v) - auto get_rhat0v = DotProduct(get_v, itl, &rhat0v, md); - - // 4. h <- x + alpha u (alpha = rhat0r_old / rhat0v) - auto correct_h = itl.AddTask( - get_rhat0v, "h <- x + alpha u", - [](BiCGSTABSolver *solver, std::shared_ptr> &md) { - Real alpha = solver->rhat0r_old / solver->rhat0v.val; - return AddFieldsAndStore(md, 1.0, alpha); - }, - this, md); - - // 5. s <- r - alpha v (alpha = rhat0r_old / rhat0v) - auto correct_s = itl.AddTask( - get_rhat0v, "s <- r - alpha v", - [](BiCGSTABSolver *solver, std::shared_ptr> &md) { - Real alpha = solver->rhat0r_old / solver->rhat0v.val; - return AddFieldsAndStore(md, 1.0, -alpha); - }, - this, md); - - // Check and print out residual - auto get_res = DotProduct(correct_s, itl, &residual, md); - - auto print = itl.AddTask( - TaskQualifier::once_per_region, get_res, - [&](BiCGSTABSolver *solver, Mesh *pmesh) { - Real rms_res = std::sqrt(solver->residual.val / pmesh->GetTotalCells()); - if (Globals::my_rank == 0 && solver->params_.print_per_step) - printf("%i %e\n", solver->iter_counter * 2 + 1, rms_res); - return TaskStatus::complete; - }, - this, pmesh); - - // 6. u <- M s - auto precon2 = correct_s; - if (params_.precondition_type == Preconditioner::Multigrid) { - auto set_rhs = itl.AddTask(precon2, TF(CopyData), md); - auto zero_u = itl.AddTask(precon2, TF(SetToZero), md); - precon2 = - preconditioner.AddLinearOperatorTasks(itl, set_rhs | zero_u, partition, pmesh); - } else if (params_.precondition_type == Preconditioner::Diagonal) { - precon2 = itl.AddTask(precon2, TF(ADividedByB), md); - } else { - precon2 = itl.AddTask(precon2, TF(CopyData), md); - } - - // 7. 
t <- A u - auto pre_t_comm = - AddBoundaryExchangeTasks(precon2, itl, md_comm, multilevel); - auto get_t = eqs_.template Ax(itl, pre_t_comm, md); - - // 8. omega <- (t,s) / (t,t) - auto get_ts = DotProduct(get_t, itl, &ts, md); - auto get_tt = DotProduct(get_t, itl, &tt, md); - - // 9. x <- h + omega u - auto correct_x = itl.AddTask( - get_tt | get_ts, "x <- h + omega u", - [](BiCGSTABSolver *solver, std::shared_ptr> &md) { - Real omega = solver->ts.val / solver->tt.val; - return AddFieldsAndStore(md, 1.0, omega); - }, - this, md); - - // 10. r <- s - omega t - auto correct_r = itl.AddTask( - get_tt | get_ts, "r <- s - omega t", - [](BiCGSTABSolver *solver, std::shared_ptr> &md) { - Real omega = solver->ts.val / solver->tt.val; - return AddFieldsAndStore(md, 1.0, -omega); - }, - this, md); - - // Check and print out residual - auto get_res2 = DotProduct(correct_r, itl, &residual, md); - - get_res2 = itl.AddTask( - TaskQualifier::once_per_region, get_res2, - [&](BiCGSTABSolver *solver, Mesh *pmesh) { - Real rms_err = std::sqrt(solver->residual.val / pmesh->GetTotalCells()); - if (Globals::my_rank == 0 && solver->params_.print_per_step) - printf("%i %e\n", solver->iter_counter * 2 + 2, rms_err); - return TaskStatus::complete; - }, - this, pmesh); - - // 11. rhat0r <- (rhat0, r) - auto get_rhat0r = DotProduct(correct_r, itl, &rhat0r, md); - - // 12. beta <- rhat0r / rhat0r_old * alpha / omega - // 13. p <- r + beta * (p - omega * v) - auto update_p = itl.AddTask( - get_rhat0r | get_res2, "p <- r + beta * (p - omega * v)", - [](BiCGSTABSolver *solver, std::shared_ptr> &md) { - Real alpha = solver->rhat0r_old / solver->rhat0v.val; - Real omega = solver->ts.val / solver->tt.val; - Real beta = solver->rhat0r.val / solver->rhat0r_old * alpha / omega; - AddFieldsAndStore(md, 1.0, -omega); - return AddFieldsAndStore(md, 1.0, beta); - return TaskStatus::complete; - }, - this, md); - - // 14. rhat0r_old <- rhat0r, zero all reductions - auto check = itl.AddTask( - TaskQualifier::completion, update_p | correct_x, "rhat0r_old <- rhat0r", - [partition](BiCGSTABSolver *solver, Mesh *pmesh, int max_iter, - std::shared_ptr res_tol, bool relative_residual) { - Real rms_res = std::sqrt(solver->residual.val / pmesh->GetTotalCells()); - solver->final_residual = rms_res; - solver->final_iteration = solver->iter_counter; - Real tol = relative_residual - ? 
*res_tol * std::sqrt(solver->rhs2.val / pmesh->GetTotalCells()) - : *res_tol; - if (rms_res < tol || solver->iter_counter >= max_iter) { - solver->final_residual = rms_res; - solver->final_iteration = solver->iter_counter; - return TaskStatus::complete; - } - return TaskStatus::iterate; - }, - this, pmesh, params_.max_iters, params_.residual_tolerance, - params_.relative_residual); - - return tl.AddTask(solver_id, TF(CopyData), md); - } - - Real GetSquaredResidualSum() const { return residual.val; } - int GetCurrentIterations() const { return iter_counter; } - - BiCGSTABParams &GetParams() { return params_; } - - protected: - preconditioner_t preconditioner; - BiCGSTABParams params_; - int iter_counter; - AllReduce rtr, pAp, rhat0v, rhat0r, ts, tt, residual, rhs2; - Real rhat0r_old; - equations eqs_; - std::string container_; -}; - -} // namespace solvers - -} // namespace parthenon - -#endif // SOLVERS_BICGSTAB_SOLVER_HPP_ diff --git a/src/solvers/bicgstab_solver_stages.hpp b/src/solvers/bicgstab_solver_stages.hpp index 58f759925b8c..223e93777344 100644 --- a/src/solvers/bicgstab_solver_stages.hpp +++ b/src/solvers/bicgstab_solver_stages.hpp @@ -23,7 +23,6 @@ #include "interface/meshblock_data.hpp" #include "interface/state_descriptor.hpp" #include "kokkos_abstraction.hpp" -#include "solvers/mg_solver.hpp" #include "solvers/mg_solver_stages.hpp" #include "solvers/solver_base.hpp" #include "solvers/solver_utils_stages.hpp" @@ -34,6 +33,36 @@ namespace parthenon { namespace solvers { +enum class Preconditioner { None, Diagonal, Multigrid }; +struct BiCGSTABParams { + MGParams mg_params; + int max_iters = 1000; + std::shared_ptr residual_tolerance = std::make_shared(1.e-12); + Preconditioner precondition_type = Preconditioner::Multigrid; + bool print_per_step = false; + bool relative_residual = false; + BiCGSTABParams() = default; + BiCGSTABParams(ParameterInput *pin, const std::string &input_block) { + max_iters = pin->GetOrAddInteger(input_block, "max_iterations", max_iters); + *residual_tolerance = + pin->GetOrAddReal(input_block, "residual_tolerance", *residual_tolerance); + bool precondition = pin->GetOrAddBoolean(input_block, "precondition", true); + std::string precondition_str = + pin->GetOrAddString(input_block, "preconditioner", "Multigrid"); + if (precondition && precondition_str == "Multigrid") { + precondition_type = Preconditioner::Multigrid; + } else if (precondition && precondition_str == "Diagonal") { + precondition_type = Preconditioner::Diagonal; + } else { + precondition_type = Preconditioner::None; + } + print_per_step = pin->GetOrAddBoolean(input_block, "print_per_step", print_per_step); + mg_params = MGParams(pin, input_block); + relative_residual = + pin->GetOrAddBoolean(input_block, "relative_residual", relative_residual); + } +}; + // The equations class must include a template method // // template diff --git a/src/solvers/cg_solver.hpp b/src/solvers/cg_solver.hpp deleted file mode 100644 index 307fb9cff157..000000000000 --- a/src/solvers/cg_solver.hpp +++ /dev/null @@ -1,271 +0,0 @@ -//======================================================================================== -// (C) (or copyright) 2023-2024. Triad National Security, LLC. All rights reserved. -// -// This program was produced under U.S. Government contract 89233218CNA000001 for Los -// Alamos National Laboratory (LANL), which is operated by Triad National Security, LLC -// for the U.S. Department of Energy/National Nuclear Security Administration. 
All rights -// in the program are reserved by Triad National Security, LLC, and the U.S. Department -// of Energy/National Nuclear Security Administration. The Government is granted for -// itself and others acting on its behalf a nonexclusive, paid-up, irrevocable worldwide -// license in this material to reproduce, prepare derivative works, distribute copies to -// the public, perform publicly and display publicly, and to permit others to do so. -//======================================================================================== -#ifndef SOLVERS_CG_SOLVER_HPP_ -#define SOLVERS_CG_SOLVER_HPP_ - -#include -#include -#include -#include -#include -#include - -#include "interface/mesh_data.hpp" -#include "interface/meshblock_data.hpp" -#include "interface/state_descriptor.hpp" -#include "kokkos_abstraction.hpp" -#include "solvers/mg_solver.hpp" -#include "solvers/solver_base.hpp" -#include "solvers/solver_utils.hpp" -#include "tasks/tasks.hpp" -#include "utils/type_list.hpp" - -namespace parthenon { - -namespace solvers { - -struct CGParams { - MGParams mg_params; - int max_iters = 1000; - std::shared_ptr residual_tolerance = std::make_shared(1.e-12); - bool precondition = true; - bool print_per_step = false; - bool relative_residual = false; - CGParams() = default; - CGParams(ParameterInput *pin, const std::string &input_block) { - max_iters = pin->GetOrAddInteger(input_block, "max_iterations", max_iters); - *residual_tolerance = - pin->GetOrAddReal(input_block, "residual_tolerance", *residual_tolerance); - precondition = pin->GetOrAddBoolean(input_block, "precondition", precondition); - print_per_step = pin->GetOrAddBoolean(input_block, "print_per_step", print_per_step); - mg_params = MGParams(pin, input_block); - relative_residual = - pin->GetOrAddBoolean(input_block, "relative_residual", relative_residual); - } -}; - -// The equations class must include a template method -// -// template -// TaskID Ax(TL_t &tl, TaskID depends_on, std::shared_ptr> &md) -// -// that takes a field associated with x_t and applies -// the matrix A to it and stores the result in y_t. 
-template -class CGSolver : public SolverBase { - public: - PARTHENON_INTERNALSOLVERVARIABLE(u, x); - PARTHENON_INTERNALSOLVERVARIABLE(u, r); - PARTHENON_INTERNALSOLVERVARIABLE(u, v); - PARTHENON_INTERNALSOLVERVARIABLE(u, p); - - using internal_types_tl = TypeList; - using preconditioner_t = MGSolver; - using all_internal_types_tl = - concatenate_type_lists_t; - - std::vector GetInternalVariableNames() const { - std::vector names; - if (params_.precondition) { - all_internal_types_tl::IterateTypes( - [&names](auto t) { names.push_back(decltype(t)::name()); }); - } else { - internal_types_tl::IterateTypes( - [&names](auto t) { names.push_back(decltype(t)::name()); }); - } - return names; - } - - CGSolver(StateDescriptor *pkg, CGParams params_in, equations eq_in = equations(), - std::vector shape = {}, const std::string &container = "base") - : preconditioner(pkg, params_in.mg_params, eq_in, shape, container), - params_(params_in), iter_counter(0), eqs_(eq_in), container_(container) { - using namespace refinement_ops; - auto m_no_ghost = - Metadata({Metadata::Cell, Metadata::Derived, Metadata::OneCopy}, shape); - pkg->AddField(x::name(), m_no_ghost); - pkg->AddField(r::name(), m_no_ghost); - pkg->AddField(v::name(), m_no_ghost); - pkg->AddField(p::name(), m_no_ghost); - } - - TaskID AddSetupTasks(TaskList &tl, TaskID dependence, int partition, Mesh *pmesh) { - return preconditioner.AddSetupTasks(tl, dependence, partition, pmesh); - } - - TaskID AddTasks(TaskList &tl, TaskID dependence, const int partition, Mesh *pmesh) { - using namespace utils; - TaskID none; - auto &md = pmesh->mesh_data.GetOrAdd(container_, partition); - std::string label = container_ + "cg_comm_" + std::to_string(partition); - auto &md_comm = - pmesh->mesh_data.AddShallow(label, md, std::vector{u::name()}); - iter_counter = 0; - bool multilevel = pmesh->multilevel; - - // Initialization: u <- 0, r <- rhs, p <- 0, ru <- 1 - auto zero_u = tl.AddTask(dependence, TF(SetToZero), md); - auto zero_v = tl.AddTask(dependence, TF(SetToZero), md); - auto zero_x = tl.AddTask(dependence, TF(SetToZero), md); - auto zero_p = tl.AddTask(dependence, TF(SetToZero
), md); - auto copy_r = tl.AddTask(dependence, TF(CopyData), md); - auto get_rhs2 = none; - if (params_.relative_residual || params_.print_per_step) - get_rhs2 = DotProduct(dependence, tl, &rhs2, md); - auto initialize = tl.AddTask( - TaskQualifier::once_per_region | TaskQualifier::local_sync, - zero_u | zero_v | zero_x | zero_p | copy_r | get_rhs2, "zero factors", - [](CGSolver *solver) { - solver->iter_counter = -1; - solver->ru.val = std::numeric_limits::max(); - return TaskStatus::complete; - }, - this); - - if (params_.print_per_step && Globals::my_rank == 0) { - initialize = tl.AddTask( - TaskQualifier::once_per_region, initialize, "print to screen", - [&](CGSolver *solver, std::shared_ptr res_tol, bool relative_residual, - Mesh *pm) { - Real tol = relative_residual - ? *res_tol * std::sqrt(solver->rhs2.val / pm->GetTotalCells()) - : *res_tol; - printf("# [0] v-cycle\n# [1] rms-residual (tol = %e) \n# [2] rms-error\n", - tol); - printf("0 %e\n", std::sqrt(solver->rhs2.val / pm->GetTotalCells())); - return TaskStatus::complete; - }, - this, params_.residual_tolerance, params_.relative_residual, pmesh); - } - - // BEGIN ITERATIVE TASKS - auto [itl, solver_id] = tl.AddSublist(initialize, {1, params_.max_iters}); - - auto sync = itl.AddTask(TaskQualifier::local_sync, none, - []() { return TaskStatus::complete; }); - auto reset = itl.AddTask( - TaskQualifier::once_per_region, sync, "update values", - [](CGSolver *solver) { - solver->ru_old = solver->ru.val; - solver->iter_counter++; - return TaskStatus::complete; - }, - this); - - // 1. u <- M r - auto precon = reset; - if (params_.precondition) { - auto set_rhs = itl.AddTask(precon, TF(CopyData), md); - auto zero_u = itl.AddTask(precon, TF(SetToZero), md); - precon = - preconditioner.AddLinearOperatorTasks(itl, set_rhs | zero_u, partition, pmesh); - } else { - precon = itl.AddTask(precon, TF(CopyData), md); - } - - // 2. beta <- r dot u / r dot u {old} - auto get_ru = DotProduct(precon, itl, &ru, md); - - // 3. p <- u + beta p - auto correct_p = itl.AddTask( - get_ru, "p <- u + beta p", - [](CGSolver *solver, std::shared_ptr> &md) { - Real beta = solver->iter_counter > 0 ? solver->ru.val / solver->ru_old : 0.0; - return AddFieldsAndStore(md, 1.0, beta); - }, - this, md); - - // 4. v <- A p - auto copy_u = itl.AddTask(correct_p, TF(CopyData), md); - auto comm = - AddBoundaryExchangeTasks(copy_u, itl, md_comm, multilevel); - auto get_v = eqs_.template Ax(itl, comm, md); - - // 5. alpha <- r dot u / p dot v (calculate denominator) - auto get_pAp = DotProduct(get_v, itl, &pAp, md); - - // 6. x <- x + alpha p - auto correct_x = itl.AddTask( - get_pAp, "x <- x + alpha p", - [](CGSolver *solver, std::shared_ptr> &md) { - Real alpha = solver->ru.val / solver->pAp.val; - return AddFieldsAndStore(md, 1.0, alpha); - }, - this, md); - - // 6. r <- r - alpha A p - auto correct_r = itl.AddTask( - get_pAp, "r <- r - alpha A p", - [](CGSolver *solver, std::shared_ptr> &md) { - Real alpha = solver->ru.val / solver->pAp.val; - return AddFieldsAndStore(md, 1.0, -alpha); - }, - this, md); - - // 7. 
Check and print out residual - auto get_res = DotProduct(correct_r, itl, &residual, md); - - auto print = itl.AddTask( - TaskQualifier::once_per_region, get_res, - [&](CGSolver *solver, Mesh *pmesh) { - Real rms_res = std::sqrt(solver->residual.val / pmesh->GetTotalCells()); - if (Globals::my_rank == 0 && solver->params_.print_per_step) - printf("%i %e\n", solver->iter_counter, rms_res); - return TaskStatus::complete; - }, - this, pmesh); - - auto check = itl.AddTask( - TaskQualifier::completion, get_res | correct_x, "completion", - [](CGSolver *solver, Mesh *pmesh, int max_iter, std::shared_ptr res_tol, - bool relative_residual) { - Real rms_res = std::sqrt(solver->residual.val / pmesh->GetTotalCells()); - solver->final_residual = rms_res; - solver->final_iteration = solver->iter_counter; - Real tol = relative_residual - ? *res_tol * std::sqrt(solver->rhs2.val / pmesh->GetTotalCells()) - : *res_tol; - if (rms_res < tol || solver->iter_counter >= max_iter) { - solver->final_residual = rms_res; - solver->final_iteration = solver->iter_counter; - return TaskStatus::complete; - } - return TaskStatus::iterate; - }, - this, pmesh, params_.max_iters, params_.residual_tolerance, - params_.relative_residual); - - return tl.AddTask(solver_id, TF(CopyData), md); - } - - Real GetSquaredResidualSum() const { return residual.val; } - int GetCurrentIterations() const { return iter_counter; } - - CGParams &GetParams() { return params_; } - - protected: - preconditioner_t preconditioner; - CGParams params_; - int iter_counter; - AllReduce ru, pAp, residual, rhs2; - Real ru_old; - equations eqs_; - - std::string container_; -}; - -} // namespace solvers -} // namespace parthenon - -#endif // SOLVERS_CG_SOLVER_HPP_ diff --git a/src/solvers/cg_solver_stages.hpp b/src/solvers/cg_solver_stages.hpp index e76b1e63a0a6..07897b30d71b 100644 --- a/src/solvers/cg_solver_stages.hpp +++ b/src/solvers/cg_solver_stages.hpp @@ -24,8 +24,6 @@ #include "interface/meshblock_data.hpp" #include "interface/state_descriptor.hpp" #include "kokkos_abstraction.hpp" -#include "solvers/cg_solver.hpp" -#include "solvers/mg_solver.hpp" #include "solvers/mg_solver_stages.hpp" #include "solvers/solver_base.hpp" #include "solvers/solver_utils_stages.hpp" @@ -36,6 +34,26 @@ namespace parthenon { namespace solvers { +struct CGParams { + MGParams mg_params; + int max_iters = 1000; + std::shared_ptr residual_tolerance = std::make_shared(1.e-12); + bool precondition = true; + bool print_per_step = false; + bool relative_residual = false; + CGParams() = default; + CGParams(ParameterInput *pin, const std::string &input_block) { + max_iters = pin->GetOrAddInteger(input_block, "max_iterations", max_iters); + *residual_tolerance = + pin->GetOrAddReal(input_block, "residual_tolerance", *residual_tolerance); + precondition = pin->GetOrAddBoolean(input_block, "precondition", precondition); + print_per_step = pin->GetOrAddBoolean(input_block, "print_per_step", print_per_step); + mg_params = MGParams(pin, input_block); + relative_residual = + pin->GetOrAddBoolean(input_block, "relative_residual", relative_residual); + } +}; + // The equations class must include a template method // // template diff --git a/src/solvers/mg_solver.hpp b/src/solvers/mg_solver.hpp deleted file mode 100644 index 8e03f7fa0320..000000000000 --- a/src/solvers/mg_solver.hpp +++ /dev/null @@ -1,538 +0,0 @@ -//======================================================================================== -// (C) (or copyright) 2023-2024. Triad National Security, LLC. All rights reserved. 
-// -// This program was produced under U.S. Government contract 89233218CNA000001 for Los -// Alamos National Laboratory (LANL), which is operated by Triad National Security, LLC -// for the U.S. Department of Energy/National Nuclear Security Administration. All rights -// in the program are reserved by Triad National Security, LLC, and the U.S. Department -// of Energy/National Nuclear Security Administration. The Government is granted for -// itself and others acting on its behalf a nonexclusive, paid-up, irrevocable worldwide -// license in this material to reproduce, prepare derivative works, distribute copies to -// the public, perform publicly and display publicly, and to permit others to do so. -//======================================================================================== -#ifndef SOLVERS_MG_SOLVER_HPP_ -#define SOLVERS_MG_SOLVER_HPP_ - -#include -#include -#include -#include -#include -#include -#include - -#include "interface/mesh_data.hpp" -#include "interface/meshblock_data.hpp" -#include "interface/state_descriptor.hpp" -#include "kokkos_abstraction.hpp" -#include "solvers/solver_base.hpp" -#include "solvers/solver_utils.hpp" -#include "tasks/tasks.hpp" -#include "utils/robust.hpp" -#include "utils/type_list.hpp" - -namespace parthenon { - -namespace solvers { - -struct MGParams { - int max_iters = 1000; - Real residual_tolerance = 1.e-12; - bool do_FAS = true; - std::string smoother = "SRJ2"; - bool two_by_two_diagonal = false; - int max_coarsenings = std::numeric_limits::max(); - std::string prolongation = "OldLinear"; - - MGParams() = default; - MGParams(ParameterInput *pin, const std::string &input_block) { - max_iters = pin->GetOrAddInteger(input_block, "max_iterations", max_iters); - residual_tolerance = - pin->GetOrAddReal(input_block, "residual_tolerance", residual_tolerance); - do_FAS = pin->GetOrAddBoolean(input_block, "do_FAS", do_FAS); - smoother = pin->GetOrAddString(input_block, "smoother", smoother); - prolongation = pin->GetOrAddString(input_block, "prolongation", prolongation); - two_by_two_diagonal = - pin->GetOrAddBoolean(input_block, "two_by_two_diagonal", two_by_two_diagonal); - max_coarsenings = - pin->GetOrAddInteger(input_block, "max_coarsenings", max_coarsenings); - } -}; - -// The equations class must include a template method -// -// template -// TaskID Ax(TL_t &tl, TaskID depends_on, std::shared_ptr> &md) -// -// that takes a field associated with x_t and applies -// the matrix A to it and stores the result in y_t. Additionally, -// it must include a template method -// -// template -// TaskStatus SetDiagonal(std::shared_ptr> &md) -// -// That stores the (possibly approximate) diagonal of matrix A in the field -// associated with the type diag_t. This is used for Jacobi iteration. 
-template -class MGSolver : public SolverBase { - public: - PARTHENON_INTERNALSOLVERVARIABLE( - u, res_err); // residual on the way up and error on the way down - PARTHENON_INTERNALSOLVERVARIABLE(u, temp); // Temporary storage - PARTHENON_INTERNALSOLVERVARIABLE(u, u0); // Storage for initial solution during FAS - PARTHENON_INTERNALSOLVERVARIABLE(u, D); // Storage for (approximate) diagonal - - using internal_types_tl = TypeList; - std::vector GetInternalVariableNames() const { - std::vector names; - internal_types_tl::IterateTypes( - [&names](auto t) { names.push_back(decltype(t)::name()); }); - return names; - } - - MGSolver(StateDescriptor *pkg, MGParams params_in, equations eq_in = equations(), - std::vector shape = {}, const std::string &container = "base") - : params_(params_in), iter_counter(0), eqs_(eq_in), container_(container) { - using namespace parthenon::refinement_ops; - // The ghost cells of res_err need to be filled, but this is accomplished by - // copying res_err into u, communicating, then copying u back into res_err - // across all zones in a block - auto mres_err = - Metadata({Metadata::Cell, Metadata::Independent, Metadata::GMGRestrict, - Metadata::GMGProlongate, Metadata::OneCopy}, - shape); - - mres_err.RegisterRefinementOps(); - pkg->AddField(res_err::name(), mres_err); - - auto mtemp = - Metadata({Metadata::Cell, Metadata::Independent, Metadata::OneCopy}, shape); - mtemp.RegisterRefinementOps(); - pkg->AddField(temp::name(), mtemp); - - auto mu0 = Metadata({Metadata::Cell, Metadata::Derived, Metadata::OneCopy}, shape); - pkg->AddField(u0::name(), mu0); - auto Dshape = shape; - if (params_.two_by_two_diagonal) { - Dshape = std::vector{4}; - } - auto mD = Metadata({Metadata::Cell, Metadata::Derived, Metadata::OneCopy}, Dshape); - pkg->AddField(D::name(), mD); - } - - TaskID AddTasks(TaskList &tl, TaskID dependence, const int partition, Mesh *pmesh) { - using namespace utils; - TaskID none; - auto [itl, solve_id] = tl.AddSublist(dependence, {1, this->params_.max_iters}); - iter_counter = -1; - auto update_iter = itl.AddTask( - TaskQualifier::local_sync | TaskQualifier::once_per_region, none, "print", - [](int *iter_counter) { - (*iter_counter)++; - if (*iter_counter > 1 || Globals::my_rank != 0) return TaskStatus::complete; - printf("# [0] v-cycle\n# [1] rms-residual\n# [2] rms-error\n"); - return TaskStatus::complete; - }, - &iter_counter); - auto mg_finest = AddLinearOperatorTasks(itl, update_iter, partition, pmesh); - - auto partitions = pmesh->GetDefaultBlockPartitions(GridIdentifier::leaf()); - if (partition >= partitions.size()) - PARTHENON_FAIL("Does not work with non-default partitioning."); - auto &md = pmesh->mesh_data.Add(container_, partitions[partition]); - auto comm = AddBoundaryExchangeTasks(mg_finest, itl, md, - pmesh->multilevel); - auto calc_pointwise_res = eqs_.template Ax(itl, comm, md); - calc_pointwise_res = itl.AddTask( - calc_pointwise_res, TF(AddFieldsAndStoreInteriorSelect), - md, 1.0, -1.0, false); - auto get_res = DotProduct(calc_pointwise_res, itl, &residual, md); - - auto check = itl.AddTask( - TaskQualifier::completion, get_res, "Check residual", - [partition](MGSolver *solver, Mesh *pmesh) { - Real rms_res = std::sqrt(solver->residual.val / pmesh->GetTotalCells()); - if (Globals::my_rank == 0 && partition == 0) - printf("%i %e\n", solver->iter_counter, rms_res); - solver->final_residual = rms_res; - solver->final_iteration = solver->iter_counter; - if (rms_res > solver->params_.residual_tolerance) return TaskStatus::iterate; - return 
TaskStatus::complete; - }, - this, pmesh); - - return solve_id; - } - - TaskID AddLinearOperatorTasks(TaskList &tl, TaskID dependence, int partition, - Mesh *pmesh) { - using namespace utils; - iter_counter = 0; - - int min_level = std::max(pmesh->GetGMGMaxLevel() - params_.max_coarsenings, - pmesh->GetGMGMinLevel()); - int max_level = pmesh->GetGMGMaxLevel(); - // We require a local pre- and post-MG sync since multigrid iterations require - // communication across blocks and partitions on the multigrid levels do not - // necessarily contain the same blocks as partitions on the leaf grid. This - // means that without the syncs, leaf partitions can receive messages erroneously - // receive messages and/or update block data during a MG step. - auto pre_sync = tl.AddTask(TaskQualifier::local_sync, dependence, - []() { return TaskStatus::complete; }); - auto mg = pre_sync; - for (int level = max_level; level >= min_level; --level) { - mg = mg | AddMultiGridTasksPartitionLevel(tl, dependence, partition, level, - min_level, max_level, pmesh); - } - auto post_sync = - tl.AddTask(TaskQualifier::local_sync, mg, []() { return TaskStatus::complete; }); - return post_sync; - } - - TaskID AddSetupTasks(TaskList &tl, TaskID dependence, int partition, Mesh *pmesh) { - using namespace utils; - - int min_level = std::max(pmesh->GetGMGMaxLevel() - params_.max_coarsenings, - pmesh->GetGMGMinLevel()); - int max_level = pmesh->GetGMGMaxLevel(); - - auto mg_setup = dependence; - for (int level = max_level; level >= min_level; --level) { - mg_setup = - mg_setup | AddMultiGridSetupPartitionLevel(tl, dependence, partition, level, - min_level, max_level, pmesh); - } - return mg_setup; - } - - Real GetSquaredResidualSum() const { return residual.val; } - int GetCurrentIterations() const { return iter_counter; } - - protected: - MGParams params_; - int iter_counter; - AllReduce residual; - equations eqs_; - std::string container_; - - // These functions apparently have to be public to compile with cuda since - // they contain device side lambdas - public: - template - TaskStatus Jacobi(std::shared_ptr> &md, double weight) { - using namespace parthenon; - const int ndim = md->GetMeshPointer()->ndim; - using TE = parthenon::TopologicalElement; - TE te = TE::CC; - IndexRange ib = md->GetBoundsI(IndexDomain::interior, te); - IndexRange jb = md->GetBoundsJ(IndexDomain::interior, te); - IndexRange kb = md->GetBoundsK(IndexDomain::interior, te); - - int nblocks = md->NumBlocks(); - std::vector include_block(nblocks, true); - if (md->grid.type == GridType::two_level_composite) { - int current_level = md->grid.logical_level; - for (int b = 0; b < nblocks; ++b) { - include_block[b] = - md->GetBlockData(b)->GetBlockPointer()->loc.level() == current_level; - } - } - static auto desc = - parthenon::MakePackDescriptor(md.get()); - auto pack = desc.GetPack(md.get(), include_block); - if (params_.two_by_two_diagonal) { - parthenon::par_for( - "CaclulateFluxes", 0, pack.GetNBlocks() - 1, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e, - KOKKOS_LAMBDA(const int b, const int k, const int j, const int i) { - const auto &coords = pack.GetCoordinates(b); - - const Real D11 = pack(b, te, D_t(0), k, j, i); - const Real D22 = pack(b, te, D_t(1), k, j, i); - const Real D12 = pack(b, te, D_t(2), k, j, i); - const Real D21 = pack(b, te, D_t(3), k, j, i); - const Real det = D11 * D22 - D12 * D21; - - const Real Du0 = D11 * pack(b, te, xold_t(0), k, j, i) + - D12 * pack(b, te, xold_t(1), k, j, i); - const Real Du1 = D21 * pack(b, te, xold_t(0), k, j, i) + - 
D22 * pack(b, te, xold_t(1), k, j, i); - - const Real t0 = - pack(b, te, rhs_t(0), k, j, i) - pack(b, te, Axold_t(0), k, j, i) + Du0; - const Real t1 = - pack(b, te, rhs_t(1), k, j, i) - pack(b, te, Axold_t(1), k, j, i) + Du1; - - const Real v0 = (D22 * t0 - D12 * t1) / det; - const Real v1 = (-D21 * t0 + D11 * t1) / det; - - pack(b, te, xnew_t(0), k, j, i) = - weight * v0 + (1.0 - weight) * pack(b, te, xold_t(0), k, j, i); - pack(b, te, xnew_t(1), k, j, i) = - weight * v1 + (1.0 - weight) * pack(b, te, xold_t(1), k, j, i); - }); - } else { - const int scratch_size = 0; - const int scratch_level = 0; - parthenon::par_for_outer( - DEFAULT_OUTER_LOOP_PATTERN, "Jacobi", DevExecSpace(), scratch_size, - scratch_level, 0, pack.GetNBlocks() - 1, kb.s, kb.e, - KOKKOS_LAMBDA(parthenon::team_mbr_t member, const int b, const int k) { - const int nvars = - pack.GetUpperBound(b, xnew_t()) - pack.GetLowerBound(b, xnew_t()) + 1; - for (int c = 0; c < nvars; ++c) { - Real *Ax = &pack(b, te, Axold_t(c), k, jb.s, ib.s); - Real *diag = &pack(b, te, D_t(c), k, jb.s, ib.s); - Real *prhs = &pack(b, te, rhs_t(c), k, jb.s, ib.s); - Real *xo = &pack(b, te, xold_t(c), k, jb.s, ib.s); - Real *xn = &pack(b, te, xnew_t(c), k, jb.s, ib.s); - // Use ptr arithmetic to get the number of points we need to go over - // (including ghost zones) to get from (k, jb.s, ib.s) to (k, jb.e, ib.e) - const int npoints = &pack(b, te, Axold_t(c), k, jb.e, ib.e) - Ax + 1; - parthenon::par_for_inner( - DEFAULT_INNER_LOOP_PATTERN, member, 0, npoints - 1, [&](const int idx) { - const Real off_diag = Ax[idx] - diag[idx] * xo[idx]; - const Real val = prhs[idx] - off_diag; - xn[idx] = - weight * robust::ratio(val, diag[idx]) + (1.0 - weight) * xo[idx]; - }); - } - }); - } - return TaskStatus::complete; - } - - template - TaskID AddJacobiIteration(TL_t &tl, TaskID depends_on, bool multilevel, Real omega, - std::shared_ptr> &md, - std::shared_ptr> &md_comm) { - using namespace utils; - - auto comm = - AddBoundaryExchangeTasks(depends_on, tl, md_comm, multilevel); - auto mat_mult = eqs_.template Ax(tl, comm, md); - return tl.AddTask(mat_mult, TF(&MGSolver::Jacobi), this, - md, omega); - } - - template - TaskID AddSRJIteration(TL_t &tl, TaskID depends_on, int stages, bool multilevel, - std::shared_ptr> &md, - std::shared_ptr> &md_comm) { - using namespace utils; - int ndim = md->GetParentPointer()->ndim; - - std::array, 3> omega_M1{ - {{1.0, 0.0, 0.0}, {1.0, 0.0, 0.0}, {1.0, 0.0, 0.0}}}; - // Damping factors from Yang & Mittal (2017) - std::array, 3> omega_M2{ - {{0.8723, 0.5395, 0.0000}, {1.3895, 0.5617, 0.0000}, {1.7319, 0.5695, 0.0000}}}; - std::array, 3> omega_M3{ - {{0.9372, 0.6667, 0.5173}, {1.6653, 0.8000, 0.5264}, {2.2473, 0.8571, 0.5296}}}; - - if (stages == 0) return depends_on; - auto omega = omega_M1; - if (stages == 2) omega = omega_M2; - if (stages == 3) omega = omega_M3; - // This copy is to set the coarse blocks in temp to the values in u so that - // fine-coarse boundaries of temp are correctly updated during communication - depends_on = tl.AddTask(depends_on, TF(CopyData), md); - auto jacobi1 = AddJacobiIteration( - tl, depends_on, multilevel, omega[ndim - 1][0], md, md_comm); - auto copy1 = tl.AddTask(jacobi1, TF(CopyData), md); - if (stages < 2) return copy1; - auto jacobi2 = AddJacobiIteration( - tl, copy1, multilevel, omega[ndim - 1][1], md, md_comm); - auto copy2 = tl.AddTask(jacobi2, TF(CopyData), md); - if (stages < 3) return copy2; - auto jacobi3 = AddJacobiIteration( - tl, copy2, multilevel, omega[ndim - 1][2], md, 
md_comm); - return tl.AddTask(jacobi3, TF(CopyData), md); - } - - template - TaskID AddMultiGridSetupPartitionLevel(TL_t &tl, TaskID dependence, int partition, - int level, int min_level, int max_level, - Mesh *pmesh) { - using namespace utils; - - auto partitions = - pmesh->GetDefaultBlockPartitions(GridIdentifier::two_level_composite(level)); - if (partition >= partitions.size()) return dependence; - auto &md = pmesh->mesh_data.Add(container_, partitions[partition]); - - auto task_out = dependence; - if (level < max_level) { - task_out = - tl.AddTask(task_out, TF(ReceiveBoundBufs), md); - task_out = tl.AddTask(task_out, TF(SetBounds), md); - } - - // If we are finer than the coarsest level: - if (level > min_level) { - task_out = - tl.AddTask(task_out, TF(SendBoundBufs), md); - } - - // The boundaries are not up to date on return - return task_out; - } - - TaskID AddMultiGridTasksPartitionLevel(TaskList &tl, TaskID dependence, int partition, - int level, int min_level, int max_level, - Mesh *pmesh) { - using namespace utils; - auto smoother = params_.smoother; - bool do_FAS = params_.do_FAS; - int pre_stages, post_stages; - if (smoother == "none") { - pre_stages = 0; - post_stages = 0; - } else if (smoother == "SRJ1") { - pre_stages = 1; - post_stages = 1; - } else if (smoother == "SRJ2") { - pre_stages = 2; - post_stages = 2; - } else if (smoother == "SRJ3") { - pre_stages = 3; - post_stages = 3; - } else { - PARTHENON_FAIL("Unknown smoother type."); - } - -// auto decorate_task_name = [partition, level](const std::string &in, auto b) { -// return std::make_tuple(in + "(p:" + std::to_string(partition) + -// ", l:" + std::to_string(level) + ")", -// 1, b); -// }; - -// #define BTF(...) decorate_task_name(TF(__VA_ARGS__)) -#define BTF(...) TF(__VA_ARGS__) - bool multilevel = (level != min_level); - - auto partitions = - pmesh->GetDefaultBlockPartitions(GridIdentifier::two_level_composite(level)); - if (partition >= partitions.size()) return dependence; - auto &md = pmesh->mesh_data.Add(container_, partitions[partition]); - auto &md_comm = pmesh->mesh_data.AddShallow( - "mg_comm", md, std::vector{u::name(), res_err::name()}); - - // 0. Receive residual from coarser level if there is one - auto set_from_finer = dependence; - if (level < max_level) { - // Fill fields with restricted values - auto recv_from_finer = tl.AddTask( - dependence, TF(ReceiveBoundBufs), md_comm); - set_from_finer = tl.AddTask( - recv_from_finer, BTF(SetBounds), md_comm); - // 1. Copy residual from dual purpose communication field to the rhs, should be - // actual RHS for finest level - if (!do_FAS) { - auto zero_u = tl.AddTask(set_from_finer, BTF(SetToZero), md); - auto copy_rhs = tl.AddTask(set_from_finer, BTF(CopyData), md); - set_from_finer = zero_u | copy_rhs; - } else { - // TODO(LFR): Determine if this boundary exchange task is required, I think it is - // to make sure that the boundaries of the restricted u are up to date before - // calling Ax. That being said, at least in one case commenting this line out - // didn't seem to impact the solution. 
- set_from_finer = AddBoundaryExchangeTasks( - set_from_finer, tl, md_comm, multilevel); - set_from_finer = tl.AddTask(set_from_finer, BTF(CopyData), md); - // This should set the rhs only in blocks that correspond to interior nodes, the - // RHS of leaf blocks that are on this GMG level should have already been set on - // entry into multigrid - set_from_finer = eqs_.template Ax(tl, set_from_finer, md); - set_from_finer = - tl.AddTask(set_from_finer, - BTF(AddFieldsAndStoreInteriorSelect), md, - 1.0, 1.0, true); - } - } else { - set_from_finer = tl.AddTask(set_from_finer, BTF(CopyData), md); - } - - // 2. Do pre-smooth and fill solution on this level - set_from_finer = - tl.AddTask(set_from_finer, BTF(&equations::template SetDiagonal), &eqs_, md); - auto pre_smooth = AddSRJIteration( - tl, set_from_finer, pre_stages, multilevel, md, md_comm); - // If we are finer than the coarsest level: - auto post_smooth = pre_smooth; - if (level > min_level) { - // 3. Communicate same level boundaries so that u is up to date everywhere - auto comm_u = AddBoundaryExchangeTasks(pre_smooth, tl, - md_comm, multilevel); - - // 4. Caclulate residual and store in communication field - auto residual = eqs_.template Ax(tl, comm_u, md); - residual = tl.AddTask( - residual, BTF(AddFieldsAndStoreInteriorSelect), md, - 1.0, -1.0, false); - - // 5. Restrict communication field and send to next level - auto communicate_to_coarse = tl.AddTask( - residual, BTF(SendBoundBufs), md_comm); - - // 6. Receive error field into communication field and prolongate - auto recv_from_coarser = - tl.AddTask(communicate_to_coarse, - TF(ReceiveBoundBufs), md_comm); - auto set_from_coarser = tl.AddTask( - recv_from_coarser, BTF(SetBounds), md_comm); - auto prolongate = set_from_coarser; - if (params_.prolongation == "User") { - prolongate = eqs_.template Prolongate(tl, set_from_coarser, md_comm); - } else { - prolongate = - tl.AddTask(set_from_coarser, - BTF(ProlongateBounds), md_comm); - } - - // 7. Correct solution on this level with res_err field and store in - // communication field - auto update_sol = tl.AddTask( - prolongate, BTF(AddFieldsAndStore), md, 1.0, 1.0); - - // 8. Post smooth using communication field and stored RHS - post_smooth = AddSRJIteration(tl, update_sol, post_stages, - multilevel, md, md_comm); - - } else { - post_smooth = tl.AddTask(pre_smooth, BTF(CopyData), md); - } - - // 9. 
Send communication field to next finer level (should be error field for that - // level) - TaskID last_task = post_smooth; - if (level < max_level) { - auto copy_over = post_smooth; - if (!do_FAS) { - copy_over = tl.AddTask(post_smooth, BTF(CopyData), md); - } else { - auto calc_err = tl.AddTask( - post_smooth, BTF(AddFieldsAndStore), md, 1.0, -1.0); - copy_over = calc_err; - } - // This is required to make sure boundaries of res_err are up to date before - // prolongation - copy_over = tl.AddTask(copy_over, BTF(CopyData), md); - copy_over = tl.AddTask(copy_over, BTF(CopyData), md); - auto boundary = AddBoundaryExchangeTasks( - copy_over, tl, md_comm, multilevel); - auto copy_back = tl.AddTask(boundary, BTF(CopyData), md); - copy_back = tl.AddTask(copy_back, BTF(CopyData), md); - last_task = tl.AddTask(copy_back, - BTF(SendBoundBufs), md); - } - // The boundaries are not up to date on return - return last_task; -#undef BTF - } -}; - -} // namespace solvers - -} // namespace parthenon - -#endif // SOLVERS_MG_SOLVER_HPP_ diff --git a/src/solvers/mg_solver_stages.hpp b/src/solvers/mg_solver_stages.hpp index 4c9c4fb1594c..428c93e05247 100644 --- a/src/solvers/mg_solver_stages.hpp +++ b/src/solvers/mg_solver_stages.hpp @@ -36,6 +36,30 @@ namespace parthenon { namespace solvers { +struct MGParams { + int max_iters = 1000; + Real residual_tolerance = 1.e-12; + bool do_FAS = true; + std::string smoother = "SRJ2"; + bool two_by_two_diagonal = false; + int max_coarsenings = std::numeric_limits::max(); + std::string prolongation = "OldLinear"; + + MGParams() = default; + MGParams(ParameterInput *pin, const std::string &input_block) { + max_iters = pin->GetOrAddInteger(input_block, "max_iterations", max_iters); + residual_tolerance = + pin->GetOrAddReal(input_block, "residual_tolerance", residual_tolerance); + do_FAS = pin->GetOrAddBoolean(input_block, "do_FAS", do_FAS); + smoother = pin->GetOrAddString(input_block, "smoother", smoother); + prolongation = pin->GetOrAddString(input_block, "prolongation", prolongation); + two_by_two_diagonal = + pin->GetOrAddBoolean(input_block, "two_by_two_diagonal", two_by_two_diagonal); + max_coarsenings = + pin->GetOrAddInteger(input_block, "max_coarsenings", max_coarsenings); + } +}; + // The equations_t class must include a template method // // template From c882ab22977d0e9aaccdece7432aca65802f8a87 Mon Sep 17 00:00:00 2001 From: Luke Roberts Date: Thu, 21 Nov 2024 14:56:21 -0700 Subject: [PATCH 53/62] remove more stuff --- example/poisson_gmg/poisson_driver.cpp | 2 +- src/CMakeLists.txt | 1 - src/solvers/bicgstab_solver_stages.hpp | 4 +- src/solvers/cg_solver_stages.hpp | 4 +- src/solvers/mg_solver_stages.hpp | 16 +- src/solvers/solver_utils.hpp | 205 +++++++++++++++++++--- src/solvers/solver_utils_stages.hpp | 227 ------------------------- src/utils/type_list.hpp | 9 + 8 files changed, 208 insertions(+), 260 deletions(-) delete mode 100644 src/solvers/solver_utils_stages.hpp diff --git a/example/poisson_gmg/poisson_driver.cpp b/example/poisson_gmg/poisson_driver.cpp index b9a919d59100..a15de50b5851 100644 --- a/example/poisson_gmg/poisson_driver.cpp +++ b/example/poisson_gmg/poisson_driver.cpp @@ -88,7 +88,7 @@ TaskCollection PoissonDriver::MakeTaskCollection(BlockList_t &blocks) { // Move the rhs variable into the rhs stage for stage based solver auto copy_rhs = tl.AddTask(get_rhs, TF(solvers::utils::CopyData), md); copy_rhs = tl.AddTask( - copy_rhs, TF(solvers::StageUtils::CopyData>), md, md_rhs); + copy_rhs, TF(solvers::utils::CopyData>), md, md_rhs); // Set 
initial solution guess to zero
   auto zero_u = tl.AddTask(copy_rhs, TF(solvers::utils::SetToZero), md);
 
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 7ca142d809a5..0dbbec227393 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -235,7 +235,6 @@ add_library(parthenon
   solvers/mg_solver_stages.hpp
   solvers/solver_base.hpp
   solvers/solver_utils.hpp
-  solvers/solver_utils_stages.hpp
 
   tasks/tasks.cpp
   tasks/tasks.hpp
diff --git a/src/solvers/bicgstab_solver_stages.hpp b/src/solvers/bicgstab_solver_stages.hpp
index 223e93777344..297964e11c27 100644
--- a/src/solvers/bicgstab_solver_stages.hpp
+++ b/src/solvers/bicgstab_solver_stages.hpp
@@ -25,7 +25,7 @@
 #include "kokkos_abstraction.hpp"
 #include "solvers/mg_solver_stages.hpp"
 #include "solvers/solver_base.hpp"
-#include "solvers/solver_utils_stages.hpp"
+#include "solvers/solver_utils.hpp"
 #include "tasks/tasks.hpp"
 #include "utils/type_list.hpp"
 
@@ -126,7 +126,7 @@ class BiCGSTABSolverStages : public SolverBase {
   }
 
   TaskID AddTasks(TaskList &tl, TaskID dependence, const int partition, Mesh *pmesh) {
-    using namespace StageUtils;
+    using namespace utils;
     TaskID none;
     auto partitions = pmesh->GetDefaultBlockPartitions();
 
diff --git a/src/solvers/cg_solver_stages.hpp b/src/solvers/cg_solver_stages.hpp
index 07897b30d71b..a99747310d7d 100644
--- a/src/solvers/cg_solver_stages.hpp
+++ b/src/solvers/cg_solver_stages.hpp
@@ -26,7 +26,7 @@
 #include "kokkos_abstraction.hpp"
 #include "solvers/mg_solver_stages.hpp"
 #include "solvers/solver_base.hpp"
-#include "solvers/solver_utils_stages.hpp"
+#include "solvers/solver_utils.hpp"
 #include "tasks/tasks.hpp"
 #include "utils/type_list.hpp"
 
@@ -102,7 +102,7 @@ class CGSolverStages : public SolverBase {
   }
 
   TaskID AddTasks(TaskList &tl, TaskID dependence, const int partition, Mesh *pmesh) {
-    using namespace StageUtils;
+    using namespace utils;
     TaskID none;
     auto partitions = pmesh->GetDefaultBlockPartitions();
     // Should contain all fields necessary for applying the matrix to a given state vector,
diff --git a/src/solvers/mg_solver_stages.hpp b/src/solvers/mg_solver_stages.hpp
index 428c93e05247..d71833a29596 100644
--- a/src/solvers/mg_solver_stages.hpp
+++ b/src/solvers/mg_solver_stages.hpp
@@ -27,7 +27,7 @@
 #include "kokkos_abstraction.hpp"
 #include "solvers/internal_prolongation.hpp"
 #include "solvers/solver_base.hpp"
-#include "solvers/solver_utils_stages.hpp"
+#include "solvers/solver_utils.hpp"
 #include "tasks/tasks.hpp"
 #include "utils/robust.hpp"
 #include "utils/type_list.hpp"
@@ -118,7 +118,7 @@ class MGSolverStages : public SolverBase {
   }
 
   TaskID AddTasks(TaskList &tl, TaskID dependence, const int partition, Mesh *pmesh) {
-    using namespace StageUtils;
+    using namespace utils;
     TaskID none;
     auto [itl, solve_id] = tl.AddSublist(dependence, {1, this->params_.max_iters});
     iter_counter = -1;
@@ -167,7 +167,7 @@ class MGSolverStages : public SolverBase {
 
   TaskID AddLinearOperatorTasks(TaskList &tl, TaskID dependence, int partition,
                                 Mesh *pmesh) {
-    using namespace StageUtils;
+    using namespace utils;
 
     iter_counter = 0;
     int min_level = std::max(pmesh->GetGMGMaxLevel() - params_.max_coarsenings,
                              pmesh->GetGMGMinLevel());
@@ -191,7 +191,7 @@ class MGSolverStages : public SolverBase {
   }
 
   TaskID AddSetupTasks(TaskList &tl, TaskID dependence, int partition, Mesh *pmesh) {
-    using namespace StageUtils;
+    using namespace utils;
     int min_level = std::max(pmesh->GetGMGMaxLevel() - params_.max_coarsenings,
                              pmesh->GetGMGMinLevel());
@@ -281,7 +281,7 @@ class MGSolverStages : public SolverBase {
                                int partition, int level,
                                std::shared_ptr>
&md_in, std::shared_ptr> &md_out) { - using namespace StageUtils; + using namespace utils; auto pmesh = md_in->GetMeshPointer(); auto partitions = pmesh->GetDefaultBlockPartitions(GridIdentifier::two_level_composite(level)); @@ -299,7 +299,7 @@ class MGSolverStages : public SolverBase { template TaskID AddSRJIteration(TL_t &tl, TaskID depends_on, int stages, bool multilevel, int partition, int level, Mesh *pmesh) { - using namespace StageUtils; + using namespace utils; const int ndim = pmesh->ndim; auto partitions = @@ -340,7 +340,7 @@ class MGSolverStages : public SolverBase { TaskID AddMultiGridSetupPartitionLevel(TL_t &tl, TaskID dependence, int partition, int level, int min_level, int max_level, Mesh *pmesh) { - using namespace StageUtils; + using namespace utils; auto partitions = pmesh->GetDefaultBlockPartitions(GridIdentifier::two_level_composite(level)); @@ -368,7 +368,7 @@ class MGSolverStages : public SolverBase { TaskID AddMultiGridTasksPartitionLevel(TaskList &tl, TaskID dependence, int partition, int level, int min_level, int max_level, Mesh *pmesh) { - using namespace StageUtils; + using namespace utils; auto smoother = params_.smoother; bool do_FAS = params_.do_FAS; int pre_stages, post_stages; diff --git a/src/solvers/solver_utils.hpp b/src/solvers/solver_utils.hpp index 6f203e4aa97e..40f9378c4920 100644 --- a/src/solvers/solver_utils.hpp +++ b/src/solvers/solver_utils.hpp @@ -178,6 +178,74 @@ TaskStatus CopyData(const std::shared_ptr> &md) { return TaskStatus::complete; } +template +TaskStatus CopyData(const std::shared_ptr> &md_in, + const std::shared_ptr> &md_out) { + using TE = parthenon::TopologicalElement; + TE te = TE::CC; + IndexRange ib = md_in->GetBoundsI(IndexDomain::entire, te); + IndexRange jb = md_in->GetBoundsJ(IndexDomain::entire, te); + IndexRange kb = md_in->GetBoundsK(IndexDomain::entire, te); + + static auto desc = parthenon::MakePackDescriptorFromTypeList(md_in.get()); + auto pack_in = desc.GetPack(md_in.get(), only_fine_on_composite); + auto pack_out = desc.GetPack(md_out.get(), only_fine_on_composite); + const int scratch_size = 0; + const int scratch_level = 0; + // Warning: This inner loop strategy only works because we are using IndexDomain::entire + const int npoints_inner = (kb.e - kb.s + 1) * (jb.e - jb.s + 1) * (ib.e - ib.s + 1); + parthenon::par_for_outer( + DEFAULT_OUTER_LOOP_PATTERN, "CopyData", DevExecSpace(), scratch_size, scratch_level, + 0, pack_in.GetNBlocks() - 1, + KOKKOS_LAMBDA(parthenon::team_mbr_t member, const int b) { + const int nvars = pack_in.GetUpperBound(b) - pack_in.GetLowerBound(b) + 1; + for (int c = 0; c < nvars; ++c) { + Real *in = &pack_in(b, te, c, kb.s, jb.s, ib.s); + Real *out = &pack_out(b, te, c, kb.s, jb.s, ib.s); + parthenon::par_for_inner(DEFAULT_INNER_LOOP_PATTERN, member, 0, + npoints_inner - 1, + [&](const int idx) { out[idx] = in[idx]; }); + } + }); + return TaskStatus::complete; +} + +template +TaskStatus SetToZero(const std::shared_ptr> &md) { + int nblocks = md->NumBlocks(); + using TE = parthenon::TopologicalElement; + TE te = TE::CC; + static auto desc = [&]{ + if constexpr (isTypeList::value) { + return parthenon::MakePackDescriptorFromTypeList(md.get()); + } else { + return parthenon::MakePackDescriptor(md.get()); + } + }(); + auto pack = desc.GetPack(md.get(), only_fine_on_composite); + const size_t scratch_size_in_bytes = 0; + const int scratch_level = 1; + const int ng = parthenon::Globals::nghost; + parthenon::par_for_outer( + DEFAULT_OUTER_LOOP_PATTERN, "SetFieldsToZero", DevExecSpace(), + 
scratch_size_in_bytes, scratch_level, 0, pack.GetNBlocks() - 1,
+      KOKKOS_LAMBDA(parthenon::team_mbr_t member, const int b) {
+        auto cb = GetIndexShape(pack(b, te, 0), ng);
+        const auto &coords = pack.GetCoordinates(b);
+        IndexRange ib = cb.GetBoundsI(IndexDomain::interior, te);
+        IndexRange jb = cb.GetBoundsJ(IndexDomain::interior, te);
+        IndexRange kb = cb.GetBoundsK(IndexDomain::interior, te);
+        const int nvars = pack.GetUpperBound(b) - pack.GetLowerBound(b) + 1;
+        for (int c = 0; c < nvars; ++c) {
+          parthenon::par_for_inner(
+              parthenon::inner_loop_pattern_simdfor_tag, member, kb.s, kb.e, jb.s, jb.e,
+              ib.s, ib.e, [&](int k, int j, int i) { pack(b, te, c, k, j, i) = 0.0; });
+        }
+      });
+  return TaskStatus::complete;
+}
+
 template 
 TaskStatus AddFieldsAndStoreInteriorSelect(const std::shared_ptr> &md,
                                            Real wa = 1.0, Real wb = 1.0,
@@ -226,36 +294,60 @@ TaskStatus AddFieldsAndStore(const std::shared_ptr> &md, Real wa
                                              md, wa, wb, false);
 }
 
-template 
-TaskStatus SetToZero(const std::shared_ptr> &md) {
-  int nblocks = md->NumBlocks();
+template 
+TaskStatus AddFieldsAndStoreInteriorSelect(const std::shared_ptr> &md_a,
+                                           const std::shared_ptr> &md_b,
+                                           const std::shared_ptr> &md_out,
+                                           Real wa = 1.0, Real wb = 1.0,
+                                           bool only_interior_blocks = false) {
   using TE = parthenon::TopologicalElement;
   TE te = TE::CC;
-  static auto desc = parthenon::MakePackDescriptor(md.get());
-  auto pack = desc.GetPack(md.get(), only_fine_on_composite);
-  const size_t scratch_size_in_bytes = 0;
-  const int scratch_level = 1;
-  const int ng = parthenon::Globals::nghost;
+  IndexRange ib = md_a->GetBoundsI(IndexDomain::entire, te);
+  IndexRange jb = md_a->GetBoundsJ(IndexDomain::entire, te);
+  IndexRange kb = md_a->GetBoundsK(IndexDomain::entire, te);
+
+  int nblocks = md_a->NumBlocks();
+  std::vector include_block(nblocks, true);
+  if (only_interior_blocks) {
+    // The neighbors array will only be set for a block if it's a leaf block
+    for (int b = 0; b < nblocks; ++b)
+      include_block[b] = md_a->GetBlockData(b)->GetBlockPointer()->neighbors.size() == 0;
+  }
+
+  static auto desc = parthenon::MakePackDescriptorFromTypeList(md_a.get());
+  auto pack_a = desc.GetPack(md_a.get(), include_block, only_fine_on_composite);
+  auto pack_b = desc.GetPack(md_b.get(), include_block, only_fine_on_composite);
+  auto pack_out = desc.GetPack(md_out.get(), include_block, only_fine_on_composite);
+  const int scratch_size = 0;
+  const int scratch_level = 0;
+  // Warning: This inner loop strategy only works because we are using IndexDomain::entire
+  const int npoints_inner = (kb.e - kb.s + 1) * (jb.e - jb.s + 1) * (ib.e - ib.s + 1);
   parthenon::par_for_outer(
-      DEFAULT_OUTER_LOOP_PATTERN, "SetFieldsToZero", DevExecSpace(),
-      scratch_size_in_bytes, scratch_level, 0, pack.GetNBlocks() - 1,
+      DEFAULT_OUTER_LOOP_PATTERN, "AddFieldsAndStore", DevExecSpace(), scratch_size,
+      scratch_level, 0, pack_a.GetNBlocks() - 1,
       KOKKOS_LAMBDA(parthenon::team_mbr_t member, const int b) {
-        auto cb = GetIndexShape(pack(b, te, 0), ng);
-        const auto &coords = pack.GetCoordinates(b);
-        IndexRange ib = cb.GetBoundsI(IndexDomain::interior, te);
-        IndexRange jb = cb.GetBoundsJ(IndexDomain::interior, te);
-        IndexRange kb = cb.GetBoundsK(IndexDomain::interior, te);
-        const int nvars = pack.GetUpperBound(b, var()) - pack.GetLowerBound(b, var()) + 1;
+        const int nvars = pack_a.GetUpperBound(b) - pack_a.GetLowerBound(b) + 1;
         for (int c = 0; c < nvars; ++c) {
+          Real *avar = &pack_a(b, te, c, kb.s, jb.s, ib.s);
+          Real *bvar = &pack_b(b, te, c, kb.s, jb.s, ib.s);
+          Real *out
= &pack_out(b, te, c, kb.s, jb.s, ib.s); parthenon::par_for_inner( - parthenon::inner_loop_pattern_simdfor_tag, member, kb.s, kb.e, jb.s, jb.e, - ib.s, ib.e, - [&](int k, int j, int i) { pack(b, te, var(c), k, j, i) = 0.0; }); + DEFAULT_INNER_LOOP_PATTERN, member, 0, npoints_inner - 1, + [&](const int idx) { out[idx] = wa * avar[idx] + wb * bvar[idx]; }); } }); return TaskStatus::complete; } +template +TaskStatus AddFieldsAndStore(const std::shared_ptr> &md_a, + const std::shared_ptr> &md_b, + const std::shared_ptr> &md_out, Real wa = 1.0, + Real wb = 1.0) { + return AddFieldsAndStoreInteriorSelect(md_a, md_b, md_out, + wa, wb, false); +} + template TaskStatus ADividedByB(const std::shared_ptr> &md) { IndexRange ib = md->GetBoundsI(IndexDomain::interior); @@ -277,6 +369,29 @@ TaskStatus ADividedByB(const std::shared_ptr> &md) { return TaskStatus::complete; } +template +TaskStatus ADividedByB(const std::shared_ptr> &md_a, + const std::shared_ptr> &md_b, + const std::shared_ptr> &md_out) { + IndexRange ib = md_a->GetBoundsI(IndexDomain::interior); + IndexRange jb = md_a->GetBoundsJ(IndexDomain::interior); + IndexRange kb = md_a->GetBoundsK(IndexDomain::interior); + + static auto desc = parthenon::MakePackDescriptorFromTypeList(md_a.get()); + auto pack_a = desc.GetPack(md_a.get()); + auto pack_b = desc.GetPack(md_b.get()); + auto pack_out = desc.GetPack(md_out.get()); + parthenon::par_for( + DEFAULT_LOOP_PATTERN, "DotProduct", DevExecSpace(), 0, pack_a.GetNBlocks() - 1, + kb.s, kb.e, jb.s, jb.e, ib.s, ib.e, + KOKKOS_LAMBDA(const int b, const int k, const int j, const int i) { + const int nvars = pack_a.GetUpperBound(b) - pack_a.GetLowerBound(b) + 1; + for (int c = 0; c < nvars; ++c) + pack_out(b, c, k, j, i) = pack_a(b, c, k, j, i) / pack_b(b, c, k, j, i); + }); + return TaskStatus::complete; +} + template TaskStatus DotProductLocal(const std::shared_ptr> &md, AllReduce *adotb) { @@ -373,6 +488,58 @@ TaskID GlobalMin(TaskID dependency_in, TaskList &tl, AllReduce *amin, start_global_amin, &AllReduce::CheckReduce, amin); } +template +TaskStatus DotProductLocal(const std::shared_ptr> &md_a, + const std::shared_ptr> &md_b, + AllReduce *adotb) { + using TE = parthenon::TopologicalElement; + TE te = TE::CC; + IndexRange ib = md_a->GetBoundsI(IndexDomain::interior, te); + IndexRange jb = md_a->GetBoundsJ(IndexDomain::interior, te); + IndexRange kb = md_a->GetBoundsK(IndexDomain::interior, te); + + static auto desc = parthenon::MakePackDescriptorFromTypeList(md_a.get()); + auto pack_a = desc.GetPack(md_a.get()); + auto pack_b = desc.GetPack(md_b.get()); + Real gsum(0); + parthenon::par_reduce( + parthenon::loop_pattern_mdrange_tag, "DotProduct", DevExecSpace(), 0, + pack_a.GetNBlocks() - 1, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e, + KOKKOS_LAMBDA(const int b, const int k, const int j, const int i, Real &lsum) { + const int nvars = pack_a.GetUpperBound(b) - pack_a.GetLowerBound(b) + 1; + // TODO(LFR): If this becomes a bottleneck, exploit hierarchical parallelism and + // pull the loop over vars outside of the innermost loop to promote + // vectorization. 
+ for (int c = 0; c < nvars; ++c) + lsum += pack_a(b, te, c, k, j, i) * pack_b(b, te, c, k, j, i); + }, + Kokkos::Sum(gsum)); + adotb->val += gsum; + return TaskStatus::complete; +} + +template +TaskID DotProduct(TaskID dependency_in, TaskList &tl, AllReduce *adotb, + const std::shared_ptr> &md_a, + const std::shared_ptr> &md_b) { + using namespace impl; + auto zero_adotb = tl.AddTask( + TaskQualifier::once_per_region | TaskQualifier::local_sync, dependency_in, + [](AllReduce *r) { + r->val = 0.0; + return TaskStatus::complete; + }, + adotb); + auto get_adotb = tl.AddTask(TaskQualifier::local_sync, zero_adotb, DotProductLocal, + md_a, md_b, adotb); + auto start_global_adotb = tl.AddTask(TaskQualifier::once_per_region, get_adotb, + &AllReduce::StartReduce, adotb, MPI_SUM); + auto finish_global_adotb = + tl.AddTask(TaskQualifier::once_per_region | TaskQualifier::local_sync, + start_global_adotb, &AllReduce::CheckReduce, adotb); + return finish_global_adotb; +} + } // namespace utils } // namespace solvers diff --git a/src/solvers/solver_utils_stages.hpp b/src/solvers/solver_utils_stages.hpp deleted file mode 100644 index 613bce728e07..000000000000 --- a/src/solvers/solver_utils_stages.hpp +++ /dev/null @@ -1,227 +0,0 @@ -//======================================================================================== -// (C) (or copyright) 2021-2024. Triad National Security, LLC. All rights reserved. -// -// This program was produced under U.S. Government contract 89233218CNA000001 for Los -// Alamos National Laboratory (LANL), which is operated by Triad National Security, LLC -// for the U.S. Department of Energy/National Nuclear Security Administration. All rights -// in the program are reserved by Triad National Security, LLC, and the U.S. Department -// of Energy/National Nuclear Security Administration. The Government is granted for -// itself and others acting on its behalf a nonexclusive, paid-up, irrevocable worldwide -// license in this material to reproduce, prepare derivative works, distribute copies to -// the public, perform publicly and display publicly, and to permit others to do so. 
-//======================================================================================== -#ifndef SOLVERS_SOLVER_UTILS_STAGES_HPP_ -#define SOLVERS_SOLVER_UTILS_STAGES_HPP_ - -#include -#include -#include -#include -#include -#include - -#include "kokkos_abstraction.hpp" - -namespace parthenon { - -namespace solvers { - -namespace StageUtils { - -template -TaskStatus CopyData(const std::shared_ptr> &md_in, - const std::shared_ptr> &md_out) { - using TE = parthenon::TopologicalElement; - TE te = TE::CC; - IndexRange ib = md_in->GetBoundsI(IndexDomain::entire, te); - IndexRange jb = md_in->GetBoundsJ(IndexDomain::entire, te); - IndexRange kb = md_in->GetBoundsK(IndexDomain::entire, te); - - static auto desc = parthenon::MakePackDescriptorFromTypeList(md_in.get()); - auto pack_in = desc.GetPack(md_in.get(), only_fine_on_composite); - auto pack_out = desc.GetPack(md_out.get(), only_fine_on_composite); - const int scratch_size = 0; - const int scratch_level = 0; - // Warning: This inner loop strategy only works because we are using IndexDomain::entire - const int npoints_inner = (kb.e - kb.s + 1) * (jb.e - jb.s + 1) * (ib.e - ib.s + 1); - parthenon::par_for_outer( - DEFAULT_OUTER_LOOP_PATTERN, "CopyData", DevExecSpace(), scratch_size, scratch_level, - 0, pack_in.GetNBlocks() - 1, - KOKKOS_LAMBDA(parthenon::team_mbr_t member, const int b) { - const int nvars = pack_in.GetUpperBound(b) - pack_in.GetLowerBound(b) + 1; - for (int c = 0; c < nvars; ++c) { - Real *in = &pack_in(b, te, c, kb.s, jb.s, ib.s); - Real *out = &pack_out(b, te, c, kb.s, jb.s, ib.s); - parthenon::par_for_inner(DEFAULT_INNER_LOOP_PATTERN, member, 0, - npoints_inner - 1, - [&](const int idx) { out[idx] = in[idx]; }); - } - }); - return TaskStatus::complete; -} - -template -TaskStatus AddFieldsAndStoreInteriorSelect(const std::shared_ptr> &md_a, - const std::shared_ptr> &md_b, - const std::shared_ptr> &md_out, - Real wa = 1.0, Real wb = 1.0, - bool only_interior_blocks = false) { - using TE = parthenon::TopologicalElement; - TE te = TE::CC; - IndexRange ib = md_a->GetBoundsI(IndexDomain::entire, te); - IndexRange jb = md_a->GetBoundsJ(IndexDomain::entire, te); - IndexRange kb = md_a->GetBoundsK(IndexDomain::entire, te); - - int nblocks = md_a->NumBlocks(); - std::vector include_block(nblocks, true); - if (only_interior_blocks) { - // The neighbors array will only be set for a block if its a leaf block - for (int b = 0; b < nblocks; ++b) - include_block[b] = md_a->GetBlockData(b)->GetBlockPointer()->neighbors.size() == 0; - } - - static auto desc = parthenon::MakePackDescriptorFromTypeList(md_a.get()); - auto pack_a = desc.GetPack(md_a.get(), include_block, only_fine_on_composite); - auto pack_b = desc.GetPack(md_b.get(), include_block, only_fine_on_composite); - auto pack_out = desc.GetPack(md_out.get(), include_block, only_fine_on_composite); - const int scratch_size = 0; - const int scratch_level = 0; - // Warning: This inner loop strategy only works because we are using IndexDomain::entire - const int npoints_inner = (kb.e - kb.s + 1) * (jb.e - jb.s + 1) * (ib.e - ib.s + 1); - parthenon::par_for_outer( - DEFAULT_OUTER_LOOP_PATTERN, "AddFieldsAndStore", DevExecSpace(), scratch_size, - scratch_level, 0, pack_a.GetNBlocks() - 1, - KOKKOS_LAMBDA(parthenon::team_mbr_t member, const int b) { - const int nvars = pack_a.GetUpperBound(b) - pack_a.GetLowerBound(b) + 1; - for (int c = 0; c < nvars; ++c) { - Real *avar = &pack_a(b, te, c, kb.s, jb.s, ib.s); - Real *bvar = &pack_b(b, te, c, kb.s, jb.s, ib.s); - Real *out = &pack_out(b, 
te, c, kb.s, jb.s, ib.s); - parthenon::par_for_inner( - DEFAULT_INNER_LOOP_PATTERN, member, 0, npoints_inner - 1, - [&](const int idx) { out[idx] = wa * avar[idx] + wb * bvar[idx]; }); - } - }); - return TaskStatus::complete; -} - -template -TaskStatus AddFieldsAndStore(const std::shared_ptr> &md_a, - const std::shared_ptr> &md_b, - const std::shared_ptr> &md_out, Real wa = 1.0, - Real wb = 1.0) { - return AddFieldsAndStoreInteriorSelect(md_a, md_b, md_out, - wa, wb, false); -} - -template -TaskStatus SetToZero(const std::shared_ptr> &md) { - int nblocks = md->NumBlocks(); - using TE = parthenon::TopologicalElement; - TE te = TE::CC; - static auto desc = parthenon::MakePackDescriptorFromTypeList(md.get()); - auto pack = desc.GetPack(md.get(), only_fine_on_composite); - const size_t scratch_size_in_bytes = 0; - const int scratch_level = 1; - const int ng = parthenon::Globals::nghost; - parthenon::par_for_outer( - DEFAULT_OUTER_LOOP_PATTERN, "SetFieldsToZero", DevExecSpace(), - scratch_size_in_bytes, scratch_level, 0, pack.GetNBlocks() - 1, - KOKKOS_LAMBDA(parthenon::team_mbr_t member, const int b) { - auto cb = GetIndexShape(pack(b, te, 0), ng); - const auto &coords = pack.GetCoordinates(b); - IndexRange ib = cb.GetBoundsI(IndexDomain::interior, te); - IndexRange jb = cb.GetBoundsJ(IndexDomain::interior, te); - IndexRange kb = cb.GetBoundsK(IndexDomain::interior, te); - const int nvars = pack.GetUpperBound(b) - pack.GetLowerBound(b) + 1; - for (int c = 0; c < nvars; ++c) { - parthenon::par_for_inner( - parthenon::inner_loop_pattern_simdfor_tag, member, kb.s, kb.e, jb.s, jb.e, - ib.s, ib.e, [&](int k, int j, int i) { pack(b, te, c, k, j, i) = 0.0; }); - } - }); - return TaskStatus::complete; -} - -template -TaskStatus ADividedByB(const std::shared_ptr> &md_a, - const std::shared_ptr> &md_b, - const std::shared_ptr> &md_out) { - IndexRange ib = md_a->GetBoundsI(IndexDomain::interior); - IndexRange jb = md_a->GetBoundsJ(IndexDomain::interior); - IndexRange kb = md_a->GetBoundsK(IndexDomain::interior); - - static auto desc = parthenon::MakePackDescriptorFromTypeList(md_a.get()); - auto pack_a = desc.GetPack(md_a.get()); - auto pack_b = desc.GetPack(md_b.get()); - auto pack_out = desc.GetPack(md_out.get()); - parthenon::par_for( - DEFAULT_LOOP_PATTERN, "DotProduct", DevExecSpace(), 0, pack_a.GetNBlocks() - 1, - kb.s, kb.e, jb.s, jb.e, ib.s, ib.e, - KOKKOS_LAMBDA(const int b, const int k, const int j, const int i) { - const int nvars = pack_a.GetUpperBound(b) - pack_a.GetLowerBound(b) + 1; - for (int c = 0; c < nvars; ++c) - pack_out(b, c, k, j, i) = pack_a(b, c, k, j, i) / pack_b(b, c, k, j, i); - }); - return TaskStatus::complete; -} - -template -TaskStatus DotProductLocal(const std::shared_ptr> &md_a, - const std::shared_ptr> &md_b, - AllReduce *adotb) { - using TE = parthenon::TopologicalElement; - TE te = TE::CC; - IndexRange ib = md_a->GetBoundsI(IndexDomain::interior, te); - IndexRange jb = md_a->GetBoundsJ(IndexDomain::interior, te); - IndexRange kb = md_a->GetBoundsK(IndexDomain::interior, te); - - static auto desc = parthenon::MakePackDescriptorFromTypeList(md_a.get()); - auto pack_a = desc.GetPack(md_a.get()); - auto pack_b = desc.GetPack(md_b.get()); - Real gsum(0); - parthenon::par_reduce( - parthenon::loop_pattern_mdrange_tag, "DotProduct", DevExecSpace(), 0, - pack_a.GetNBlocks() - 1, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e, - KOKKOS_LAMBDA(const int b, const int k, const int j, const int i, Real &lsum) { - const int nvars = pack_a.GetUpperBound(b) - pack_a.GetLowerBound(b) + 1; - // 
TODO(LFR): If this becomes a bottleneck, exploit hierarchical parallelism and - // pull the loop over vars outside of the innermost loop to promote - // vectorization. - for (int c = 0; c < nvars; ++c) - lsum += pack_a(b, te, c, k, j, i) * pack_b(b, te, c, k, j, i); - }, - Kokkos::Sum(gsum)); - adotb->val += gsum; - return TaskStatus::complete; -} - -template -TaskID DotProduct(TaskID dependency_in, TaskList &tl, AllReduce *adotb, - const std::shared_ptr> &md_a, - const std::shared_ptr> &md_b) { - using namespace impl; - auto zero_adotb = tl.AddTask( - TaskQualifier::once_per_region | TaskQualifier::local_sync, dependency_in, - [](AllReduce *r) { - r->val = 0.0; - return TaskStatus::complete; - }, - adotb); - auto get_adotb = tl.AddTask(TaskQualifier::local_sync, zero_adotb, DotProductLocal, - md_a, md_b, adotb); - auto start_global_adotb = tl.AddTask(TaskQualifier::once_per_region, get_adotb, - &AllReduce::StartReduce, adotb, MPI_SUM); - auto finish_global_adotb = - tl.AddTask(TaskQualifier::once_per_region | TaskQualifier::local_sync, - start_global_adotb, &AllReduce::CheckReduce, adotb); - return finish_global_adotb; -} - -} // namespace StageUtils - -} // namespace solvers - -} // namespace parthenon - -#endif // SOLVERS_SOLVER_UTILS_STAGES_HPP_ diff --git a/src/utils/type_list.hpp b/src/utils/type_list.hpp index a17b7b8f7063..a401937302cb 100644 --- a/src/utils/type_list.hpp +++ b/src/utils/type_list.hpp @@ -92,6 +92,15 @@ auto GetNames() { TL::IterateTypes([&names](auto t) { names.push_back(decltype(t)::name()); }); return names; } + +template +struct isTypeList : public std::false_type + { }; + +template +struct isTypeList> : public std::true_type + { }; + } // namespace parthenon #endif // UTILS_TYPE_LIST_HPP_ From ca1f1f505f4907d05f0829786dc401490f425ee7 Mon Sep 17 00:00:00 2001 From: Luke Roberts Date: Thu, 21 Nov 2024 15:09:16 -0700 Subject: [PATCH 54/62] remove last thing --- example/poisson_gmg/CMakeLists.txt | 1 - example/poisson_gmg/poisson_driver.cpp | 20 +- example/poisson_gmg/poisson_equation.hpp | 436 ----------------------- example/poisson_gmg/poisson_package.cpp | 5 +- 4 files changed, 11 insertions(+), 451 deletions(-) delete mode 100644 example/poisson_gmg/poisson_equation.hpp diff --git a/example/poisson_gmg/CMakeLists.txt b/example/poisson_gmg/CMakeLists.txt index e63cdd274457..e95a78264919 100644 --- a/example/poisson_gmg/CMakeLists.txt +++ b/example/poisson_gmg/CMakeLists.txt @@ -17,7 +17,6 @@ if( "poisson-gmg-example" IN_LIST DRIVER_LIST OR NOT PARTHENON_DISABLE_EXAMPLES) poisson-gmg-example poisson_driver.cpp poisson_driver.hpp - poisson_equation.hpp poisson_equation_stages.hpp poisson_package.cpp poisson_package.hpp diff --git a/example/poisson_gmg/poisson_driver.cpp b/example/poisson_gmg/poisson_driver.cpp index a15de50b5851..0aa57537bd2b 100644 --- a/example/poisson_gmg/poisson_driver.cpp +++ b/example/poisson_gmg/poisson_driver.cpp @@ -25,7 +25,6 @@ #include "mesh/meshblock_pack.hpp" #include "parthenon/driver.hpp" #include "poisson_driver.hpp" -#include "poisson_equation.hpp" #include "poisson_equation_stages.hpp" #include "poisson_package.hpp" #include "prolong_restrict/prolong_restrict.hpp" @@ -75,21 +74,20 @@ TaskCollection PoissonDriver::MakeTaskCollection(BlockList_t &blocks) { auto &md_u = pmesh->mesh_data.Add("u", md, {u::name()}); auto &md_rhs = pmesh->mesh_data.Add("rhs", md, {u::name()}); + // Move the rhs variable into the rhs stage for stage based solver + auto copy_rhs = tl.AddTask(none, TF(solvers::utils::CopyData), md); + copy_rhs = 
tl.AddTask( + copy_rhs, TF(solvers::utils::CopyData>), md, md_rhs); + // Possibly set rhs <- A.u_exact for a given u_exact so that the exact solution is // known when we solve A.u = rhs - auto get_rhs = none; if (use_exact_rhs) { - auto copy_exact = tl.AddTask(get_rhs, TF(solvers::utils::CopyData), md); - auto comm = AddBoundaryExchangeTasks(copy_exact, tl, md, true); - auto *eqs = pkg->MutableParam("poisson_equation"); - get_rhs = eqs->Ax(tl, comm, md); + auto copy_exact = tl.AddTask(copy_rhs, TF(solvers::utils::CopyData), md); + auto comm = AddBoundaryExchangeTasks(copy_exact, tl, md_u, true); + auto *eqs = pkg->MutableParam>("poisson_equation"); + copy_rhs = eqs->Ax(tl, comm, md, md, md_rhs); } - // Move the rhs variable into the rhs stage for stage based solver - auto copy_rhs = tl.AddTask(get_rhs, TF(solvers::utils::CopyData), md); - copy_rhs = tl.AddTask( - copy_rhs, TF(solvers::utils::CopyData>), md, md_rhs); - // Set initial solution guess to zero auto zero_u = tl.AddTask(copy_rhs, TF(solvers::utils::SetToZero), md); zero_u = tl.AddTask(zero_u, TF(solvers::utils::SetToZero), md_u); diff --git a/example/poisson_gmg/poisson_equation.hpp b/example/poisson_gmg/poisson_equation.hpp deleted file mode 100644 index 1d83013cbb49..000000000000 --- a/example/poisson_gmg/poisson_equation.hpp +++ /dev/null @@ -1,436 +0,0 @@ -//======================================================================================== -// (C) (or copyright) 2023-2024. Triad National Security, LLC. All rights reserved. -// -// This program was produced under U.S. Government contract 89233218CNA000001 for Los -// Alamos National Laboratory (LANL), which is operated by Triad National Security, LLC -// for the U.S. Department of Energy/National Nuclear Security Administration. All rights -// in the program are reserved by Triad National Security, LLC, and the U.S. Department -// of Energy/National Nuclear Security Administration. The Government is granted for -// itself and others acting on its behalf a nonexclusive, paid-up, irrevocable worldwide -// license in this material to reproduce, prepare derivative works, distribute copies to -// the public, perform publicly and display publicly, and to permit others to do so. -//======================================================================================== -#ifndef EXAMPLE_POISSON_GMG_POISSON_EQUATION_HPP_ -#define EXAMPLE_POISSON_GMG_POISSON_EQUATION_HPP_ - -#include -#include -#include -#include - -#include -#include - -#include "poisson_package.hpp" - -namespace poisson_package { - -// This class implement methods for calculating A.x = y and returning the diagonal of A, -// where A is the the matrix representing the discretized Poisson equation on the grid. -// Here we implement the Laplace operator in terms of a flux divergence to (potentially) -// consistently deal with coarse fine boundaries on the grid. Only the routines Ax and -// SetDiagonal need to be defined for interfacing this with solvers. The other methods -// are internal, but can't be marked private or protected because they launch kernels -// on device. 
-class PoissonEquation { - public: - bool do_flux_cor = false; - bool set_flux_boundary = false; - bool include_flux_dx = false; - enum class ProlongationType { Constant, Linear, Kwak }; - ProlongationType prolongation_type = ProlongationType::Constant; - - PoissonEquation(parthenon::ParameterInput *pin, const std::string &label) { - do_flux_cor = pin->GetOrAddBoolean(label, "flux_correct", false); - set_flux_boundary = pin->GetOrAddBoolean(label, "set_flux_boundary", false); - include_flux_dx = - (pin->GetOrAddString(label, "boundary_prolongation", "Linear") == "Constant"); - auto pro_int = pin->GetOrAddString(label, "interior_prolongation", "Linear"); - if (pro_int == "Constant") { - prolongation_type = ProlongationType::Constant; - } else if (pro_int == "Linear") { - prolongation_type = ProlongationType::Linear; - } else if (pro_int == "Kwak") { - prolongation_type = ProlongationType::Kwak; - } else { - PARTHENON_FAIL("Invalid user prolongation type."); - } - } - - // Add tasks to calculate the result of the matrix A (which is implicitly defined by - // this class) being applied to x_t and store it in field out_t - template - parthenon::TaskID Ax(TL_t &tl, parthenon::TaskID depends_on, - std::shared_ptr> &md) { - auto flux_res = tl.AddTask(depends_on, CalculateFluxes, md); - if (set_flux_boundary) { - flux_res = tl.AddTask(flux_res, SetFluxBoundaries, md, include_flux_dx); - } - if (do_flux_cor && !(md->grid.type == parthenon::GridType::two_level_composite)) { - auto start_flxcor = - tl.AddTask(flux_res, parthenon::StartReceiveFluxCorrections, md); - auto send_flxcor = tl.AddTask(flux_res, parthenon::LoadAndSendFluxCorrections, md); - auto recv_flxcor = tl.AddTask(start_flxcor, parthenon::ReceiveFluxCorrections, md); - flux_res = tl.AddTask(recv_flxcor, parthenon::SetFluxCorrections, md); - } - return tl.AddTask(flux_res, FluxMultiplyMatrix, md); - } - - // Calculate an approximation to the diagonal of the matrix A and store it in diag_t. - // For a uniform grid or when flux correction is ignored, this diagonal calculation - // is exact. Exactness is (probably) not required since it is just used in Jacobi - // iterations. 
- template - parthenon::TaskStatus SetDiagonal(std::shared_ptr> &md) { - using namespace parthenon; - const int ndim = md->GetMeshPointer()->ndim; - using TE = parthenon::TopologicalElement; - TE te = TE::CC; - IndexRange ib = md->GetBoundsI(IndexDomain::interior, te); - IndexRange jb = md->GetBoundsJ(IndexDomain::interior, te); - IndexRange kb = md->GetBoundsK(IndexDomain::interior, te); - - auto pkg = md->GetMeshPointer()->packages.Get("poisson_package"); - const auto alpha = pkg->Param("diagonal_alpha"); - - int nblocks = md->NumBlocks(); - std::vector include_block(nblocks, true); - - auto desc = parthenon::MakePackDescriptor(md.get()); - auto pack = desc.GetPack(md.get(), include_block); - parthenon::par_for( - "StoreDiagonal", 0, pack.GetNBlocks() - 1, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e, - KOKKOS_LAMBDA(const int b, const int k, const int j, const int i) { - const auto &coords = pack.GetCoordinates(b); - // Build the unigrid diagonal of the matrix - Real dx1 = coords.template Dxc(k, j, i); - Real diag_elem = - -(pack(b, TE::F1, D(), k, j, i) + pack(b, TE::F1, D(), k, j, i + 1)) / - (dx1 * dx1) - - alpha; - if (ndim > 1) { - Real dx2 = coords.template Dxc(k, j, i); - diag_elem -= - (pack(b, TE::F2, D(), k, j, i) + pack(b, TE::F2, D(), k, j + 1, i)) / - (dx2 * dx2); - } - if (ndim > 2) { - Real dx3 = coords.template Dxc(k, j, i); - diag_elem -= - (pack(b, TE::F3, D(), k, j, i) + pack(b, TE::F3, D(), k + 1, j, i)) / - (dx3 * dx3); - } - pack(b, te, diag_t(), k, j, i) = diag_elem; - }); - return TaskStatus::complete; - } - - template - static parthenon::TaskStatus - CalculateFluxes(std::shared_ptr> &md) { - using namespace parthenon; - const int ndim = md->GetMeshPointer()->ndim; - using TE = parthenon::TopologicalElement; - TE te = TE::CC; - IndexRange ib = md->GetBoundsI(IndexDomain::interior, te); - IndexRange jb = md->GetBoundsJ(IndexDomain::interior, te); - IndexRange kb = md->GetBoundsK(IndexDomain::interior, te); - - using TE = parthenon::TopologicalElement; - - int nblocks = md->NumBlocks(); - std::vector include_block(nblocks, true); - - auto desc = - parthenon::MakePackDescriptor(md.get(), {}, {PDOpt::WithFluxes}); - auto pack = desc.GetPack(md.get(), include_block); - parthenon::par_for( - "CaclulateFluxes", 0, pack.GetNBlocks() - 1, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e, - KOKKOS_LAMBDA(const int b, const int k, const int j, const int i) { - const auto &coords = pack.GetCoordinates(b); - Real dx1 = coords.template Dxc(k, j, i); - pack.flux(b, X1DIR, var_t(), k, j, i) = - pack(b, TE::F1, D(), k, j, i) / dx1 * - (pack(b, te, var_t(), k, j, i - 1) - pack(b, te, var_t(), k, j, i)); - if (i == ib.e) - pack.flux(b, X1DIR, var_t(), k, j, i + 1) = - pack(b, TE::F1, D(), k, j, i + 1) / dx1 * - (pack(b, te, var_t(), k, j, i) - pack(b, te, var_t(), k, j, i + 1)); - - if (ndim > 1) { - Real dx2 = coords.template Dxc(k, j, i); - pack.flux(b, X2DIR, var_t(), k, j, i) = - pack(b, TE::F2, D(), k, j, i) * - (pack(b, te, var_t(), k, j - 1, i) - pack(b, te, var_t(), k, j, i)) / dx2; - if (j == jb.e) - pack.flux(b, X2DIR, var_t(), k, j + 1, i) = - pack(b, TE::F2, D(), k, j + 1, i) * - (pack(b, te, var_t(), k, j, i) - pack(b, te, var_t(), k, j + 1, i)) / - dx2; - } - - if (ndim > 2) { - Real dx3 = coords.template Dxc(k, j, i); - pack.flux(b, X3DIR, var_t(), k, j, i) = - pack(b, TE::F3, D(), k, j, i) * - (pack(b, te, var_t(), k - 1, j, i) - pack(b, te, var_t(), k, j, i)) / dx3; - if (k == kb.e) - pack.flux(b, X2DIR, var_t(), k + 1, j, i) = - pack(b, TE::F3, D(), k + 1, j, i) * - (pack(b, te, var_t(), 
k, j, i) - pack(b, te, var_t(), k + 1, j, i)) / - dx3; - } - }); - return TaskStatus::complete; - } - - template - parthenon::TaskID Prolongate(parthenon::TaskList &tl, parthenon::TaskID depends_on, - std::shared_ptr> &md) { - if (prolongation_type == ProlongationType::Constant) { - return tl.AddTask(depends_on, ProlongateImpl, - md); - } else if (prolongation_type == ProlongationType::Linear) { - return tl.AddTask(depends_on, ProlongateImpl, - md); - } else if (prolongation_type == ProlongationType::Kwak) { - return tl.AddTask(depends_on, ProlongateImpl, - md); - } - return depends_on; - } - - KOKKOS_FORCEINLINE_FUNCTION - static Real LinearFactor(int d, bool lo_bound, bool up_bound) { - if (d == 0) return 1.0; // Indicates this dimension is not included - if (d == 1) return (2.0 + !up_bound) / 4.0; - if (d == -1) return (2.0 + !lo_bound) / 4.0; - if (d == 3) return !up_bound / 4.0; - if (d == -3) return !lo_bound / 4.0; - return 0.0; - } - - KOKKOS_FORCEINLINE_FUNCTION - static Real QuadraticFactor(int d) { - if (d == 0) return 1.0; // Indicates this dimension is not included - if (d == 1 || d == -1) return 30.0 / 32.0; - if (d == 3 || d == -3) return 5.0 / 32.0; - if (d == 5 || d == -5) return -3.0 / 32.0; - return 0.0; - } - - template - static parthenon::TaskStatus - ProlongateImpl(std::shared_ptr> &md) { - using namespace parthenon; - const int ndim = md->GetMeshPointer()->ndim; - IndexRange ib = md->GetBoundsI(IndexDomain::interior); - IndexRange jb = md->GetBoundsJ(IndexDomain::interior); - IndexRange kb = md->GetBoundsK(IndexDomain::interior); - IndexRange cib = md->GetBoundsI(CellLevel::coarse, IndexDomain::interior); - IndexRange cjb = md->GetBoundsJ(CellLevel::coarse, IndexDomain::interior); - IndexRange ckb = md->GetBoundsK(CellLevel::coarse, IndexDomain::interior); - - using TE = parthenon::TopologicalElement; - - int nblocks = md->NumBlocks(); - std::vector include_block(nblocks, true); - for (int b = 0; b < nblocks; ++b) { - include_block[b] = - md->grid.logical_level == md->GetBlockData(b)->GetBlockPointer()->loc.level(); - } - const auto desc = parthenon::MakePackDescriptor(md.get()); - const auto desc_coarse = - parthenon::MakePackDescriptor(md.get(), {}, {PDOpt::Coarse}); - auto pack = desc.GetPack(md.get(), include_block); - auto pack_coarse = desc_coarse.GetPack(md.get(), include_block); - - parthenon::par_for( - "Prolongate", 0, pack.GetNBlocks() - 1, pack.GetLowerBoundHost(0), - pack.GetUpperBoundHost(0), kb.s, kb.e, jb.s, jb.e, ib.s, ib.e, - KOKKOS_LAMBDA(const int b, const int n, const int fk, const int fj, - const int fi) { - const int ck = (ndim > 2) ? (fk - kb.s) / 2 + ckb.s : ckb.s; - const int cj = (ndim > 1) ? (fj - jb.s) / 2 + cjb.s : cjb.s; - const int ci = (ndim > 0) ? 
(fi - ib.s) / 2 + cib.s : cib.s; - const int fok = (fk - kb.s) % 2; - const int foj = (fj - jb.s) % 2; - const int foi = (fi - ib.s) % 2; - const bool bound[6]{pack.IsPhysicalBoundary(b, 0, 0, -1) && (ib.s == fi), - pack.IsPhysicalBoundary(b, 0, 0, 1) && (ib.e == fi), - pack.IsPhysicalBoundary(b, 0, -1, 0) && (jb.s == fj), - pack.IsPhysicalBoundary(b, 0, 1, 0) && (jb.e == fj), - pack.IsPhysicalBoundary(b, -1, 0, 0) && (kb.s == fk), - pack.IsPhysicalBoundary(b, 1, 0, 0) && (kb.e == fk)}; - // Use both pack and pack_coarse outside of the constexpr if - // statements to prevent compilation errors in some CUDA compilers - pack(b, n, fk, fj, fi) = pack_coarse(b, n, ck, cj, ci); - if constexpr (ProlongationType::Constant == prolongation_type) { - pack(b, n, fk, fj, fi) = pack_coarse(b, n, ck, cj, ci); - } else if constexpr (ProlongationType::Linear == prolongation_type) { - pack(b, n, fk, fj, fi) = 0.0; - for (int ok = -(ndim > 2); ok < 1 + (ndim > 2); ++ok) { - for (int oj = -(ndim > 1); oj < 1 + (ndim > 1); ++oj) { - for (int oi = -(ndim > 0); oi < 1 + (ndim > 0); ++oi) { - const int dx3 = (ndim > 2) ? 4 * ok - (2 * fok - 1) : 0; - const int dx2 = (ndim > 1) ? 4 * oj - (2 * foj - 1) : 0; - const int dx1 = 4 * oi - (2 * foi - 1); - pack(b, n, fk, fj, fi) += LinearFactor(dx1, bound[0], bound[1]) * - LinearFactor(dx2, bound[2], bound[3]) * - LinearFactor(dx3, bound[4], bound[5]) * - pack_coarse(b, n, ck + ok, cj + oj, ci + oi); - } - } - } - } else if constexpr (ProlongationType::Kwak == prolongation_type) { - pack(b, n, fk, fj, fi) = 0.0; - if (ndim > 2 && !bound[4 + fok]) { - for (int ok = fok - 1; ok <= fok; ++ok) { - pack(b, n, fk, fj, fi) += pack_coarse(b, n, ck + ok, cj, ci); - } - } - if (ndim > 1 && !bound[2 + foj]) { - for (int oj = foj - 1; oj <= foj; ++oj) { - pack(b, n, fk, fj, fi) += pack_coarse(b, n, ck, cj + oj, ci); - } - } - if (ndim > 0 && !bound[foi]) { - for (int oi = foi - 1; oi <= foi; ++oi) { - pack(b, n, fk, fj, fi) += pack_coarse(b, n, ck, cj, ci + oi); - } - } - pack(b, n, fk, fj, fi) /= 2.0 * ndim; - } - }); - return TaskStatus::complete; - } - - template - static parthenon::TaskStatus - SetFluxBoundaries(std::shared_ptr> &md, bool do_flux_dx) { - using namespace parthenon; - const int ndim = md->GetMeshPointer()->ndim; - IndexRange ib = md->GetBoundsI(IndexDomain::interior); - IndexRange jb = md->GetBoundsJ(IndexDomain::interior); - IndexRange kb = md->GetBoundsK(IndexDomain::interior); - - using TE = parthenon::TopologicalElement; - - int nblocks = md->NumBlocks(); - std::vector include_block(nblocks, true); - - auto desc = - parthenon::MakePackDescriptor(md.get(), {}, {PDOpt::WithFluxes}); - auto pack = desc.GetPack(md.get(), include_block); - const std::size_t scratch_size_in_bytes = 0; - const std::size_t scratch_level = 1; - - const parthenon::Indexer3D idxers[6]{ - parthenon::Indexer3D(kb, jb, {ib.s, ib.s}), - parthenon::Indexer3D(kb, jb, {ib.e + 1, ib.e + 1}), - parthenon::Indexer3D(kb, {jb.s, jb.s}, ib), - parthenon::Indexer3D(kb, {jb.e + 1, jb.e + 1}, ib), - parthenon::Indexer3D({kb.s, kb.s}, jb, ib), - parthenon::Indexer3D({kb.e + 1, kb.e + 1}, jb, ib)}; - constexpr int x1off[6]{-1, 1, 0, 0, 0, 0}; - constexpr int x2off[6]{0, 0, -1, 1, 0, 0}; - constexpr int x3off[6]{0, 0, 0, 0, -1, 1}; - constexpr TE tes[6]{TE::F1, TE::F1, TE::F2, TE::F2, TE::F3, TE::F3}; - constexpr int dirs[6]{X1DIR, X1DIR, X2DIR, X2DIR, X3DIR, X3DIR}; - parthenon::par_for_outer( - DEFAULT_OUTER_LOOP_PATTERN, "SetFluxBoundaries", DevExecSpace(), - scratch_size_in_bytes, scratch_level, 0, 
pack.GetNBlocks() - 1, - KOKKOS_LAMBDA(parthenon::team_mbr_t member, const int b) { - const auto &coords = pack.GetCoordinates(b); - const int gid = pack.GetGID(b); - const int level = pack.GetLevel(b, 0, 0, 0); - const Real dxs[3]{coords.template Dxc(), coords.template Dxc(), - coords.template Dxc()}; - for (int face = 0; face < ndim * 2; ++face) { - const Real dx = dxs[dirs[face] - 1]; - const auto &idxer = idxers[face]; - const auto dir = dirs[face]; - const auto te = tes[face]; - // Impose the zero Dirichlet boundary condition at the actual boundary - if (pack.IsPhysicalBoundary(b, x3off[face], x2off[face], x1off[face])) { - const int koff = x3off[face] > 0 ? -1 : 0; - const int joff = x2off[face] > 0 ? -1 : 0; - const int ioff = x1off[face] > 0 ? -1 : 0; - const int sign = x1off[face] + x2off[face] + x3off[face]; - parthenon::par_for_inner( - DEFAULT_INNER_LOOP_PATTERN, member, 0, idxer.size() - 1, - [&](const int idx) { - const auto [k, j, i] = idxer(idx); - pack.flux(b, dir, var_t(), k, j, i) = - sign * pack(b, te, D(), k, j, i) * - pack(b, var_t(), k + koff, j + joff, i + ioff) / (0.5 * dx); - }); - } - // Correct for size of neighboring zone at fine-coarse boundary when using - // constant prolongation - if (do_flux_dx && - pack.GetLevel(b, x3off[face], x2off[face], x1off[face]) == level - 1) { - parthenon::par_for_inner(DEFAULT_INNER_LOOP_PATTERN, member, 0, - idxer.size() - 1, [&](const int idx) { - const auto [k, j, i] = idxer(idx); - pack.flux(b, dir, var_t(), k, j, i) /= 1.5; - }); - } - } - }); - return TaskStatus::complete; - } - - // Calculate A in_t = out_t (in the region covered by md) for a given set of fluxes - // calculated with in_t (which have possibly been corrected at coarse fine boundaries) - template - static parthenon::TaskStatus - FluxMultiplyMatrix(std::shared_ptr> &md) { - using namespace parthenon; - const int ndim = md->GetMeshPointer()->ndim; - using TE = parthenon::TopologicalElement; - TE te = TE::CC; - IndexRange ib = md->GetBoundsI(IndexDomain::interior, te); - IndexRange jb = md->GetBoundsJ(IndexDomain::interior, te); - IndexRange kb = md->GetBoundsK(IndexDomain::interior, te); - - auto pkg = md->GetMeshPointer()->packages.Get("poisson_package"); - const auto alpha = pkg->Param("diagonal_alpha"); - - int nblocks = md->NumBlocks(); - std::vector include_block(nblocks, true); - - auto desc = - parthenon::MakePackDescriptor(md.get(), {}, {PDOpt::WithFluxes}); - auto pack = desc.GetPack(md.get(), include_block); - parthenon::par_for( - "FluxMultiplyMatrix", 0, pack.GetNBlocks() - 1, kb.s, kb.e, jb.s, jb.e, ib.s, - ib.e, KOKKOS_LAMBDA(const int b, const int k, const int j, const int i) { - const auto &coords = pack.GetCoordinates(b); - Real dx1 = coords.template Dxc(k, j, i); - pack(b, te, out_t(), k, j, i) = -alpha * pack(b, te, in_t(), k, j, i); - pack(b, te, out_t(), k, j, i) += (pack.flux(b, X1DIR, in_t(), k, j, i) - - pack.flux(b, X1DIR, in_t(), k, j, i + 1)) / - dx1; - - if (ndim > 1) { - Real dx2 = coords.template Dxc(k, j, i); - pack(b, te, out_t(), k, j, i) += (pack.flux(b, X2DIR, in_t(), k, j, i) - - pack.flux(b, X2DIR, in_t(), k, j + 1, i)) / - dx2; - } - - if (ndim > 2) { - Real dx3 = coords.template Dxc(k, j, i); - pack(b, te, out_t(), k, j, i) += (pack.flux(b, X3DIR, in_t(), k, j, i) - - pack.flux(b, X3DIR, in_t(), k + 1, j, i)) / - dx3; - } - }); - return TaskStatus::complete; - } -}; - -} // namespace poisson_package - -#endif // EXAMPLE_POISSON_GMG_POISSON_EQUATION_HPP_ diff --git a/example/poisson_gmg/poisson_package.cpp 
b/example/poisson_gmg/poisson_package.cpp index 19dc7d7c7479..e34021a92e73 100644 --- a/example/poisson_gmg/poisson_package.cpp +++ b/example/poisson_gmg/poisson_package.cpp @@ -29,7 +29,6 @@ #include "defs.hpp" #include "kokkos_abstraction.hpp" -#include "poisson_equation.hpp" #include "poisson_equation_stages.hpp" #include "poisson_package.hpp" @@ -89,11 +88,11 @@ std::shared_ptr Initialize(ParameterInput *pin) { std::string prolong = pin->GetOrAddString("poisson", "boundary_prolongation", "Linear"); - PoissonEquation eq(pin, "poisson"); + using PoissEqStages = poisson_package::PoissonEquationStages; + PoissEqStages eq(pin, "poisson"); pkg->AddParam<>("poisson_equation", eq, parthenon::Params::Mutability::Mutable); std::shared_ptr psolver; - using PoissEqStages = poisson_package::PoissonEquationStages; using prolongator_t = parthenon::solvers::ProlongationBlockInteriorDefault; using preconditioner_t = parthenon::solvers::MGSolverStages; From 37500d2c8a780587ac17d03c565908f80b813a8d Mon Sep 17 00:00:00 2001 From: Luke Roberts Date: Thu, 21 Nov 2024 15:32:22 -0700 Subject: [PATCH 55/62] closer to resolving all stages --- doc/sphinx/src/solvers.rst | 2 +- example/poisson_gmg/poisson_driver.cpp | 10 +++++-- .../poisson_gmg/poisson_equation_stages.hpp | 4 +-- example/poisson_gmg/poisson_package.cpp | 24 ++++++++-------- src/solvers/bicgstab_solver_stages.hpp | 28 +++++++++---------- src/solvers/cg_solver_stages.hpp | 22 +++++++-------- src/solvers/mg_solver_stages.hpp | 12 ++++---- 7 files changed, 53 insertions(+), 49 deletions(-) diff --git a/doc/sphinx/src/solvers.rst b/doc/sphinx/src/solvers.rst index e3cbc7e2baa2..45b3b609e267 100644 --- a/doc/sphinx/src/solvers.rst +++ b/doc/sphinx/src/solvers.rst @@ -40,7 +40,7 @@ code along the lines of: std::string rhs_cont_name = "rhs"; MySystemOfEquations eqs(....); - std::shared_ptr psolver = std::make_shared>( + std::shared_ptr psolver = std::make_shared>( base_cont_name, u_cont_name, rhs_cont_name, pin, "location/of/solver_params", eqs); ... 
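To make the documented pattern concrete, here is a minimal usage sketch of the renamed stage-based interface (illustrative only, not part of the patch: the template arguments, container names, and input block are assumptions borrowed from the poisson_gmg example in this series):

    using eqs_t = poisson_package::PoissonEquation<u, D>;
    auto psolver = std::make_shared<parthenon::solvers::CGSolver<eqs_t>>(
        "base", "u", "rhs", pin, "poisson/solver_params", eqs_t(pin, "poisson"));
    // Later, when building the task collection for each partition:
    auto setup = psolver->AddSetupTasks(tl, dependence, partition, pmesh);
    auto solve = psolver->AddTasks(tl, setup, partition, pmesh);

The three container names identify the base data, the solution stage, and the right-hand-side stage on which the stage-based solvers operate.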
diff --git a/example/poisson_gmg/poisson_driver.cpp b/example/poisson_gmg/poisson_driver.cpp
index 0aa57537bd2b..ef2664ac61f4 100644
--- a/example/poisson_gmg/poisson_driver.cpp
+++ b/example/poisson_gmg/poisson_driver.cpp
@@ -83,9 +83,11 @@ TaskCollection PoissonDriver::MakeTaskCollection(BlockList_t &blocks) {
   // known when we solve A.u = rhs
   if (use_exact_rhs) {
     auto copy_exact = tl.AddTask(copy_rhs, TF(solvers::utils::CopyData), md);
+    copy_exact = tl.AddTask(
+        copy_exact, TF(solvers::utils::CopyData>), md, md_u);
     auto comm = AddBoundaryExchangeTasks(copy_exact, tl, md_u, true);
-    auto *eqs = pkg->MutableParam>("poisson_equation");
-    copy_rhs = eqs->Ax(tl, comm, md, md, md_rhs);
+    auto *eqs = pkg->MutableParam>("poisson_equation");
+    copy_rhs = eqs->Ax(tl, comm, md, md_u, md_rhs);
 
@@ -97,7 +99,9 @@ TaskCollection PoissonDriver::MakeTaskCollection(BlockList_t &blocks) {
   // If we are using a rhs to which we know the exact solution, compare our computed
   // solution to the exact solution
   if (use_exact_rhs) {
-    auto diff = tl.AddTask(solve, TF(solvers::utils::AddFieldsAndStore),
+    auto copy_back = tl.AddTask(
+        solve, TF(solvers::utils::CopyData>), md_u, md);
+    auto diff = tl.AddTask(copy_back, TF(solvers::utils::AddFieldsAndStore),
                            md, 1.0, -1.0);
     auto get_err = solvers::utils::DotProduct(diff, tl, &err, md);
     tl.AddTask(
diff --git a/example/poisson_gmg/poisson_equation_stages.hpp b/example/poisson_gmg/poisson_equation_stages.hpp
index 8c935fe8c8b4..f6fd37f53b32 100644
--- a/example/poisson_gmg/poisson_equation_stages.hpp
+++ b/example/poisson_gmg/poisson_equation_stages.hpp
@@ -34,7 +34,7 @@ namespace poisson_package {
 // are internal, but can't be marked private or protected because they launch kernels
 // on device.
template -class PoissonEquationStages { +class PoissonEquation { public: bool do_flux_cor = false; bool set_flux_boundary = false; @@ -42,7 +42,7 @@ class PoissonEquationStages { using IndependentVars = parthenon::TypeList; - PoissonEquationStages(parthenon::ParameterInput *pin, const std::string &label) { + PoissonEquation(parthenon::ParameterInput *pin, const std::string &label) { do_flux_cor = pin->GetOrAddBoolean(label, "flux_correct", false); set_flux_boundary = pin->GetOrAddBoolean(label, "set_flux_boundary", false); include_flux_dx = diff --git a/example/poisson_gmg/poisson_package.cpp b/example/poisson_gmg/poisson_package.cpp index e34021a92e73..5f7ca3a464f2 100644 --- a/example/poisson_gmg/poisson_package.cpp +++ b/example/poisson_gmg/poisson_package.cpp @@ -88,26 +88,26 @@ std::shared_ptr Initialize(ParameterInput *pin) { std::string prolong = pin->GetOrAddString("poisson", "boundary_prolongation", "Linear"); - using PoissEqStages = poisson_package::PoissonEquationStages; - PoissEqStages eq(pin, "poisson"); + using PoissEq = poisson_package::PoissonEquation; + PoissEq eq(pin, "poisson"); pkg->AddParam<>("poisson_equation", eq, parthenon::Params::Mutability::Mutable); std::shared_ptr psolver; using prolongator_t = parthenon::solvers::ProlongationBlockInteriorDefault; using preconditioner_t = - parthenon::solvers::MGSolverStages; - if (solver == "MGStages") { + parthenon::solvers::MGSolver; + if (solver == "MG") { psolver = std::make_shared< - parthenon::solvers::MGSolverStages>( - "base", "u", "rhs", pin, "poisson/solver_params", PoissEqStages(pin, "poisson")); - } else if (solver == "CGStages") { + parthenon::solvers::MGSolver>( + "base", "u", "rhs", pin, "poisson/solver_params", PoissEq(pin, "poisson")); + } else if (solver == "CG") { psolver = std::make_shared< - parthenon::solvers::CGSolverStages>( - "base", "u", "rhs", pin, "poisson/solver_params", PoissEqStages(pin, "poisson")); - } else if (solver == "BiCGSTABStages") { + parthenon::solvers::CGSolver>( + "base", "u", "rhs", pin, "poisson/solver_params", PoissEq(pin, "poisson")); + } else if (solver == "BiCGSTAB") { psolver = std::make_shared< - parthenon::solvers::BiCGSTABSolverStages>( - "base", "u", "rhs", pin, "poisson/solver_params", PoissEqStages(pin, "poisson")); + parthenon::solvers::BiCGSTABSolver>( + "base", "u", "rhs", pin, "poisson/solver_params", PoissEq(pin, "poisson")); } else { PARTHENON_FAIL("Unknown solver type."); } diff --git a/src/solvers/bicgstab_solver_stages.hpp b/src/solvers/bicgstab_solver_stages.hpp index 297964e11c27..44c9b5778e77 100644 --- a/src/solvers/bicgstab_solver_stages.hpp +++ b/src/solvers/bicgstab_solver_stages.hpp @@ -70,8 +70,8 @@ struct BiCGSTABParams { // // that takes a field associated with x_t and applies // the matrix A to it and stores the result in y_t. 
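// For illustration, a minimal equations class satisfying this contract for the
// stage-based solvers might look as follows (an assumed sketch with hypothetical
// names, mirroring the call eqs->Ax(tl, comm, md, md_u, md_rhs) used in the
// poisson_gmg driver; it is not part of this patch):
//
//   struct MyEquations {
//     using IndependentVars = parthenon::TypeList<my_var>;
//     template <class TL_t>
//     parthenon::TaskID Ax(TL_t &tl, parthenon::TaskID depends_on,
//                          std::shared_ptr<parthenon::MeshData<Real>> &md_mat,
//                          std::shared_ptr<parthenon::MeshData<Real>> &md_in,
//                          std::shared_ptr<parthenon::MeshData<Real>> &md_out) {
//       // Apply A to the fields in md_in and store the result in md_out;
//       // ApplyMyOperator is a placeholder for the user's kernel task.
//       return tl.AddTask(depends_on, TF(ApplyMyOperator), md_mat, md_in, md_out);
//     }
//   };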
-template > -class BiCGSTABSolverStages : public SolverBase { +template > +class BiCGSTABSolver : public SolverBase { using FieldTL = typename equations::IndependentVars; std::vector sol_fields; @@ -90,7 +90,7 @@ class BiCGSTABSolverStages : public SolverBase { static inline std::size_t id{0}; public: - BiCGSTABSolverStages(const std::string &container_base, const std::string &container_u, + BiCGSTABSolver(const std::string &container_base, const std::string &container_u, const std::string &container_rhs, ParameterInput *pin, const std::string &input_block, equations eq_in = equations()) : preconditioner(container_base, container_u, container_rhs, pin, input_block, @@ -167,14 +167,14 @@ class BiCGSTABSolverStages : public SolverBase { TaskQualifier::once_per_region | TaskQualifier::local_sync, zero_x | zero_u_init | copy_r | copy_p | copy_rhat0 | get_rhat0r_init | get_rhs2, "zero factors", - [](BiCGSTABSolverStages *solver) { + [](BiCGSTABSolver *solver) { solver->iter_counter = -1; return TaskStatus::complete; }, this); tl.AddTask( TaskQualifier::once_per_region, initialize, "print to screen", - [&](BiCGSTABSolverStages *solver, std::shared_ptr res_tol, + [&](BiCGSTABSolver *solver, std::shared_ptr res_tol, bool relative_residual, Mesh *pm) { if (Globals::my_rank == 0 && params_.print_per_step) { Real tol = relative_residual @@ -195,7 +195,7 @@ class BiCGSTABSolverStages : public SolverBase { []() { return TaskStatus::complete; }); auto reset = itl.AddTask( TaskQualifier::once_per_region, sync, "update values", - [](BiCGSTABSolverStages *solver) { + [](BiCGSTABSolver *solver) { solver->rhat0r_old = solver->rhat0r.val; solver->iter_counter++; return TaskStatus::complete; @@ -226,7 +226,7 @@ class BiCGSTABSolverStages : public SolverBase { // 4. h <- x + alpha u (alpha = rhat0r_old / rhat0v) auto correct_h = itl.AddTask( get_rhat0v, "h <- x + alpha u", - [](BiCGSTABSolverStages *solver, std::shared_ptr> &md_x, + [](BiCGSTABSolver *solver, std::shared_ptr> &md_x, std::shared_ptr> &md_u, std::shared_ptr> &md_h) { Real alpha = solver->rhat0r_old / solver->rhat0v.val; return AddFieldsAndStore(md_x, md_u, md_h, 1.0, alpha); @@ -236,7 +236,7 @@ class BiCGSTABSolverStages : public SolverBase { // 5. s <- r - alpha v (alpha = rhat0r_old / rhat0v) auto correct_s = itl.AddTask( get_rhat0v, "s <- r - alpha v", - [](BiCGSTABSolverStages *solver, std::shared_ptr> &md_r, + [](BiCGSTABSolver *solver, std::shared_ptr> &md_r, std::shared_ptr> &md_v, std::shared_ptr> &md_s) { Real alpha = solver->rhat0r_old / solver->rhat0v.val; return AddFieldsAndStore(md_r, md_v, md_s, 1.0, -alpha); @@ -248,7 +248,7 @@ class BiCGSTABSolverStages : public SolverBase { auto print = itl.AddTask( TaskQualifier::once_per_region, get_res, - [&](BiCGSTABSolverStages *solver, Mesh *pmesh) { + [&](BiCGSTABSolver *solver, Mesh *pmesh) { Real rms_res = std::sqrt(solver->residual.val / pmesh->GetTotalCells()); if (Globals::my_rank == 0 && solver->params_.print_per_step) printf("%i %e\n", solver->iter_counter * 2 + 1, rms_res); @@ -281,7 +281,7 @@ class BiCGSTABSolverStages : public SolverBase { // 9. x <- h + omega u auto correct_x = itl.AddTask( get_tt | get_ts, "x <- h + omega u", - [](BiCGSTABSolverStages *solver, std::shared_ptr> &md_h, + [](BiCGSTABSolver *solver, std::shared_ptr> &md_h, std::shared_ptr> &md_u, std::shared_ptr> &md_x) { Real omega = solver->ts.val / solver->tt.val; return AddFieldsAndStore(md_h, md_u, md_x, 1.0, omega); @@ -291,7 +291,7 @@ class BiCGSTABSolverStages : public SolverBase { // 10. 
r <- s - omega t auto correct_r = itl.AddTask( get_tt | get_ts, "r <- s - omega t", - [](BiCGSTABSolverStages *solver, std::shared_ptr> &md_s, + [](BiCGSTABSolver *solver, std::shared_ptr> &md_s, std::shared_ptr> &md_t, std::shared_ptr> &md_r) { Real omega = solver->ts.val / solver->tt.val; return AddFieldsAndStore(md_s, md_t, md_r, 1.0, -omega); @@ -303,7 +303,7 @@ class BiCGSTABSolverStages : public SolverBase { get_res2 = itl.AddTask( TaskQualifier::once_per_region, get_res2, - [&](BiCGSTABSolverStages *solver, Mesh *pmesh) { + [&](BiCGSTABSolver *solver, Mesh *pmesh) { Real rms_err = std::sqrt(solver->residual.val / pmesh->GetTotalCells()); if (Globals::my_rank == 0 && solver->params_.print_per_step) printf("%i %e\n", solver->iter_counter * 2 + 2, rms_err); @@ -318,7 +318,7 @@ class BiCGSTABSolverStages : public SolverBase { // 13. p <- r + beta * (p - omega * v) auto update_p = itl.AddTask( get_rhat0r | get_res2, "p <- r + beta * (p - omega * v)", - [](BiCGSTABSolverStages *solver, std::shared_ptr> &md_p, + [](BiCGSTABSolver *solver, std::shared_ptr> &md_p, std::shared_ptr> &md_v, std::shared_ptr> &md_r) { Real alpha = solver->rhat0r_old / solver->rhat0v.val; Real omega = solver->ts.val / solver->tt.val; @@ -332,7 +332,7 @@ class BiCGSTABSolverStages : public SolverBase { // 14. rhat0r_old <- rhat0r, zero all reductions auto check = itl.AddTask( TaskQualifier::completion, update_p | correct_x, "rhat0r_old <- rhat0r", - [partition](BiCGSTABSolverStages *solver, Mesh *pmesh, int max_iter, + [partition](BiCGSTABSolver *solver, Mesh *pmesh, int max_iter, std::shared_ptr res_tol, bool relative_residual) { Real rms_res = std::sqrt(solver->residual.val / pmesh->GetTotalCells()); solver->final_residual = rms_res; diff --git a/src/solvers/cg_solver_stages.hpp b/src/solvers/cg_solver_stages.hpp index a99747310d7d..73fa967e4015 100644 --- a/src/solvers/cg_solver_stages.hpp +++ b/src/solvers/cg_solver_stages.hpp @@ -61,8 +61,8 @@ struct CGParams { // // that takes a field associated with x_t and applies // the matrix A to it and stores the result in y_t. -template > -class CGSolverStages : public SolverBase { +template > +class CGSolver : public SolverBase { using FieldTL = typename equations::IndependentVars; std::vector sol_fields; @@ -80,7 +80,7 @@ class CGSolverStages : public SolverBase { static inline std::size_t id{0}; public: - CGSolverStages(const std::string &container_base, const std::string &container_u, + CGSolver(const std::string &container_base, const std::string &container_u, const std::string &container_rhs, ParameterInput *pin, const std::string &input_block, const equations &eq_in = equations()) : preconditioner(container_base, container_u, container_rhs, pin, input_block, @@ -136,7 +136,7 @@ class CGSolverStages : public SolverBase { auto initialize = tl.AddTask( TaskQualifier::once_per_region | TaskQualifier::local_sync, zero_u | zero_v | zero_x | zero_p | copy_r | get_rhs2, "zero factors", - [](CGSolverStages *solver) { + [](CGSolver *solver) { solver->iter_counter = -1; solver->ru.val = std::numeric_limits::max(); return TaskStatus::complete; @@ -146,7 +146,7 @@ class CGSolverStages : public SolverBase { if (params_.print_per_step && Globals::my_rank == 0) { initialize = tl.AddTask( TaskQualifier::once_per_region, initialize, "print to screen", - [&](CGSolverStages *solver, std::shared_ptr res_tol, + [&](CGSolver *solver, std::shared_ptr res_tol, bool relative_residual, Mesh *pm) { Real tol = relative_residual ? 
*res_tol * std::sqrt(solver->rhs2.val / pm->GetTotalCells()) @@ -166,7 +166,7 @@ class CGSolverStages : public SolverBase { []() { return TaskStatus::complete; }); auto reset = itl.AddTask( TaskQualifier::once_per_region, sync, "update values", - [](CGSolverStages *solver) { + [](CGSolver *solver) { solver->ru_old = solver->ru.val; solver->iter_counter++; return TaskStatus::complete; @@ -190,7 +190,7 @@ class CGSolverStages : public SolverBase { // 3. p <- u + beta p auto correct_p = itl.AddTask( get_ru, "p <- u + beta p", - [](CGSolverStages *solver, std::shared_ptr> &md_u, + [](CGSolver *solver, std::shared_ptr> &md_u, std::shared_ptr> &md_p) { Real beta = solver->iter_counter > 0 ? solver->ru.val / solver->ru_old : 0.0; return AddFieldsAndStore(md_u, md_p, md_p, 1.0, beta); @@ -208,7 +208,7 @@ class CGSolverStages : public SolverBase { // 6. x <- x + alpha p auto correct_x = itl.AddTask( get_pAp, "x <- x + alpha p", - [](CGSolverStages *solver, std::shared_ptr> &md_x, + [](CGSolver *solver, std::shared_ptr> &md_x, std::shared_ptr> &md_p) { Real alpha = solver->ru.val / solver->pAp.val; return AddFieldsAndStore(md_x, md_p, md_x, 1.0, alpha); @@ -218,7 +218,7 @@ class CGSolverStages : public SolverBase { // 6. r <- r - alpha A p auto correct_r = itl.AddTask( get_pAp, "r <- r - alpha A p", - [](CGSolverStages *solver, std::shared_ptr> &md_r, + [](CGSolver *solver, std::shared_ptr> &md_r, std::shared_ptr> &md_v) { Real alpha = solver->ru.val / solver->pAp.val; return AddFieldsAndStore(md_r, md_v, md_r, 1.0, -alpha); @@ -230,7 +230,7 @@ class CGSolverStages : public SolverBase { auto print = itl.AddTask( TaskQualifier::once_per_region, get_res, - [&](CGSolverStages *solver, Mesh *pmesh) { + [&](CGSolver *solver, Mesh *pmesh) { Real rms_res = std::sqrt(solver->residual.val / pmesh->GetTotalCells()); if (Globals::my_rank == 0 && solver->params_.print_per_step) printf("%i %e\n", solver->iter_counter, rms_res); @@ -240,7 +240,7 @@ class CGSolverStages : public SolverBase { auto check = itl.AddTask( TaskQualifier::completion, get_res | correct_x, "completion", - [](CGSolverStages *solver, Mesh *pmesh, int max_iter, + [](CGSolver *solver, Mesh *pmesh, int max_iter, std::shared_ptr res_tol, bool relative_residual) { Real rms_res = std::sqrt(solver->residual.val / pmesh->GetTotalCells()); solver->final_residual = rms_res; diff --git a/src/solvers/mg_solver_stages.hpp b/src/solvers/mg_solver_stages.hpp index d71833a29596..54719714625a 100644 --- a/src/solvers/mg_solver_stages.hpp +++ b/src/solvers/mg_solver_stages.hpp @@ -75,7 +75,7 @@ struct MGParams { // That stores the (possibly approximate) diagonal of matrix A in the field // associated with the type diag_t. This is used for Jacobi iteration. 
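// For example, one damped Jacobi sweep effectively computes
//   x <- x + omega * D^{-1} * (rhs - A x),
// so the solver needs the equations object to fill the diag_t field with
// (an approximation of) D = diag(A). A sketch of such a method follows;
// "SetDiagonal" and "FillDiagonal" are placeholder names here, and the exact
// required signature is the one given by the comment this hunk abbreviates:
//
//   template <class diag_t, class TL_t>
//   parthenon::TaskID SetDiagonal(TL_t &tl, parthenon::TaskID depends_on,
//                                 std::shared_ptr<parthenon::MeshData<Real>> &md) {
//     // For a standard second-order Poisson discretization the diagonal is
//     // just the constant -2 * (1/dx^2 + 1/dy^2 + 1/dz^2) in every cell.
//     return tl.AddTask(depends_on, TF(FillDiagonal<diag_t>), md);
//   }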
template -class MGSolverStages : public SolverBase { +class MGSolver : public SolverBase { static inline std::size_t id{0}; public: using FieldTL = typename equations_t::IndependentVars; @@ -94,14 +94,14 @@ class MGSolverStages : public SolverBase { // Internal containers for solver which create deep copies of sol_fields std::string container_res_err, container_temp, container_u0, container_diag; - MGSolverStages(const std::string &container_base, const std::string &container_u, + MGSolver(const std::string &container_base, const std::string &container_u, const std::string &container_rhs, ParameterInput *pin, const std::string &input_block, equations_t eq_in = equations_t()) - : MGSolverStages(container_base, container_u, container_rhs, + : MGSolver(container_base, container_u, container_rhs, MGParams(pin, input_block), eq_in, prolongator_t(pin, input_block)) {} - MGSolverStages(const std::string &container_base, const std::string &container_u, + MGSolver(const std::string &container_base, const std::string &container_u, const std::string &container_rhs, MGParams params_in, equations_t eq_in = equations_t(), prolongator_t prol_in = prolongator_t()) @@ -151,7 +151,7 @@ class MGSolverStages : public SolverBase { auto check = itl.AddTask( TaskQualifier::completion, get_res, "Check residual", - [partition](MGSolverStages *solver, Mesh *pmesh) { + [partition](MGSolver *solver, Mesh *pmesh) { Real rms_res = std::sqrt(solver->residual.val / pmesh->GetTotalCells()); if (Globals::my_rank == 0 && partition == 0) printf("%i %e\n", solver->iter_counter, rms_res); @@ -292,7 +292,7 @@ class MGSolverStages : public SolverBase { auto comm = AddBoundaryExchangeTasks(depends_on, tl, md_in, multilevel); auto mat_mult = eqs_.Ax(tl, comm, md_base, md_in, md_out); - return tl.AddTask(mat_mult, TF(&MGSolverStages::Jacobi), this, md_rhs, md_out, + return tl.AddTask(mat_mult, TF(&MGSolver::Jacobi), this, md_rhs, md_out, md_diag, md_in, md_out, omega); } From 4789e0ebc9ba3a4c1e6ff1d6bb956f4a1ca90401 Mon Sep 17 00:00:00 2001 From: Luke Roberts Date: Thu, 21 Nov 2024 15:39:24 -0700 Subject: [PATCH 56/62] finish cleanup --- example/poisson_gmg/CMakeLists.txt | 2 +- example/poisson_gmg/poisson_driver.cpp | 25 ++++++++++--------- ...uation_stages.hpp => poisson_equation.hpp} | 6 ++--- example/poisson_gmg/poisson_package.cpp | 25 ++++++++----------- src/CMakeLists.txt | 6 ++--- ..._solver_stages.hpp => bicgstab_solver.hpp} | 19 +++++++------- .../{cg_solver_stages.hpp => cg_solver.hpp} | 23 +++++++++-------- .../{mg_solver_stages.hpp => mg_solver.hpp} | 25 +++++++++---------- src/solvers/solver_utils.hpp | 3 +-- src/utils/type_list.hpp | 6 ++--- 10 files changed, 68 insertions(+), 72 deletions(-) rename example/poisson_gmg/{poisson_equation_stages.hpp => poisson_equation.hpp} (98%) rename src/solvers/{bicgstab_solver_stages.hpp => bicgstab_solver.hpp} (97%) rename src/solvers/{cg_solver_stages.hpp => cg_solver.hpp} (94%) rename src/solvers/{mg_solver_stages.hpp => mg_solver.hpp} (97%) diff --git a/example/poisson_gmg/CMakeLists.txt b/example/poisson_gmg/CMakeLists.txt index e95a78264919..d4ccb8d622f5 100644 --- a/example/poisson_gmg/CMakeLists.txt +++ b/example/poisson_gmg/CMakeLists.txt @@ -17,7 +17,7 @@ if( "poisson-gmg-example" IN_LIST DRIVER_LIST OR NOT PARTHENON_DISABLE_EXAMPLES) poisson-gmg-example poisson_driver.cpp poisson_driver.hpp - poisson_equation_stages.hpp + poisson_equation.hpp poisson_package.cpp poisson_package.hpp main.cpp diff --git a/example/poisson_gmg/poisson_driver.cpp 
b/example/poisson_gmg/poisson_driver.cpp index ef2664ac61f4..4167c30ba2ea 100644 --- a/example/poisson_gmg/poisson_driver.cpp +++ b/example/poisson_gmg/poisson_driver.cpp @@ -25,12 +25,12 @@ #include "mesh/meshblock_pack.hpp" #include "parthenon/driver.hpp" #include "poisson_driver.hpp" -#include "poisson_equation_stages.hpp" +#include "poisson_equation.hpp" #include "poisson_package.hpp" #include "prolong_restrict/prolong_restrict.hpp" -#include "solvers/bicgstab_solver_stages.hpp" -#include "solvers/cg_solver_stages.hpp" -#include "solvers/mg_solver_stages.hpp" +#include "solvers/bicgstab_solver.hpp" +#include "solvers/cg_solver.hpp" +#include "solvers/mg_solver.hpp" #include "solvers/solver_utils.hpp" using namespace parthenon::driver::prelude; @@ -76,17 +76,18 @@ TaskCollection PoissonDriver::MakeTaskCollection(BlockList_t &blocks) { // Move the rhs variable into the rhs stage for stage based solver auto copy_rhs = tl.AddTask(none, TF(solvers::utils::CopyData), md); - copy_rhs = tl.AddTask( - copy_rhs, TF(solvers::utils::CopyData>), md, md_rhs); - + copy_rhs = tl.AddTask(copy_rhs, TF(solvers::utils::CopyData>), + md, md_rhs); + // Possibly set rhs <- A.u_exact for a given u_exact so that the exact solution is // known when we solve A.u = rhs if (use_exact_rhs) { auto copy_exact = tl.AddTask(copy_rhs, TF(solvers::utils::CopyData), md); copy_exact = tl.AddTask( - copy_rhs, TF(solvers::utils::CopyData>), md, md_u); + copy_rhs, TF(solvers::utils::CopyData>), md, md_u); auto comm = AddBoundaryExchangeTasks(copy_exact, tl, md_u, true); - auto *eqs = pkg->MutableParam>("poisson_equation"); + auto *eqs = + pkg->MutableParam>("poisson_equation"); copy_rhs = eqs->Ax(tl, comm, md, md_u, md_rhs); } @@ -100,9 +101,9 @@ TaskCollection PoissonDriver::MakeTaskCollection(BlockList_t &blocks) { // solution to the exact solution if (use_exact_rhs) { auto copy_back = tl.AddTask( - solve, TF(solvers::utils::CopyData>), md_u, md); - auto diff = tl.AddTask(copy_back, TF(solvers::utils::AddFieldsAndStore), - md, 1.0, -1.0); + solve, TF(solvers::utils::CopyData>), md_u, md); + auto diff = tl.AddTask( + copy_back, TF(solvers::utils::AddFieldsAndStore), md, 1.0, -1.0); auto get_err = solvers::utils::DotProduct(diff, tl, &err, md); tl.AddTask( get_err, diff --git a/example/poisson_gmg/poisson_equation_stages.hpp b/example/poisson_gmg/poisson_equation.hpp similarity index 98% rename from example/poisson_gmg/poisson_equation_stages.hpp rename to example/poisson_gmg/poisson_equation.hpp index f6fd37f53b32..4ff956e2b380 100644 --- a/example/poisson_gmg/poisson_equation_stages.hpp +++ b/example/poisson_gmg/poisson_equation.hpp @@ -10,8 +10,8 @@ // license in this material to reproduce, prepare derivative works, distribute copies to // the public, perform publicly and display publicly, and to permit others to do so. 
//======================================================================================== -#ifndef EXAMPLE_POISSON_GMG_POISSON_EQUATION_STAGES_HPP_ -#define EXAMPLE_POISSON_GMG_POISSON_EQUATION_STAGES_HPP_ +#ifndef EXAMPLE_POISSON_GMG_POISSON_EQUATION_HPP_ +#define EXAMPLE_POISSON_GMG_POISSON_EQUATION_HPP_ #include #include @@ -315,4 +315,4 @@ class PoissonEquation { } // namespace poisson_package -#endif // EXAMPLE_POISSON_GMG_POISSON_EQUATION_STAGES_HPP_ +#endif // EXAMPLE_POISSON_GMG_POISSON_EQUATION_HPP_ diff --git a/example/poisson_gmg/poisson_package.cpp b/example/poisson_gmg/poisson_package.cpp index 5f7ca3a464f2..753d33ddd504 100644 --- a/example/poisson_gmg/poisson_package.cpp +++ b/example/poisson_gmg/poisson_package.cpp @@ -23,13 +23,13 @@ #include #include #include -#include -#include +#include +#include #include #include "defs.hpp" #include "kokkos_abstraction.hpp" -#include "poisson_equation_stages.hpp" +#include "poisson_equation.hpp" #include "poisson_package.hpp" using namespace parthenon::package::prelude; @@ -94,20 +94,17 @@ std::shared_ptr Initialize(ParameterInput *pin) { std::shared_ptr psolver; using prolongator_t = parthenon::solvers::ProlongationBlockInteriorDefault; - using preconditioner_t = - parthenon::solvers::MGSolver; + using preconditioner_t = parthenon::solvers::MGSolver; if (solver == "MG") { - psolver = std::make_shared< - parthenon::solvers::MGSolver>( + psolver = std::make_shared>( "base", "u", "rhs", pin, "poisson/solver_params", PoissEq(pin, "poisson")); } else if (solver == "CG") { - psolver = std::make_shared< - parthenon::solvers::CGSolver>( + psolver = std::make_shared>( "base", "u", "rhs", pin, "poisson/solver_params", PoissEq(pin, "poisson")); } else if (solver == "BiCGSTAB") { - psolver = std::make_shared< - parthenon::solvers::BiCGSTABSolver>( - "base", "u", "rhs", pin, "poisson/solver_params", PoissEq(pin, "poisson")); + psolver = + std::make_shared>( + "base", "u", "rhs", pin, "poisson/solver_params", PoissEq(pin, "poisson")); } else { PARTHENON_FAIL("Unknown solver type."); } @@ -122,8 +119,8 @@ std::shared_ptr Initialize(ParameterInput *pin) { // for the standard Poisson equation. pkg->AddField(D::name(), mD); - std::vector flags{Metadata::Cell, Metadata::Independent, - Metadata::FillGhost, Metadata::WithFluxes, + std::vector flags{Metadata::Cell, Metadata::Independent, + Metadata::FillGhost, Metadata::WithFluxes, Metadata::GMGRestrict, Metadata::GMGProlongate}; auto mflux_comm = Metadata(flags); if (prolong == "Linear") { diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 0dbbec227393..f5762996418f 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -229,10 +229,10 @@ add_library(parthenon amr_criteria/refinement_package.cpp amr_criteria/refinement_package.hpp - solvers/bicgstab_solver_stages.hpp - solvers/cg_solver_stages.hpp + solvers/bicgstab_solver.hpp + solvers/cg_solver.hpp solvers/internal_prolongation.hpp - solvers/mg_solver_stages.hpp + solvers/mg_solver.hpp solvers/solver_base.hpp solvers/solver_utils.hpp diff --git a/src/solvers/bicgstab_solver_stages.hpp b/src/solvers/bicgstab_solver.hpp similarity index 97% rename from src/solvers/bicgstab_solver_stages.hpp rename to src/solvers/bicgstab_solver.hpp index 44c9b5778e77..946c95457e7a 100644 --- a/src/solvers/bicgstab_solver_stages.hpp +++ b/src/solvers/bicgstab_solver.hpp @@ -10,8 +10,8 @@ // license in this material to reproduce, prepare derivative works, distribute copies to // the public, perform publicly and display publicly, and to permit others to do so. 
//======================================================================================== -#ifndef SOLVERS_BICGSTAB_SOLVER_STAGES_HPP_ -#define SOLVERS_BICGSTAB_SOLVER_STAGES_HPP_ +#ifndef SOLVERS_BICGSTAB_SOLVER_HPP_ +#define SOLVERS_BICGSTAB_SOLVER_HPP_ #include #include @@ -23,7 +23,7 @@ #include "interface/meshblock_data.hpp" #include "interface/state_descriptor.hpp" #include "kokkos_abstraction.hpp" -#include "solvers/mg_solver_stages.hpp" +#include "solvers/mg_solver.hpp" #include "solvers/solver_base.hpp" #include "solvers/solver_utils.hpp" #include "tasks/tasks.hpp" @@ -87,12 +87,13 @@ class BiCGSTABSolver : public SolverBase { // Internal containers for solver which create deep copies of sol_fields std::string container_rhat0, container_v, container_h, container_s; std::string container_t, container_r, container_p, container_x, container_diag; - + static inline std::size_t id{0}; + public: BiCGSTABSolver(const std::string &container_base, const std::string &container_u, - const std::string &container_rhs, ParameterInput *pin, - const std::string &input_block, equations eq_in = equations()) + const std::string &container_rhs, ParameterInput *pin, + const std::string &input_block, equations eq_in = equations()) : preconditioner(container_base, container_u, container_rhs, pin, input_block, eq_in), container_base(container_base), container_u(container_u), @@ -174,8 +175,8 @@ class BiCGSTABSolver : public SolverBase { this); tl.AddTask( TaskQualifier::once_per_region, initialize, "print to screen", - [&](BiCGSTABSolver *solver, std::shared_ptr res_tol, - bool relative_residual, Mesh *pm) { + [&](BiCGSTABSolver *solver, std::shared_ptr res_tol, bool relative_residual, + Mesh *pm) { if (Globals::my_rank == 0 && params_.print_per_step) { Real tol = relative_residual ? *res_tol * std::sqrt(solver->rhs2.val / pm->GetTotalCells()) @@ -372,4 +373,4 @@ class BiCGSTABSolver : public SolverBase { } // namespace parthenon -#endif // SOLVERS_BICGSTAB_SOLVER_STAGES_HPP_ +#endif // SOLVERS_BICGSTAB_SOLVER_HPP_ diff --git a/src/solvers/cg_solver_stages.hpp b/src/solvers/cg_solver.hpp similarity index 94% rename from src/solvers/cg_solver_stages.hpp rename to src/solvers/cg_solver.hpp index 73fa967e4015..775a4ab2ab97 100644 --- a/src/solvers/cg_solver_stages.hpp +++ b/src/solvers/cg_solver.hpp @@ -10,8 +10,8 @@ // license in this material to reproduce, prepare derivative works, distribute copies to // the public, perform publicly and display publicly, and to permit others to do so. 
//======================================================================================== -#ifndef SOLVERS_CG_SOLVER_STAGES_HPP_ -#define SOLVERS_CG_SOLVER_STAGES_HPP_ +#ifndef SOLVERS_CG_SOLVER_HPP_ +#define SOLVERS_CG_SOLVER_HPP_ #include #include @@ -24,7 +24,7 @@ #include "interface/meshblock_data.hpp" #include "interface/state_descriptor.hpp" #include "kokkos_abstraction.hpp" -#include "solvers/mg_solver_stages.hpp" +#include "solvers/mg_solver.hpp" #include "solvers/solver_base.hpp" #include "solvers/solver_utils.hpp" #include "tasks/tasks.hpp" @@ -79,10 +79,11 @@ class CGSolver : public SolverBase { std::string container_x, container_r, container_v, container_p; static inline std::size_t id{0}; + public: CGSolver(const std::string &container_base, const std::string &container_u, - const std::string &container_rhs, ParameterInput *pin, - const std::string &input_block, const equations &eq_in = equations()) + const std::string &container_rhs, ParameterInput *pin, + const std::string &input_block, const equations &eq_in = equations()) : preconditioner(container_base, container_u, container_rhs, pin, input_block, eq_in), container_base(container_base), container_u(container_u), @@ -90,7 +91,7 @@ class CGSolver : public SolverBase { eqs_(eq_in) { FieldTL::IterateTypes( [this](auto t) { this->sol_fields.push_back(decltype(t)::name()); }); - std::string solver_id = "cg" + std::to_string(id++); + std::string solver_id = "cg" + std::to_string(id++); container_x = solver_id + "_x"; container_r = solver_id + "_r"; container_v = solver_id + "_v"; @@ -146,8 +147,8 @@ class CGSolver : public SolverBase { if (params_.print_per_step && Globals::my_rank == 0) { initialize = tl.AddTask( TaskQualifier::once_per_region, initialize, "print to screen", - [&](CGSolver *solver, std::shared_ptr res_tol, - bool relative_residual, Mesh *pm) { + [&](CGSolver *solver, std::shared_ptr res_tol, bool relative_residual, + Mesh *pm) { Real tol = relative_residual ? *res_tol * std::sqrt(solver->rhs2.val / pm->GetTotalCells()) : *res_tol; @@ -240,8 +241,8 @@ class CGSolver : public SolverBase { auto check = itl.AddTask( TaskQualifier::completion, get_res | correct_x, "completion", - [](CGSolver *solver, Mesh *pmesh, int max_iter, - std::shared_ptr res_tol, bool relative_residual) { + [](CGSolver *solver, Mesh *pmesh, int max_iter, std::shared_ptr res_tol, + bool relative_residual) { Real rms_res = std::sqrt(solver->residual.val / pmesh->GetTotalCells()); solver->final_residual = rms_res; solver->final_iteration = solver->iter_counter; @@ -278,4 +279,4 @@ class CGSolver : public SolverBase { } // namespace solvers } // namespace parthenon -#endif // SOLVERS_CG_SOLVER_STAGES_HPP_ +#endif // SOLVERS_CG_SOLVER_HPP_ diff --git a/src/solvers/mg_solver_stages.hpp b/src/solvers/mg_solver.hpp similarity index 97% rename from src/solvers/mg_solver_stages.hpp rename to src/solvers/mg_solver.hpp index 54719714625a..6662b6cdffb5 100644 --- a/src/solvers/mg_solver_stages.hpp +++ b/src/solvers/mg_solver.hpp @@ -10,8 +10,8 @@ // license in this material to reproduce, prepare derivative works, distribute copies to // the public, perform publicly and display publicly, and to permit others to do so. 
//======================================================================================== -#ifndef SOLVERS_MG_SOLVER_STAGES_HPP_ -#define SOLVERS_MG_SOLVER_STAGES_HPP_ +#ifndef SOLVERS_MG_SOLVER_HPP_ +#define SOLVERS_MG_SOLVER_HPP_ #include #include @@ -77,6 +77,7 @@ struct MGParams { template class MGSolver : public SolverBase { static inline std::size_t id{0}; + public: using FieldTL = typename equations_t::IndependentVars; @@ -95,16 +96,14 @@ class MGSolver : public SolverBase { std::string container_res_err, container_temp, container_u0, container_diag; MGSolver(const std::string &container_base, const std::string &container_u, - const std::string &container_rhs, ParameterInput *pin, - const std::string &input_block, equations_t eq_in = equations_t()) - : MGSolver(container_base, container_u, container_rhs, - MGParams(pin, input_block), eq_in, - prolongator_t(pin, input_block)) {} + const std::string &container_rhs, ParameterInput *pin, + const std::string &input_block, equations_t eq_in = equations_t()) + : MGSolver(container_base, container_u, container_rhs, MGParams(pin, input_block), + eq_in, prolongator_t(pin, input_block)) {} MGSolver(const std::string &container_base, const std::string &container_u, - const std::string &container_rhs, MGParams params_in, - equations_t eq_in = equations_t(), - prolongator_t prol_in = prolongator_t()) + const std::string &container_rhs, MGParams params_in, + equations_t eq_in = equations_t(), prolongator_t prol_in = prolongator_t()) : container_base(container_base), container_u(container_u), container_rhs(container_rhs), params_(params_in), iter_counter(0), eqs_(eq_in), prolongator_(prol_in) { @@ -292,8 +291,8 @@ class MGSolver : public SolverBase { auto comm = AddBoundaryExchangeTasks(depends_on, tl, md_in, multilevel); auto mat_mult = eqs_.Ax(tl, comm, md_base, md_in, md_out); - return tl.AddTask(mat_mult, TF(&MGSolver::Jacobi), this, md_rhs, md_out, - md_diag, md_in, md_out, omega); + return tl.AddTask(mat_mult, TF(&MGSolver::Jacobi), this, md_rhs, md_out, md_diag, + md_in, md_out, omega); } template @@ -530,4 +529,4 @@ class MGSolver : public SolverBase { } // namespace parthenon -#endif // SOLVERS_MG_SOLVER_STAGES_HPP_ +#endif // SOLVERS_MG_SOLVER_HPP_ diff --git a/src/solvers/solver_utils.hpp b/src/solvers/solver_utils.hpp index 40f9378c4920..7a4aaa119fac 100644 --- a/src/solvers/solver_utils.hpp +++ b/src/solvers/solver_utils.hpp @@ -215,7 +215,7 @@ TaskStatus SetToZero(const std::shared_ptr> &md) { int nblocks = md->NumBlocks(); using TE = parthenon::TopologicalElement; TE te = TE::CC; - static auto desc = [&]{ + static auto desc = [&] { if constexpr (isTypeList::value) { return parthenon::MakePackDescriptorFromTypeList(md.get()); } else { @@ -245,7 +245,6 @@ TaskStatus SetToZero(const std::shared_ptr> &md) { return TaskStatus::complete; } - template TaskStatus AddFieldsAndStoreInteriorSelect(const std::shared_ptr> &md, Real wa = 1.0, Real wb = 1.0, diff --git a/src/utils/type_list.hpp b/src/utils/type_list.hpp index a401937302cb..aa52b0e6e8ec 100644 --- a/src/utils/type_list.hpp +++ b/src/utils/type_list.hpp @@ -94,12 +94,10 @@ auto GetNames() { } template -struct isTypeList : public std::false_type - { }; +struct isTypeList : public std::false_type {}; template -struct isTypeList> : public std::true_type - { }; +struct isTypeList> : public std::true_type {}; } // namespace parthenon From 6d1a882f307758dda5a88e14f572f1c12bb103b0 Mon Sep 17 00:00:00 2001 From: Luke Roberts Date: Thu, 21 Nov 2024 16:29:57 -0700 Subject: [PATCH 57/62] 
possibly fix compilation --- example/poisson_gmg/poisson_driver.cpp | 6 +++--- src/solvers/solver_utils.hpp | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/example/poisson_gmg/poisson_driver.cpp b/example/poisson_gmg/poisson_driver.cpp index 4167c30ba2ea..f2ea76fe22fd 100644 --- a/example/poisson_gmg/poisson_driver.cpp +++ b/example/poisson_gmg/poisson_driver.cpp @@ -75,16 +75,16 @@ TaskCollection PoissonDriver::MakeTaskCollection(BlockList_t &blocks) { auto &md_rhs = pmesh->mesh_data.Add("rhs", md, {u::name()}); // Move the rhs variable into the rhs stage for stage based solver - auto copy_rhs = tl.AddTask(none, TF(solvers::utils::CopyData), md); + auto copy_rhs = tl.AddTask(none, TF(solvers::utils::CopyDataBetweenFields), md); copy_rhs = tl.AddTask(copy_rhs, TF(solvers::utils::CopyData>), md, md_rhs); // Possibly set rhs <- A.u_exact for a given u_exact so that the exact solution is // known when we solve A.u = rhs if (use_exact_rhs) { - auto copy_exact = tl.AddTask(copy_rhs, TF(solvers::utils::CopyData), md); + auto copy_exact = tl.AddTask(copy_rhs, TF(solvers::utils::CopyDataBetweenFields), md); copy_exact = tl.AddTask( - copy_rhs, TF(solvers::utils::CopyData>), md, md_u); + copy_exact, TF(solvers::utils::CopyData>), md, md_u); auto comm = AddBoundaryExchangeTasks(copy_exact, tl, md_u, true); auto *eqs = pkg->MutableParam>("poisson_equation"); diff --git a/src/solvers/solver_utils.hpp b/src/solvers/solver_utils.hpp index 7a4aaa119fac..21cb36c24ccf 100644 --- a/src/solvers/solver_utils.hpp +++ b/src/solvers/solver_utils.hpp @@ -149,7 +149,7 @@ struct Stencil { namespace utils { template -TaskStatus CopyData(const std::shared_ptr> &md) { +TaskStatus CopyDataBetweenFields(const std::shared_ptr> &md) { using TE = parthenon::TopologicalElement; TE te = TE::CC; IndexRange ib = md->GetBoundsI(IndexDomain::entire, te); From 2478da436bd311f0af72f3de2779144c68fbbd52 Mon Sep 17 00:00:00 2001 From: Luke Roberts Date: Mon, 25 Nov 2024 11:57:04 -0700 Subject: [PATCH 58/62] put same MeshData utilities in their own namespace --- example/poisson_gmg/poisson_driver.cpp | 11 +++++++---- src/solvers/solver_utils.hpp | 14 +++++++++++++- 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/example/poisson_gmg/poisson_driver.cpp b/example/poisson_gmg/poisson_driver.cpp index f2ea76fe22fd..49eefe1164c4 100644 --- a/example/poisson_gmg/poisson_driver.cpp +++ b/example/poisson_gmg/poisson_driver.cpp @@ -75,14 +75,16 @@ TaskCollection PoissonDriver::MakeTaskCollection(BlockList_t &blocks) { auto &md_rhs = pmesh->mesh_data.Add("rhs", md, {u::name()}); // Move the rhs variable into the rhs stage for stage based solver - auto copy_rhs = tl.AddTask(none, TF(solvers::utils::CopyDataBetweenFields), md); + auto copy_rhs = + tl.AddTask(none, TF(solvers::utils::between_fields::CopyData), md); copy_rhs = tl.AddTask(copy_rhs, TF(solvers::utils::CopyData>), md, md_rhs); // Possibly set rhs <- A.u_exact for a given u_exact so that the exact solution is // known when we solve A.u = rhs if (use_exact_rhs) { - auto copy_exact = tl.AddTask(copy_rhs, TF(solvers::utils::CopyDataBetweenFields), md); + auto copy_exact = tl.AddTask( + copy_rhs, TF(solvers::utils::between_fields::CopyData), md); copy_exact = tl.AddTask( copy_exact, TF(solvers::utils::CopyData>), md, md_u); auto comm = AddBoundaryExchangeTasks(copy_exact, tl, md_u, true); @@ -103,8 +105,9 @@ TaskCollection PoissonDriver::MakeTaskCollection(BlockList_t &blocks) { auto copy_back = tl.AddTask( solve, TF(solvers::utils::CopyData>), md_u, md); 
    auto diff = tl.AddTask(
-        copy_back, TF(solvers::utils::AddFieldsAndStore), md, 1.0, -1.0);
-    auto get_err = solvers::utils::DotProduct(diff, tl, &err, md);
+        copy_back, TF(solvers::utils::between_fields::AddFieldsAndStore),
+        md, 1.0, -1.0);
+    auto get_err = solvers::utils::between_fields::DotProduct(diff, tl, &err, md);
     tl.AddTask(
         get_err,
         [](PoissonDriver *driver, int partition) {
diff --git a/src/solvers/solver_utils.hpp b/src/solvers/solver_utils.hpp
index 21cb36c24ccf..23372650d781 100644
--- a/src/solvers/solver_utils.hpp
+++ b/src/solvers/solver_utils.hpp
@@ -148,8 +148,10 @@ struct Stencil {
 };
 
 namespace utils {
+
+namespace between_fields {
 template
-TaskStatus CopyDataBetweenFields(const std::shared_ptr> &md) {
+TaskStatus CopyData(const std::shared_ptr> &md) {
   using TE = parthenon::TopologicalElement;
   TE te = TE::CC;
   IndexRange ib = md->GetBoundsI(IndexDomain::entire, te);
@@ -177,6 +179,7 @@ TaskStatus CopyData(const std::shared_ptr> &md) {
   });
   return TaskStatus::complete;
 }
+} // namespace between_fields
 
 template
 TaskStatus CopyData(const std::shared_ptr> &md_in,
@@ -245,6 +248,10 @@ TaskStatus SetToZero(const std::shared_ptr> &md) {
   return TaskStatus::complete;
 }
 
+// Utility functions in the between_fields namespace work on separate fields specified
+// by types. Other utility functions work on the same list of fields across separate
+// MeshData containers.
+namespace between_fields {
 template
 TaskStatus AddFieldsAndStoreInteriorSelect(const std::shared_ptr> &md,
                                            Real wa = 1.0, Real wb = 1.0,
@@ -292,6 +299,7 @@ TaskStatus AddFieldsAndStore(const std::shared_ptr> &md, Real wa
   return AddFieldsAndStoreInteriorSelect(
       md, wa, wb, false);
 }
+} // namespace between_fields
 
 template
 TaskStatus AddFieldsAndStoreInteriorSelect(const std::shared_ptr> &md_a,
@@ -347,6 +355,7 @@ TaskStatus AddFieldsAndStore(const std::shared_ptr> &md_a,
       wa, wb, false);
 }
 
+namespace between_fields {
 template
 TaskStatus ADividedByB(const std::shared_ptr> &md) {
   IndexRange ib = md->GetBoundsI(IndexDomain::interior);
@@ -367,6 +376,7 @@ TaskStatus ADividedByB(const std::shared_ptr> &md) {
   });
   return TaskStatus::complete;
 }
+} // namespace between_fields
 
 template
 TaskStatus ADividedByB(const std::shared_ptr> &md_a,
@@ -391,6 +401,7 @@ TaskStatus ADividedByB(const std::shared_ptr> &md_a,
   return TaskStatus::complete;
 }
 
+namespace between_fields {
 template
 TaskStatus DotProductLocal(const std::shared_ptr> &md,
                            AllReduce *adotb) {
@@ -439,6 +450,7 @@ TaskID DotProduct(TaskID dependency_in, TaskList &tl, AllReduce *adotb,
       start_global_adotb, &AllReduce::CheckReduce, adotb);
   return finish_global_adotb;
 }
+} // namespace between_fields
 
 template
 TaskStatus GlobalMinLocal(const std::shared_ptr> &md,

From d4a5a267a0794f45a039e16a6f381d1923d755c5 Mon Sep 17 00:00:00 2001
From: Luke Roberts
Date: Mon, 25 Nov 2024 11:58:35 -0700
Subject: [PATCH 59/62] fix changelog

---
 CHANGELOG.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 8e4676e218c1..1038df640ef8 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,7 +3,6 @@
 ## Current develop
 
 ### Added (new features/APIs/variables/...)
-- [[PR 1174]](https://github.com/parthenon-hpc-lab/parthenon/pull/1174) Add CG solver and custom solver prolongation operator options
 - [[PR 1103]](https://github.com/parthenon-hpc-lab/parthenon/pull/1103) Add sparsity to vector wave equation test
 - [[PR 1185]](https://github.com/parthenon-hpc-lab/parthenon/pull/1185) Bugfix to particle defragmentation
 - [[PR 1184]](https://github.com/parthenon-hpc-lab/parthenon/pull/1184) Fix swarm block neighbor indexing in 1D, 2D
@@ -37,6 +36,7 @@
 
 ### Incompatibilities (i.e. breaking changes)
 
+- [[PR 1174]](https://github.com/parthenon-hpc-lab/parthenon/pull/1174) Add CG solver, custom solver prolongation operator options, and switch to stage based solvers
 - [[PR 1177]](https://github.com/parthenon-hpc-lab/parthenon/pull/1177) Make mesh-level boundary conditions usable without the "user" flag
 
 ## Release 24.08

From 0d49d78116823de5b2e834ea01357fe54aa0adbe Mon Sep 17 00:00:00 2001
From: Luke Roberts
Date: Tue, 26 Nov 2024 17:53:40 -0700
Subject: [PATCH 60/62] Update src/solvers/cg_solver.hpp

Co-authored-by: Jonah Miller
---
 src/solvers/cg_solver.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/solvers/cg_solver.hpp b/src/solvers/cg_solver.hpp
index 775a4ab2ab97..085d19185b0f 100644
--- a/src/solvers/cg_solver.hpp
+++ b/src/solvers/cg_solver.hpp
@@ -234,7 +234,7 @@ class CGSolver : public SolverBase {
         [&](CGSolver *solver, Mesh *pmesh) {
           Real rms_res = std::sqrt(solver->residual.val / pmesh->GetTotalCells());
           if (Globals::my_rank == 0 && solver->params_.print_per_step)
-            printf("%i %e\n", solver->iter_counter, rms_res);
+            printf("\t%i %e\n", solver->iter_counter, rms_res);
           return TaskStatus::complete;
         },
         this, pmesh);

From 95ed5b2cf0d02e90a9da2a5684612e39525e1ecb Mon Sep 17 00:00:00 2001
From: Luke Roberts
Date: Tue, 26 Nov 2024 17:59:05 -0700
Subject: [PATCH 61/62] act on Jonah comments

---
 example/poisson_gmg/poisson_driver.cpp |  8 ++++----
 src/interface/sparse_pack.hpp          |  3 ++-
 src/interface/sparse_pack_base.cpp     | 18 ++++++++++++------
 src/interface/sparse_pack_base.hpp     |  2 +-
 4 files changed, 19 insertions(+), 12 deletions(-)

diff --git a/example/poisson_gmg/poisson_driver.cpp b/example/poisson_gmg/poisson_driver.cpp
index 49eefe1164c4..77b07b79bfee 100644
--- a/example/poisson_gmg/poisson_driver.cpp
+++ b/example/poisson_gmg/poisson_driver.cpp
@@ -85,17 +85,17 @@ TaskCollection PoissonDriver::MakeTaskCollection(BlockList_t &blocks) {
   if (use_exact_rhs) {
     auto copy_exact = tl.AddTask(
         copy_rhs, TF(solvers::utils::between_fields::CopyData), md);
-    copy_exact = tl.AddTask(
+    auto copy_u_between_stages = tl.AddTask(
         copy_exact, TF(solvers::utils::CopyData>), md, md_u);
-    auto comm = AddBoundaryExchangeTasks(copy_exact, tl, md_u, true);
+    auto comm = AddBoundaryExchangeTasks(copy_u_between_stages, tl,
+                                         md_u, true);
     auto *eqs =
         pkg->MutableParam>("poisson_equation");
     copy_rhs = eqs->Ax(tl, comm, md, md_u, md_rhs);
   }
 
   // Set initial solution guess to zero
-  auto zero_u = tl.AddTask(copy_rhs, TF(solvers::utils::SetToZero), md);
-  zero_u = tl.AddTask(zero_u, TF(solvers::utils::SetToZero), md_u);
+  auto zero_u = tl.AddTask(copy_rhs, TF(solvers::utils::SetToZero), md_u);
 
   auto setup = psolver->AddSetupTasks(tl, zero_u, i, pmesh);
   auto solve = psolver->AddTasks(tl, setup, i, pmesh);

diff --git a/src/interface/sparse_pack.hpp b/src/interface/sparse_pack.hpp
index bdd48792b66b..824e2aa95d12 100644
--- a/src/interface/sparse_pack.hpp
+++ b/src/interface/sparse_pack.hpp
@@ -196,7 +196,8 @@ class SparsePack : public SparsePackBase {
   KOKKOS_INLINE_FUNCTION bool IsPhysicalBoundary(const int b, const int off3,
                                                  const int off2, const int off1) const {
-    return block_props_(b, (off1 + 1) + 3 * ((off2 + 1) + 3 * (off3 + 1))) == bnd_flag;
+    return block_props_(b, (off1 + 1) + 3 * ((off2 + 1) + 3 * (off3 + 1))) ==
+           physical_bnd_flag;
   }
 
   KOKKOS_INLINE_FUNCTION int GetGID(const int b) const { return block_props_(b, 27); }

diff --git a/src/interface/sparse_pack_base.cpp b/src/interface/sparse_pack_base.cpp
index 4fdc4e0db17c..d11d1df3ab9b 100644
--- a/src/interface/sparse_pack_base.cpp
+++ b/src/interface/sparse_pack_base.cpp
@@ -219,17 +219,23 @@ SparsePackBase SparsePackBase::Build(T *pmd, const PackDescriptor &desc,
     for (int oxb = -1; oxb <= 1; ++oxb) {
       for (int oxa = -1; oxa <= 1; ++oxa) {
         if (pmb->IsPhysicalBoundary(inner_x1))
-          pack.block_props_h_(blidx, bp_idxer.GetFlatIdx(oxb, oxa, -1)) = bnd_flag;
+          pack.block_props_h_(blidx, bp_idxer.GetFlatIdx(oxb, oxa, -1)) =
+              physical_bnd_flag;
         if (pmb->IsPhysicalBoundary(outer_x1))
-          pack.block_props_h_(blidx, bp_idxer.GetFlatIdx(oxb, oxa, 1)) = bnd_flag;
+          pack.block_props_h_(blidx, bp_idxer.GetFlatIdx(oxb, oxa, 1)) =
+              physical_bnd_flag;
         if (pmb->IsPhysicalBoundary(inner_x2))
-          pack.block_props_h_(blidx, bp_idxer.GetFlatIdx(oxb, -1, oxa)) = bnd_flag;
+          pack.block_props_h_(blidx, bp_idxer.GetFlatIdx(oxb, -1, oxa)) =
+              physical_bnd_flag;
         if (pmb->IsPhysicalBoundary(outer_x2))
-          pack.block_props_h_(blidx, bp_idxer.GetFlatIdx(oxb, 1, oxa)) = bnd_flag;
+          pack.block_props_h_(blidx, bp_idxer.GetFlatIdx(oxb, 1, oxa)) =
+              physical_bnd_flag;
         if (pmb->IsPhysicalBoundary(inner_x3))
-          pack.block_props_h_(blidx, bp_idxer.GetFlatIdx(-1, oxb, oxa)) = bnd_flag;
+          pack.block_props_h_(blidx, bp_idxer.GetFlatIdx(-1, oxb, oxa)) =
+              physical_bnd_flag;
         if (pmb->IsPhysicalBoundary(outer_x3))
-          pack.block_props_h_(blidx, bp_idxer.GetFlatIdx(1, oxb, oxa)) = bnd_flag;
+          pack.block_props_h_(blidx, bp_idxer.GetFlatIdx(1, oxb, oxa)) =
+              physical_bnd_flag;
       }
     }

diff --git a/src/interface/sparse_pack_base.hpp b/src/interface/sparse_pack_base.hpp
index d1d59caff844..9f03a98301d6 100644
--- a/src/interface/sparse_pack_base.hpp
+++ b/src/interface/sparse_pack_base.hpp
@@ -64,7 +64,7 @@ class SparsePackBase {
   using block_props_h_t = typename block_props_t::HostMirror;
   using coords_t = ParArray1DRaw>;
 
-  static constexpr int bnd_flag = -2000;
+  static constexpr int physical_bnd_flag = -2000;
 
   // Returns a SparsePackBase object that is either newly created or taken
   // from the cache in pmd. The cache itself handles all of this logic

From d71062e6f4c8e0fddb887b05aa2111681a61b4dd Mon Sep 17 00:00:00 2001
From: Luke Roberts
Date: Tue, 26 Nov 2024 18:02:09 -0700
Subject: [PATCH 62/62] format

---
 src/solvers/cg_solver.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/solvers/cg_solver.hpp b/src/solvers/cg_solver.hpp
index 085d19185b0f..898b16c885b7 100644
--- a/src/solvers/cg_solver.hpp
+++ b/src/solvers/cg_solver.hpp
@@ -234,7 +234,7 @@ class CGSolver : public SolverBase {
         [&](CGSolver *solver, Mesh *pmesh) {
           Real rms_res = std::sqrt(solver->residual.val / pmesh->GetTotalCells());
           if (Globals::my_rank == 0 && solver->params_.print_per_step)
-              printf("\t%i %e\n", solver->iter_counter, rms_res);
+            printf("\t%i %e\n", solver->iter_counter, rms_res);
           return TaskStatus::complete;
         },
         this, pmesh);
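For reference, the end state of this series is exercised by the poisson_gmg
example roughly as follows. This is a condensed sketch assembled from the hunks
above, not new API: template arguments appear as comment placeholders where the
extracted text dropped them, and the names "base", "u", "rhs", and
"poisson/solver_params" come from poisson_package.cpp.

  // Pick and construct a stage-based solver behind the SolverBase interface.
  using PoissEq = poisson_package::PoissonEquation</*field types*/>;
  std::shared_ptr<parthenon::solvers::SolverBase> psolver =
      std::make_shared<parthenon::solvers::CGSolver<PoissEq /*, defaults*/>>(
          "base", "u", "rhs", pin, "poisson/solver_params", PoissEq(pin, "poisson"));

  // In the driver, zero the initial guess, then queue the setup and solve
  // task graphs exactly as poisson_driver.cpp does above.
  auto zero_u = tl.AddTask(copy_rhs, TF(solvers::utils::SetToZero</*u*/>), md_u);
  auto setup = psolver->AddSetupTasks(tl, zero_u, i, pmesh);
  auto solve = psolver->AddTasks(tl, setup, i, pmesh);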