Skip to content

Commit

Permalink
Merge branch 'develop' into pgrete/pmd-output
Browse files Browse the repository at this point in the history
  • Loading branch information
BenWibking authored Nov 22, 2024
2 parents 503a5b6 + b559452 commit d064f03
Show file tree
Hide file tree
Showing 35 changed files with 728 additions and 401 deletions.
24 changes: 18 additions & 6 deletions .github/workflows/check-compilers.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,14 @@
name: Check compilers

on: [push, pull_request]
on:
# run every day at 06:00 UTC
schedule:
- cron: '0 6 * * *'
# when triggered manually
workflow_dispatch:
# when auto merge is enabled (hack to make sure it's run before merging)
pull_request:
types: [auto_merge_enabled]

# Cancel "duplicated" workflows triggered by pushes to internal
# branches with associated PRs.
Expand All @@ -14,7 +22,7 @@ jobs:
strategy:
matrix:
cxx: ['g++', 'clang++-15']
cmake_build_type: ['Release', 'Debug']
cmake_build_type: ['Release', 'DbgNoSym']
device: ['cuda', 'host']
parallel: ['serial', 'mpi']
exclude:
Expand All @@ -23,7 +31,7 @@ jobs:
# https://github.com/lanl/parthenon/issues/630
- cxx: clang++-15
device: cuda
cmake_build_type: Debug
cmake_build_type: DbgNoSym
runs-on: ubuntu-latest
container:
image: ghcr.io/parthenon-hpc-lab/cuda11.6-mpi-hdf5-ascent
Expand All @@ -48,25 +56,29 @@ jobs:
strategy:
matrix:
cxx: ['hipcc']
cmake_build_type: ['Release', 'Debug']
cmake_build_type: ['Release', 'DbgNoSym']
device: ['hip']
parallel: ['serial', 'mpi']
runs-on: ubuntu-latest
container:
image: ghcr.io/parthenon-hpc-lab/rocm5.4.3-mpi-hdf5
image: ghcr.io/parthenon-hpc-lab/rocm6.2-mpi-hdf5
env:
CMAKE_GENERATOR: Ninja
steps:
- uses: actions/checkout@v2
with:
submodules: 'true'
- name: CMake
# Manually changing the arch for this (debug) build as the
# -O0 option causes compiler issue for the navi 1030 GPU at
# compile time, see https://github.com/parthenon-hpc-lab/parthenon/pull/1191#issuecomment-2492035364
run: |
cmake -B builddir \
-DCMAKE_CXX_COMPILER=${{ matrix.cxx }} \
-DCMAKE_BUILD_TYPE=${{ matrix.cmake_build_type }} \
-DMACHINE_CFG=${PWD}/cmake/machinecfg/GitHubActions.cmake \
-DMACHINE_VARIANT=${{ matrix.device }}_${{ matrix.parallel }}
-DMACHINE_VARIANT=${{ matrix.device }}_${{ matrix.parallel }} \
-DKokkos_ARCH_AMD_GFX90A=ON -DKokkos_ARCH_NAVI1030=OFF
- name: Build
run: |
cmake --build builddir --parallel 2
4 changes: 3 additions & 1 deletion .github/workflows/ci-extended.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ env:
CMAKE_BUILD_PARALLEL_LEVEL: 5 # num threads for build
MACHINE_CFG: cmake/machinecfg/CI.cmake
OMPI_MCA_mpi_common_cuda_event_max: 1000
# CUDA IPC within docker repeatedly seems to cause issues on the CI machine
OMPI_MCA_btl_smcuda_use_cuda_ipc: 0
# https://github.com/open-mpi/ompi/issues/4948#issuecomment-395468231
OMPI_MCA_btl_vader_single_copy_mechanism: none

Expand All @@ -34,7 +36,7 @@ jobs:
container:
image: ghcr.io/parthenon-hpc-lab/cuda11.6-mpi-hdf5-ascent
# map to local user id on CI machine to allow writing to build cache
options: --user 1001
options: --user 1001 --cap-add CAP_SYS_PTRACE --shm-size="8g" --ulimit memlock=134217728
steps:
- uses: actions/checkout@v3
with:
Expand Down
8 changes: 5 additions & 3 deletions .github/workflows/ci-short.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ env:
CMAKE_BUILD_PARALLEL_LEVEL: 5 # num threads for build
MACHINE_CFG: cmake/machinecfg/CI.cmake
OMPI_MCA_mpi_common_cuda_event_max: 1000
# CUDA IPC within docker repeatedly seems to cause issues on the CI machine
OMPI_MCA_btl_smcuda_use_cuda_ipc: 0
# https://github.com/open-mpi/ompi/issues/4948#issuecomment-395468231
OMPI_MCA_btl_vader_single_copy_mechanism: none

Expand All @@ -22,7 +24,7 @@ jobs:
container:
image: ghcr.io/parthenon-hpc-lab/cuda11.6-mpi-hdf5-ascent
# map to local user id on CI machine to allow writing to build cache
options: --user 1001
options: --user 1001 --cap-add CAP_SYS_PTRACE --shm-size="8g" --ulimit memlock=134217728
steps:
- uses: actions/checkout@v3
with:
Expand All @@ -47,7 +49,7 @@ jobs:
container:
image: ghcr.io/parthenon-hpc-lab/cuda11.6-mpi-hdf5-ascent
# map to local user id on CI machine to allow writing to build cache
options: --user 1001
options: --user 1001 --cap-add CAP_SYS_PTRACE --shm-size="8g" --ulimit memlock=134217728
steps:
- uses: actions/checkout@v3
with:
Expand Down Expand Up @@ -79,7 +81,7 @@ jobs:
container:
image: ghcr.io/parthenon-hpc-lab/cuda11.6-mpi-hdf5-ascent
# map to local user id on CI machine to allow writing to build cache
options: --user 1001
options: --user 1001 --cap-add CAP_SYS_PTRACE --shm-size="8g" --ulimit memlock=134217728
steps:
- uses: actions/checkout@v3
with:
Expand Down
27 changes: 15 additions & 12 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,23 +3,26 @@
## Current develop

### Added (new features/APIs/variables/...)
- [[PR 1185]](https://github.com/parthenon-hpc-lab/parthenon/pull/1185/files) Bugfix to particle defragmentation
- [[PR 1103]](https://github.com/parthenon-hpc-lab/parthenon/pull/1103) Add sparsity to vector wave equation test
- [[PR 1185]](https://github.com/parthenon-hpc-lab/parthenon/pull/1185) Bugfix to particle defragmentation
- [[PR 1184]](https://github.com/parthenon-hpc-lab/parthenon/pull/1184) Fix swarm block neighbor indexing in 1D, 2D
- [[PR 1183]](https://github.com/parthenon-hpc-lab/parthenon/pull/1183) Fix particle leapfrog example initialization data
- [[PR 1179]](https://github.com/parthenon-hpc-lab/parthenon/pull/1179) Make a global variable for whether simulation is a restart
- [[PR 1171]](https://github.com/parthenon-hpc-lab/parthenon/pull/1171) Add PARTHENON_USE_SYSTEM_PACKAGES build option
- [[PR 1161]](https://github.com/parthenon-hpc-lab/parthenon/pull/1161) Make flux field Metadata accessible, add Metadata::CellMemAligned flag, small performance upgrades

### Changed (changing behavior/API/variables/...)
- [[PR 1191]](https://github.com/parthenon-hpc-lab/parthenon/pull/1191) Update Kokkos version to 4.4.1
- [[PR 1209]](https://github.com/parthenon-hpc-lab/parthenon/pull/1209) Ordered history output
- [[PR 1206]](https://github.com/parthenon-hpc-lab/parthenon/pull/1206) Leapfrog fix
- [[PR1203]](https://github.com/parthenon-hpc-lab/parthenon/pull/1203) Pin Ubuntu CI image
- [[PR1177]](https://github.com/parthenon-hpc-lab/parthenon/pull/1177) Make mesh-level boundary conditions usable without the "user" flag
- [[PR 1203]](https://github.com/parthenon-hpc-lab/parthenon/pull/1203) Pin Ubuntu CI image
- [[PR 1177]](https://github.com/parthenon-hpc-lab/parthenon/pull/1177) Make mesh-level boundary conditions usable without the "user" flag
- [[PR 1187]](https://github.com/parthenon-hpc-lab/parthenon/pull/1187) Make DataCollection::Add safer and generalize MeshBlockData::Initialize
- [[Issue 1165]](https://github.com/parthenon-hpc-lab/parthenon/issues/1165) Bump Kokkos submodule to 4.4.1
- [[PR 1171]](https://github.com/parthenon-hpc-lab/parthenon/pull/1171) Add PARTHENON_USE_SYSTEM_PACKAGES build option
- [[PR 1172]](https://github.com/parthenon-hpc-lab/parthenon/pull/1172) Make parthenon manager robust against external MPI init and finalize calls

### Fixed (not changing behavior/API/variables/...)
- [[PR 1188]](https://github.com/parthenon-hpc-lab/parthenon/pull/1188) Fix hdf5 output issue for metadata none variables, update test.
- [[PR 1170]](https://github.com/parthenon-hpc-lab/parthenon/pull/1170) Fixed incorrect initialization of array by a const not constexpr
- [[PR 1189]](https://github.com/parthenon-hpc-lab/parthenon/pull/1189) Address CUDA MPI/IPC issue with Kokkos <=4.4.1
- [[PR 1178]](https://github.com/parthenon-hpc-lab/parthenon/pull/1178) Fix issue with mesh pointer when using relative residual tolerance in BiCGSTAB solver.
Expand All @@ -32,7 +35,7 @@


### Incompatibilities (i.e. breaking changes)
- [[PR1177]](https://github.com/parthenon-hpc-lab/parthenon/pull/1177) Make mesh-level boundary conditions usable without the "user" flag
- [[PR 1177]](https://github.com/parthenon-hpc-lab/parthenon/pull/1177) Make mesh-level boundary conditions usable without the "user" flag

## Release 24.08
Date: 2024-08-30
Expand Down Expand Up @@ -156,12 +159,12 @@ Date: 2024-03-21
- [[PR 973]](https://github.com/parthenon-hpc-lab/parthenon/pull/973) Multigrid performance upgrades

### Fixed (not changing behavior/API/variables/...)
- [[PR1023]](https://github.com/parthenon-hpc-lab/parthenon/pull/1023) Fix broken param of a scalar bool
- [[PR1012]](https://github.com/parthenon-hpc-lab/parthenon/pull/1012) Remove accidentally duplicated code
- [[PR992]](https://github.com/parthenon-hpc-lab/parthenon/pull/992) Allow custom PR ops with sparse pools
- [[PR988]](https://github.com/parthenon-hpc-lab/parthenon/pull/988) Fix bug in neighbor finding routine for small, periodic, refined meshes
- [[PR986]](https://github.com/parthenon-hpc-lab/parthenon/pull/986) Fix bug in sparse boundary communication BndInfo caching
- [[PR978]](https://github.com/parthenon-hpc-lab/parthenon/pull/978) remove erroneous sparse check
- [[PR 1023]](https://github.com/parthenon-hpc-lab/parthenon/pull/1023) Fix broken param of a scalar bool
- [[PR 1012]](https://github.com/parthenon-hpc-lab/parthenon/pull/1012) Remove accidentally duplicated code
- [[PR 992]](https://github.com/parthenon-hpc-lab/parthenon/pull/992) Allow custom PR ops with sparse pools
- [[PR 988]](https://github.com/parthenon-hpc-lab/parthenon/pull/988) Fix bug in neighbor finding routine for small, periodic, refined meshes
- [[PR 986]](https://github.com/parthenon-hpc-lab/parthenon/pull/986) Fix bug in sparse boundary communication BndInfo caching
- [[PR 978]](https://github.com/parthenon-hpc-lab/parthenon/pull/978) remove erroneous sparse check

### Infrastructure (changes irrelevant to downstream codes)
- [[PR 1027]](https://github.com/parthenon-hpc-lab/parthenon/pull/1027) Refactor RestartReader as abstract class
Expand Down Expand Up @@ -228,7 +231,7 @@ Date: 2023-11-16
- [[PR 901]](https://github.com/parthenon-hpc-lab/parthenon/pull/901) Implement shared element ownership model

### Removed (removing behavior/API/variables/...)
- [[PR 930](https://github.com/parthenon-hpc-lab/parthenon/pull/930) Remove ParthenonManager::ParthenonInit as it is error-prone and the split functions are the recommended usage.
- [[PR 930]](https://github.com/parthenon-hpc-lab/parthenon/pull/930) Remove ParthenonManager::ParthenonInit as it is error-prone and the split functions are the recommended usage.


## Release 0.8.0
Expand Down
4 changes: 2 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -76,9 +76,9 @@ include(cmake/Format.cmake)
include(cmake/Lint.cmake)

# regression test reference data
set(REGRESSION_GOLD_STANDARD_VER 24 CACHE STRING "Version of gold standard to download and use")
set(REGRESSION_GOLD_STANDARD_VER 25 CACHE STRING "Version of gold standard to download and use")
set(REGRESSION_GOLD_STANDARD_HASH
"SHA512=e220df92a335131131e42ddb52dc221a6dbd6bb56361483b4af0292620eeb82ffb21ef3b95fd9a7c5cc158fb754da0bf1a1015bec98b5bbad05f4bceb1ee99bc"
"SHA512=314dc8312366d81ba33d1fde25812e9a7697b2f529de29e22662df0d458f1c4bc5b5bb4e649888170f66ffec0df1be20a9cf401944531c1c1ad835e26eaad28f"
CACHE STRING "Hash of default gold standard file to download")
option(REGRESSION_GOLD_STANDARD_SYNC "Automatically sync gold standard files." ON)

Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ Parthenon -- a performance portable block-structured adaptive mesh refinement fr

* CMake 3.16 or greater
* C++17 compatible compiler
* Kokkos 4.0.1 or greater
* Kokkos 4.4.1 or greater

## Optional (enabling features)

Expand Down
3 changes: 3 additions & 0 deletions cmake/machinecfg/GitHubActions.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@ message(STATUS "Loading machine configuration for GitHub Actions CI. ")

# common options
set(NUM_MPI_PROC_TESTING "2" CACHE STRING "CI runs tests with 2 MPI ranks")
set(Kokkos_ENABLE_ROCTHRUST OFF CACHE BOOL "Temporarily disabled as the container needs to be updated to the `-complete` base image.")

set(CMAKE_CXX_FLAGS_DBGNOSYM "-O0" CACHE STRING "Debug build without symbols")

set(MACHINE_CXX_FLAGS "")
if (${MACHINE_VARIANT} MATCHES "cuda")
Expand Down
41 changes: 40 additions & 1 deletion doc/sphinx/src/development.rst
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,10 @@ Kokkos wrappers/abstractions
- ``par_for`` wrappers use inclusive bounds, i.e., the loop will
include the last index given
- ``ParArrayND`` arrays by default allocate on the *device* using
default precision configured
default precision configured and come with a ``State`` that can
be used to store additional metadata.
- ``ParArray#DRaw`` directly map to Kokkos ``Views`` that are allocated
on *device* using default precision.
- To create an array on the host with identical layout to the device
array either use

Expand Down Expand Up @@ -62,6 +65,42 @@ parallelism interface that is needed for managing memory cached in
tightly nested loops. The wrappers are documented
:ref:`here <nested par for>`.

View of Views
-------------

Special care needs to be taken when working with a ``View`` of ``Views``.

To repeat the Kokkos documentation: `Don't use them <https://kokkos.org/kokkos-core-wiki/ProgrammingGuide/View.html#can-i-make-a-view-of-views>`__

But if you have to (which is the case in some places inside Parthenon)
then follow this pattern:

.. code:: c++

ParArray1DRaw<ParArray1D<Real>> view_of_pararrays(parthenon::ViewOfViewAlloc("myname"), 10);

The ``ViewOfViewAlloc`` ensures that the ``Kokkos::SequentialHostInit`` property is added,
which results in the (inner ``View``) deallocators being called on the host (rather than on
the device by default).
Also note the use of the "raw" ``ParArray1DRaw``, which directly maps to a Kokkos ``View``
(that is required to process the allocation property as this interface is not exposed
in the more generic ``ParArrayND``).

Similarly, when you create a host mirror of said ``View`` of ``Views``, add the additional
property for the same reason.

.. code:: c++

// explicit theoretical example -- don't use this
auto view_of_pararrays_h =
Kokkos::create_mirror_view(Kokkos::view_alloc(Kokkos::SequentialHostInit), view_of_pararrays);

// but instead use this interface provided by Parthenon:
auto view_of_pararrays_h = create_view_of_view_mirror(view_of_pararrays);


Note that the ``SequentialHostInit`` was only added in Kokkos 4.4.1 (which is now the default in Parthenon).

The need for reductions within function handling ``MeshBlock`` data
-------------------------------------------------------------------

Expand Down
4 changes: 3 additions & 1 deletion doc/sphinx/src/outputs.rst
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,9 @@ block might look like

This will produce a text (``.hst``) output file every 1 unit of
simulation time. The content of the file is determined by the functions
enrolled by specific packages, see :ref:`state history output`.
enrolled by specific packages, see :ref:`state history output`. Per-package history
outputs will always be in alphabetical order by package name, which may not match
the order in which packages were added to a simulation.

Histograms
----------
Expand Down
33 changes: 33 additions & 0 deletions example/advection/advection_package.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,20 @@ std::shared_ptr<StateDescriptor> Initialize(ParameterInput *pin) {
m = Metadata({Metadata::Cell, Metadata::OneCopy}, std::vector<int>({1}));
pkg->AddField("my_derived_var", m);

// Create a Metadata::None variable for IO testing purposes.
// Only load if test_metadata_none is specified in the Advection block
auto test_metadata_none =
pin->GetOrAddBoolean("Advection", "test_metadata_none", false);
pkg->AddParam<bool>("test_metadata_none", test_metadata_none);
if (test_metadata_none) {
const int nx1 = pin->GetOrAddInteger("parthenon/meshblock", "nx1", 1);
const int nx2 = pin->GetOrAddInteger("parthenon/meshblock", "nx2", 1);
const int nx3 = pin->GetOrAddInteger("parthenon/meshblock", "nx3", 1);
std::vector<int> test_shape = {nx1 + 1, nx2 + 1, nx3 + 1, 3};
m = Metadata({Metadata::OneCopy, Metadata::None}, test_shape);
pkg->AddField("metadata_none_var", m);
}

// List (vector) of HistoryOutputVar that will all be enrolled as output variables
parthenon::HstVar_list hst_vars = {};
// Now we add a couple of callback functions
Expand Down Expand Up @@ -281,6 +295,7 @@ AmrTag CheckRefinement(MeshBlockData<Real> *rc) {
void PreFill(MeshBlockData<Real> *rc) {
auto pmb = rc->GetBlockPointer();
auto pkg = pmb->packages.Get("advection_package");
const bool test_metadata_none = pkg->Param<bool>("test_metadata_none");
bool fill_derived = pkg->Param<bool>("fill_derived");

if (fill_derived) {
Expand All @@ -302,6 +317,24 @@ void PreFill(MeshBlockData<Real> *rc) {
v(out + n, k, j, i) = 1.0 - v(in + n, k, j, i);
});
}

// Fill the metadata::None var with index gymnastics.
if (test_metadata_none) {
const int nx1 = pmb->cellbounds.ncellsi(IndexDomain::interior);
const int nx2 = pmb->cellbounds.ncellsj(IndexDomain::interior);
const int nx3 = pmb->cellbounds.ncellsk(IndexDomain::interior);

// packing in principle unnecessary/convoluted here and just done for demonstration
std::vector<std::string> vars({"metadata_none_var"});
PackIndexMap imap;
const auto &v = rc->PackVariables(vars, imap);

pmb->par_for(
PARTHENON_AUTO_LABEL, 0, 2, 0, nx3, 0, nx2, 0, nx1,
KOKKOS_LAMBDA(const int n, const int k, const int j, const int i) {
v(n, k, j, i) = n + k + j + i;
});
}
}

// this is the package registered function to fill derived
Expand Down
Loading

0 comments on commit d064f03

Please sign in to comment.