Skip to content

Commit

Permalink
Merge branch 'develop' into acreyes/par-dispatch-template
Browse files Browse the repository at this point in the history
  • Loading branch information
pgrete committed Dec 2, 2024
2 parents 1661218 + 4cde093 commit e6c7b93
Show file tree
Hide file tree
Showing 39 changed files with 346 additions and 379 deletions.
24 changes: 18 additions & 6 deletions .github/workflows/check-compilers.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,14 @@
name: Check compilers

on: [push, pull_request]
on:
# run every day at 06:00 UTC
schedule:
- cron: '0 6 * * *'
# when triggered manually
workflow_dispatch:
# when auto merge is enabled (hack to make sure it's run before merging)
pull_request:
types: [auto_merge_enabled]

# Cancel "duplicated" workflows triggered by pushes to internal
# branches with associated PRs.
Expand All @@ -14,7 +22,7 @@ jobs:
strategy:
matrix:
cxx: ['g++', 'clang++-15']
cmake_build_type: ['Release', 'Debug']
cmake_build_type: ['Release', 'DbgNoSym']
device: ['cuda', 'host']
parallel: ['serial', 'mpi']
exclude:
Expand All @@ -23,7 +31,7 @@ jobs:
# https://github.com/lanl/parthenon/issues/630
- cxx: clang++-15
device: cuda
cmake_build_type: Debug
cmake_build_type: DbgNoSym
runs-on: ubuntu-latest
container:
image: ghcr.io/parthenon-hpc-lab/cuda11.6-mpi-hdf5-ascent
Expand All @@ -48,25 +56,29 @@ jobs:
strategy:
matrix:
cxx: ['hipcc']
cmake_build_type: ['Release', 'Debug']
cmake_build_type: ['Release', 'DbgNoSym']
device: ['hip']
parallel: ['serial', 'mpi']
runs-on: ubuntu-latest
container:
image: ghcr.io/parthenon-hpc-lab/rocm5.4.3-mpi-hdf5
image: ghcr.io/parthenon-hpc-lab/rocm6.2-mpi-hdf5
env:
CMAKE_GENERATOR: Ninja
steps:
- uses: actions/checkout@v2
with:
submodules: 'true'
- name: CMake
# Manually changing the arch for this (debug) build as the
# -O0 option causes compiler issue for the navi 1030 GPU at
# compile time, see https://github.com/parthenon-hpc-lab/parthenon/pull/1191#issuecomment-2492035364
run: |
cmake -B builddir \
-DCMAKE_CXX_COMPILER=${{ matrix.cxx }} \
-DCMAKE_BUILD_TYPE=${{ matrix.cmake_build_type }} \
-DMACHINE_CFG=${PWD}/cmake/machinecfg/GitHubActions.cmake \
-DMACHINE_VARIANT=${{ matrix.device }}_${{ matrix.parallel }}
-DMACHINE_VARIANT=${{ matrix.device }}_${{ matrix.parallel }} \
-DKokkos_ARCH_AMD_GFX90A=ON -DKokkos_ARCH_NAVI1030=OFF
- name: Build
run: |
cmake --build builddir --parallel 2
4 changes: 3 additions & 1 deletion .github/workflows/ci-extended.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ env:
CMAKE_BUILD_PARALLEL_LEVEL: 5 # num threads for build
MACHINE_CFG: cmake/machinecfg/CI.cmake
OMPI_MCA_mpi_common_cuda_event_max: 1000
# CUDA IPC within docker repeatedly seems to cause issues on the CI machine
OMPI_MCA_btl_smcuda_use_cuda_ipc: 0
# https://github.com/open-mpi/ompi/issues/4948#issuecomment-395468231
OMPI_MCA_btl_vader_single_copy_mechanism: none

Expand All @@ -34,7 +36,7 @@ jobs:
container:
image: ghcr.io/parthenon-hpc-lab/cuda11.6-mpi-hdf5-ascent
# map to local user id on CI machine to allow writing to build cache
options: --user 1001
options: --user 1001 --cap-add CAP_SYS_PTRACE --shm-size="8g" --ulimit memlock=134217728
steps:
- uses: actions/checkout@v3
with:
Expand Down
8 changes: 5 additions & 3 deletions .github/workflows/ci-short.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ env:
CMAKE_BUILD_PARALLEL_LEVEL: 5 # num threads for build
MACHINE_CFG: cmake/machinecfg/CI.cmake
OMPI_MCA_mpi_common_cuda_event_max: 1000
# CUDA IPC within docker repeatedly seems to cause issues on the CI machine
OMPI_MCA_btl_smcuda_use_cuda_ipc: 0
# https://github.com/open-mpi/ompi/issues/4948#issuecomment-395468231
OMPI_MCA_btl_vader_single_copy_mechanism: none

Expand All @@ -22,7 +24,7 @@ jobs:
container:
image: ghcr.io/parthenon-hpc-lab/cuda11.6-mpi-hdf5-ascent
# map to local user id on CI machine to allow writing to build cache
options: --user 1001
options: --user 1001 --cap-add CAP_SYS_PTRACE --shm-size="8g" --ulimit memlock=134217728
steps:
- uses: actions/checkout@v3
with:
Expand All @@ -47,7 +49,7 @@ jobs:
container:
image: ghcr.io/parthenon-hpc-lab/cuda11.6-mpi-hdf5-ascent
# map to local user id on CI machine to allow writing to build cache
options: --user 1001
options: --user 1001 --cap-add CAP_SYS_PTRACE --shm-size="8g" --ulimit memlock=134217728
steps:
- uses: actions/checkout@v3
with:
Expand Down Expand Up @@ -79,7 +81,7 @@ jobs:
container:
image: ghcr.io/parthenon-hpc-lab/cuda11.6-mpi-hdf5-ascent
# map to local user id on CI machine to allow writing to build cache
options: --user 1001
options: --user 1001 --cap-add CAP_SYS_PTRACE --shm-size="8g" --ulimit memlock=134217728
steps:
- uses: actions/checkout@v3
with:
Expand Down
28 changes: 16 additions & 12 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,24 +4,28 @@

### Added (new features/APIs/variables/...)
- [[PR 1142]](https://github.com/parthenon-hpc-lab/parthenon/pull/1142) Unify par_dispatch, par_for_outer & par_for_inner overloads
- [[PR 1210]](https://github.com/parthenon-hpc-lab/parthenon/pull/1210) Add cycle based output
- [[PR 1103]](https://github.com/parthenon-hpc-lab/parthenon/pull/1103) Add sparsity to vector wave equation test
- [[PR 1185]](https://github.com/parthenon-hpc-lab/parthenon/pull/1185/files) Bugfix to particle defragmentation
- [[PR 1185]](https://github.com/parthenon-hpc-lab/parthenon/pull/1185) Bugfix to particle defragmentation
- [[PR 1184]](https://github.com/parthenon-hpc-lab/parthenon/pull/1184) Fix swarm block neighbor indexing in 1D, 2D
- [[PR 1183]](https://github.com/parthenon-hpc-lab/parthenon/pull/1183) Fix particle leapfrog example initialization data
- [[PR 1179]](https://github.com/parthenon-hpc-lab/parthenon/pull/1179) Make a global variable for whether simulation is a restart
- [[PR 1171]](https://github.com/parthenon-hpc-lab/parthenon/pull/1171) Add PARTHENON_USE_SYSTEM_PACKAGES build option
- [[PR 1161]](https://github.com/parthenon-hpc-lab/parthenon/pull/1161) Make flux field Metadata accessible, add Metadata::CellMemAligned flag, small performance upgrades

### Changed (changing behavior/API/variables/...)
- [[PR 1191]](https://github.com/parthenon-hpc-lab/parthenon/pull/1191) Update Kokkos version to 4.4.1
- [[PR 1209]](https://github.com/parthenon-hpc-lab/parthenon/pull/1209) Ordered history output
- [[PR 1206]](https://github.com/parthenon-hpc-lab/parthenon/pull/1206) Leapfrog fix
- [[PR1203]](https://github.com/parthenon-hpc-lab/parthenon/pull/1203) Pin Ubuntu CI image
- [[PR1177]](https://github.com/parthenon-hpc-lab/parthenon/pull/1177) Make mesh-level boundary conditions usable without the "user" flag
- [[PR 1203]](https://github.com/parthenon-hpc-lab/parthenon/pull/1203) Pin Ubuntu CI image
- [[PR 1177]](https://github.com/parthenon-hpc-lab/parthenon/pull/1177) Make mesh-level boundary conditions usable without the "user" flag
- [[PR 1187]](https://github.com/parthenon-hpc-lab/parthenon/pull/1187) Make DataCollection::Add safer and generalize MeshBlockData::Initialize
- [[Issue 1165]](https://github.com/parthenon-hpc-lab/parthenon/issues/1165) Bump Kokkos submodule to 4.4.1
- [[PR 1171]](https://github.com/parthenon-hpc-lab/parthenon/pull/1171) Add PARTHENON_USE_SYSTEM_PACKAGES build option
- [[PR 1172]](https://github.com/parthenon-hpc-lab/parthenon/pull/1172) Make parthenon manager robust against external MPI init and finalize calls

### Fixed (not changing behavior/API/variables/...)
- [[PR 1211]](https://github.com/parthenon-hpc-lab/parthenon/pull/1211) remove inline from WriteTaskGraph
- [[PR 1188]](https://github.com/parthenon-hpc-lab/parthenon/pull/1188) Fix hdf5 output issue for metadata none variables, update test.
- [[PR 1170]](https://github.com/parthenon-hpc-lab/parthenon/pull/1170) Fixed incorrect initialization of array by a const not constexpr
- [[PR 1189]](https://github.com/parthenon-hpc-lab/parthenon/pull/1189) Address CUDA MPI/IPC issue with Kokkos <=4.4.1
- [[PR 1178]](https://github.com/parthenon-hpc-lab/parthenon/pull/1178) Fix issue with mesh pointer when using relative residual tolerance in BiCGSTAB solver.
Expand All @@ -34,7 +38,7 @@


### Incompatibilities (i.e. breaking changes)
- [[PR1177]](https://github.com/parthenon-hpc-lab/parthenon/pull/1177) Make mesh-level boundary conditions usable without the "user" flag
- [[PR 1177]](https://github.com/parthenon-hpc-lab/parthenon/pull/1177) Make mesh-level boundary conditions usable without the "user" flag

## Release 24.08
Date: 2024-08-30
Expand Down Expand Up @@ -158,12 +162,12 @@ Date: 2024-03-21
- [[PR 973]](https://github.com/parthenon-hpc-lab/parthenon/pull/973) Multigrid performance upgrades

### Fixed (not changing behavior/API/variables/...)
- [[PR1023]](https://github.com/parthenon-hpc-lab/parthenon/pull/1023) Fix broken param of a scalar bool
- [[PR1012]](https://github.com/parthenon-hpc-lab/parthenon/pull/1012) Remove accidentally duplicated code
- [[PR992]](https://github.com/parthenon-hpc-lab/parthenon/pull/992) Allow custom PR ops with sparse pools
- [[PR988]](https://github.com/parthenon-hpc-lab/parthenon/pull/988) Fix bug in neighbor finding routine for small, periodic, refined meshes
- [[PR986]](https://github.com/parthenon-hpc-lab/parthenon/pull/986) Fix bug in sparse boundary communication BndInfo cacheing
- [[PR978]](https://github.com/parthenon-hpc-lab/parthenon/pull/978) remove erroneous sparse check
- [[PR 1023]](https://github.com/parthenon-hpc-lab/parthenon/pull/1023) Fix broken param of a scalar bool
- [[PR 1012]](https://github.com/parthenon-hpc-lab/parthenon/pull/1012) Remove accidentally duplicated code
- [[PR 992]](https://github.com/parthenon-hpc-lab/parthenon/pull/992) Allow custom PR ops with sparse pools
- [[PR 988]](https://github.com/parthenon-hpc-lab/parthenon/pull/988) Fix bug in neighbor finding routine for small, periodic, refined meshes
- [[PR 986]](https://github.com/parthenon-hpc-lab/parthenon/pull/986) Fix bug in sparse boundary communication BndInfo caching
- [[PR 978]](https://github.com/parthenon-hpc-lab/parthenon/pull/978) remove erroneous sparse check

### Infrastructure (changes irrelevant to downstream codes)
- [[PR 1027]](https://github.com/parthenon-hpc-lab/parthenon/pull/1027) Refactor RestartReader as abstract class
Expand Down Expand Up @@ -230,7 +234,7 @@ Date: 2023-11-16
- [[PR 901]](https://github.com/parthenon-hpc-lab/parthenon/pull/901) Implement shared element ownership model

### Removed (removing behavior/API/variables/...)
- [[PR 930](https://github.com/parthenon-hpc-lab/parthenon/pull/930) Remove ParthenonManager::ParthenonInit as it is error-prone and the split functions are the recommended usage.
- [[PR 930]](https://github.com/parthenon-hpc-lab/parthenon/pull/930) Remove ParthenonManager::ParthenonInit as it is error-prone and the split functions are the recommended usage.


## Release 0.8.0
Expand Down
4 changes: 2 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -75,9 +75,9 @@ include(cmake/Format.cmake)
include(cmake/Lint.cmake)

# regression test reference data
set(REGRESSION_GOLD_STANDARD_VER 24 CACHE STRING "Version of gold standard to download and use")
set(REGRESSION_GOLD_STANDARD_VER 25 CACHE STRING "Version of gold standard to download and use")
set(REGRESSION_GOLD_STANDARD_HASH
"SHA512=e220df92a335131131e42ddb52dc221a6dbd6bb56361483b4af0292620eeb82ffb21ef3b95fd9a7c5cc158fb754da0bf1a1015bec98b5bbad05f4bceb1ee99bc"
"SHA512=314dc8312366d81ba33d1fde25812e9a7697b2f529de29e22662df0d458f1c4bc5b5bb4e649888170f66ffec0df1be20a9cf401944531c1c1ad835e26eaad28f"
CACHE STRING "Hash of default gold standard file to download")
option(REGRESSION_GOLD_STANDARD_SYNC "Automatically sync gold standard files." ON)

Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ Parthenon -- a performance portable block-structured adaptive mesh refinement fr

* CMake 3.16 or greater
* C++17 compatible compiler
* Kokkos 4.0.1 or greater
* Kokkos 4.4.1 or greater

## Optional (enabling features)

Expand Down
3 changes: 3 additions & 0 deletions cmake/machinecfg/GitHubActions.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@ message(STATUS "Loading machine configuration for GitHub Actions CI. ")

# common options
set(NUM_MPI_PROC_TESTING "2" CACHE STRING "CI runs tests with 2 MPI ranks")
set(Kokkos_ENABLE_ROCTHRUST OFF CACHE BOOL "Temporarily disabled as the container needs to be updated to the `-complete` base image.")

set(CMAKE_CXX_FLAGS_DBGNOSYM "-O0" CACHE STRING "Debug build without symbols")

set(MACHINE_CXX_FLAGS "")
if (${MACHINE_VARIANT} MATCHES "cuda")
Expand Down
41 changes: 40 additions & 1 deletion doc/sphinx/src/development.rst
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,10 @@ Kokkos wrappers/abstractions
- ``par_for`` wrappers use inclusive bounds, i.e., the loop will
include the last index given
- ``ParArrayND`` arrays by default allocate on the *device* using
default precision configured
default precision configured and come with a ``State`` that can
be used to store additional metadata.
- ``ParArray#DRaw`` directly map to Kokkos ``Views`` that are allocated
on *device* using default precision.
- To create an array on the host with identical layout to the device
array either use

Expand Down Expand Up @@ -62,6 +65,42 @@ parallelism interface that is needed for managing memory cached in
tightly nested loops. The wrappers are documented
:ref:`here <nested par for>`.

View of Views
-------------

Special care needs to be taken when working with a ``View`` of ``Views``.

To repeat the Kokkos documentation: `Don't use them <https://kokkos.org/kokkos-core-wiki/ProgrammingGuide/View.html#can-i-make-a-view-of-views>`__

But if you have to (which is the case in some places inside Parthenon)
then follow this pattern:

.. code:: c++

ParArray1DRaw<ParArray1D<Real>> view_of_pararrays(parthenon::ViewOfViewAlloc("myname"), 10);

The ``ViewOfViewAlloc`` ensures that the ``Kokkos::SequentialHostInit`` property is added,
which results in the (inner ``View`` ) deallocators being called on the host (rather than on
the device by default).
Also note the use of the "raw" ``ParArray1DRaw``, which directly maps to a Kokkos ``View``
(that is required to process the allocation property as this interface is not exposed
in the more generic ``ParArrayND``).

Similarly, when you create a host mirror of said ``View`` of ``Views``, add the additional
property for the same reason.

.. code:: c++

// explicit theoretical example -- don't use this
auto view_of_pararrays_h =
Kokkos::create_mirror_view(Kokkos::view_alloc(Kokkos::SequentialHostInit), view_of_pararrays);

// but instead use this interface provided by Parthenon:
auto view_of_pararrays_h = create_view_of_view_mirror(view_of_pararrays);


Note that the ``SequentialHostInit`` was only added in Kokkos 4.4.1 (which is now the default in Parthenon).

The need for reductions within function handling ``MeshBlock`` data
-------------------------------------------------------------------

Expand Down
18 changes: 15 additions & 3 deletions doc/sphinx/src/outputs.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,20 @@ Outputs
Outputs from Parthenon are controlled via ``<parthenon/output*>`` blocks,
where ``*`` should be replaced by a unique integer for each block.

The frequency of outputs can be controlled for each block separately
and can be triggered by either (simulation) time or cycle, i.e.,

- ``dt = 0.1`` means that the output for the block is written every 0.1
in simulation time.
- ``dn = 100`` means that the output for the block is written every 100
cycles.

Note that only one option can be chosen for a given block.
To disable an output block without removing it from the input file set
the block's ``dt < 0.0``.
the block's ``dt < 0.0`` and ``dn < 0`` (which is also the default
if the parameter is not provided in the input file).

In addition to time base outputs, two additional options to trigger
In addition to time or cycle based outputs, two additional options to trigger
outputs (applies to HDF5, restart and histogram outputs) exist.

- Signaling: If ``Parthenon`` catches a signal, e.g., ``SIGALRM`` which
Expand Down Expand Up @@ -194,7 +204,9 @@ block might look like

This will produce a text file (``.hst``) output file every 1 units of
simulation time. The content of the file is determined by the functions
enrolled by specific packages, see :ref:`state history output`.
enrolled by specific packages, see :ref:`state history output`. Per-package history
outputs will always be in alphabetical order by package name, which may not match
the order in which packages were added to a simulation.

Histograms
----------
Expand Down
33 changes: 33 additions & 0 deletions example/advection/advection_package.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,20 @@ std::shared_ptr<StateDescriptor> Initialize(ParameterInput *pin) {
m = Metadata({Metadata::Cell, Metadata::OneCopy}, std::vector<int>({1}));
pkg->AddField("my_derived_var", m);

// Create a Metadata::None variable for IO testing purposes.
// Only load if test_metadata_none is specified in the Advection block
auto test_metadata_none =
pin->GetOrAddBoolean("Advection", "test_metadata_none", false);
pkg->AddParam<bool>("test_metadata_none", test_metadata_none);
if (test_metadata_none) {
const int nx1 = pin->GetOrAddInteger("parthenon/meshblock", "nx1", 1);
const int nx2 = pin->GetOrAddInteger("parthenon/meshblock", "nx2", 1);
const int nx3 = pin->GetOrAddInteger("parthenon/meshblock", "nx3", 1);
std::vector<int> test_shape = {nx1 + 1, nx2 + 1, nx3 + 1, 3};
m = Metadata({Metadata::OneCopy, Metadata::None}, test_shape);
pkg->AddField("metadata_none_var", m);
}

// List (vector) of HistoryOutputVar that will all be enrolled as output variables
parthenon::HstVar_list hst_vars = {};
// Now we add a couple of callback functions
Expand Down Expand Up @@ -281,6 +295,7 @@ AmrTag CheckRefinement(MeshBlockData<Real> *rc) {
void PreFill(MeshBlockData<Real> *rc) {
auto pmb = rc->GetBlockPointer();
auto pkg = pmb->packages.Get("advection_package");
const bool test_metadata_none = pkg->Param<bool>("test_metadata_none");
bool fill_derived = pkg->Param<bool>("fill_derived");

if (fill_derived) {
Expand All @@ -302,6 +317,24 @@ void PreFill(MeshBlockData<Real> *rc) {
v(out + n, k, j, i) = 1.0 - v(in + n, k, j, i);
});
}

// Fill the metadata::None var with index gymnastics.
if (test_metadata_none) {
const int nx1 = pmb->cellbounds.ncellsi(IndexDomain::interior);
const int nx2 = pmb->cellbounds.ncellsj(IndexDomain::interior);
const int nx3 = pmb->cellbounds.ncellsk(IndexDomain::interior);

// packing in principle unnecessary/convoluted here and just done for demonstration
std::vector<std::string> vars({"metadata_none_var"});
PackIndexMap imap;
const auto &v = rc->PackVariables(vars, imap);

pmb->par_for(
PARTHENON_AUTO_LABEL, 0, 2, 0, nx3, 0, nx2, 0, nx1,
KOKKOS_LAMBDA(const int n, const int k, const int j, const int i) {
v(n, k, j, i) = n + k + j + i;
});
}
}

// this is the package registered function to fill derived
Expand Down
2 changes: 1 addition & 1 deletion external/Kokkos
Submodule Kokkos updated 1031 files
14 changes: 6 additions & 8 deletions scripts/docker/Dockerfile.hip-rocm
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM rocm/dev-ubuntu-20.04:5.4.3
FROM rocm/dev-ubuntu-24.04:6.2

RUN apt-get clean && apt-get update -y && \
DEBIAN_FRONTEND="noninteractive" TZ=America/New_York apt-get install -y --no-install-recommends git python3-minimal libpython3-stdlib bc hwloc wget openssh-client python3-numpy python3-h5py python3-matplotlib lcov curl cmake ninja-build openmpi-bin libopenmpi-dev && \
Expand All @@ -14,12 +14,10 @@ RUN cd /tmp && \
cd / && \
rm -rf /tmp/hdf5-1.10.8*

# "mpic++ --showme" forgets open-pal in Ubuntu 20.04 + OpenMPI 4.0.3
# https://bugs.launchpad.net/ubuntu/+source/openmpi/+bug/1941786
# https://github.com/open-mpi/ompi/issues/9317
ENV LDFLAGS="-lopen-pal"

RUN update-alternatives --install /usr/bin/python python /usr/bin/python3 10

# uid 1000 maps to the one running the container on the CI host
RUN useradd --create-home --shell /bin/bash -u 1000 -G render ci
# Latest image has default user with uid 1000 (which maps to the one running the container on the CI host).
# Need to add user to the group that can access the GPU
RUN usermod -a -G render ubuntu

WORKDIR /home/ubuntu
1 change: 0 additions & 1 deletion src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,6 @@ add_library(parthenon
outputs/restart.hpp
outputs/restart_hdf5.cpp
outputs/restart_hdf5.hpp
outputs/vtk.cpp

parthenon/driver.hpp
parthenon/package.hpp
Expand Down
Loading

0 comments on commit e6c7b93

Please sign in to comment.