diff --git a/.github/workflows/codecov.yml b/.github/workflows/codecov.yml index f0062b4..1c9dbcd 100644 --- a/.github/workflows/codecov.yml +++ b/.github/workflows/codecov.yml @@ -11,7 +11,7 @@ on: jobs: codecov: runs-on: ubuntu-latest - timeout-minutes: 10 + timeout-minutes: 5 steps: - name: Checkout Repository uses: actions/checkout@v4 diff --git a/.github/workflows/linux-clang.yml b/.github/workflows/linux-clang.yml index 6948fea..b510d06 100644 --- a/.github/workflows/linux-clang.yml +++ b/.github/workflows/linux-clang.yml @@ -13,6 +13,7 @@ env: jobs: linux-clang: runs-on: ubuntu-latest + timeout-minutes: 10 strategy: matrix: clang-version: [14, 15, 16, 17, 18] @@ -75,7 +76,8 @@ jobs: -DCMAKE_C_COMPILER=$CC \ -DCMAKE_CXX_COMPILER=$CXX \ -DDYNAMPI_BUILD_TESTS=ON \ - -DDYNAMPI_BUILD_BENCHMARKS=ON" + -DDYNAMPI_BUILD_BENCHMARKS=ON \ + -DDYNAMPI_MAX_MPI_RANK=8" if [ "$MPI_TYPE" == "MPICH" ]; then CMAKE_ARGS="$CMAKE_ARGS -DMPI_C_COMPILER=$CC -DMPI_CXX_COMPILER=$CXX" @@ -102,4 +104,5 @@ jobs: fi cd build echo "Testing with $MPI_TYPE" - ctest --output-on-failure --parallel -C ${{ matrix.build-type }} + export DYNAMPI_MAX_MPI_RANK=8 + ctest --output-on-failure -j 1 --timeout 180 -C ${{ matrix.build-type }} diff --git a/.github/workflows/linux-gcc.yml b/.github/workflows/linux-gcc.yml index d4baf48..b1a63c3 100644 --- a/.github/workflows/linux-gcc.yml +++ b/.github/workflows/linux-gcc.yml @@ -13,6 +13,7 @@ env: jobs: linux-gcc: runs-on: ubuntu-latest + timeout-minutes: 10 strategy: matrix: gcc-version: [11, 12, 13, 14] @@ -77,7 +78,8 @@ jobs: -DCMAKE_C_COMPILER=$CC \ -DCMAKE_CXX_COMPILER=$CXX \ -DDYNAMPI_BUILD_TESTS=ON \ - -DDYNAMPI_BUILD_BENCHMARKS=ON" + -DDYNAMPI_BUILD_BENCHMARKS=ON \ + -DDYNAMPI_MAX_MPI_RANK=8" if [ "$MPI_TYPE" == "MPICH" ]; then CMAKE_ARGS="$CMAKE_ARGS -DMPI_C_COMPILER=$MPI_C_COMPILER -DMPI_CXX_COMPILER=$MPI_CXX_COMPILER" fi @@ -97,4 +99,5 @@ jobs: fi cd build echo "Testing with $MPI_TYPE" - ctest --output-on-failure --parallel -C ${{ 
matrix.build-type }} + export DYNAMPI_MAX_MPI_RANK=8 + ctest --output-on-failure -j 1 --timeout 180 -C ${{ matrix.build-type }} diff --git a/.github/workflows/linux-intel.yml b/.github/workflows/linux-intel.yml index 967aae4..2737702 100644 --- a/.github/workflows/linux-intel.yml +++ b/.github/workflows/linux-intel.yml @@ -13,6 +13,7 @@ env: jobs: linux-intel: runs-on: ubuntu-latest + timeout-minutes: 10 strategy: matrix: build-type: [Debug, Release] @@ -37,7 +38,8 @@ jobs: -DCMAKE_C_COMPILER=icx \ -DCMAKE_CXX_COMPILER=icpx \ -DDYNAMPI_BUILD_TESTS=ON \ - -DDYNAMPI_BUILD_BENCHMARKS=ON + -DDYNAMPI_BUILD_BENCHMARKS=ON \ + -DDYNAMPI_MAX_MPI_RANK=8 - name: Build with Intel shell: bash run: | @@ -49,4 +51,5 @@ jobs: source /opt/intel/oneapi/setvars.sh cd build echo "Testing with $MPI_TYPE" - ctest --output-on-failure --parallel -C ${{ matrix.build-type }} + export DYNAMPI_MAX_MPI_RANK=8 + ctest --output-on-failure -j 1 --timeout 180 -C ${{ matrix.build-type }} diff --git a/.github/workflows/macos.yml b/.github/workflows/macos.yml index c05ac0c..d83bff2 100644 --- a/.github/workflows/macos.yml +++ b/.github/workflows/macos.yml @@ -13,6 +13,7 @@ env: jobs: macos-clang: runs-on: macos-latest + timeout-minutes: 5 strategy: matrix: build-type: [Debug, Release] @@ -29,7 +30,8 @@ jobs: cmake -B build \ -DCMAKE_BUILD_TYPE=${{ matrix.build-type }} \ -DDYNAMPI_BUILD_TESTS=ON \ - -DDYNAMPI_BUILD_BENCHMARKS=ON + -DDYNAMPI_BUILD_BENCHMARKS=ON \ + -DDYNAMPI_MAX_MPI_RANK=8 - name: Build on macOS shell: bash run: cmake --build build --config ${{ matrix.build-type }} --parallel @@ -38,4 +40,5 @@ jobs: run: | cd build echo "Testing with $MPI_TYPE" + export DYNAMPI_MAX_MPI_RANK=8 ctest --output-on-failure --parallel -C ${{ matrix.build-type }} diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml index 02c6dd2..65f9595 100644 --- a/.github/workflows/pre-commit.yml +++ b/.github/workflows/pre-commit.yml @@ -10,6 +10,7 @@ on: jobs: pre-commit: runs-on: 
ubuntu-latest + timeout-minutes: 5 steps: - uses: actions/checkout@v4 with: diff --git a/.github/workflows/sanitizers.yml b/.github/workflows/sanitizers.yml index 5f1811f..19b60b6 100644 --- a/.github/workflows/sanitizers.yml +++ b/.github/workflows/sanitizers.yml @@ -13,6 +13,7 @@ env: jobs: linux-debug-sanitizers: runs-on: ubuntu-latest + timeout-minutes: 5 strategy: matrix: sanitizer: [address, undefined] @@ -33,6 +34,7 @@ jobs: -DCMAKE_CXX_COMPILER=g++-14 \ -DDYNAMPI_BUILD_TESTS=ON \ -DDYNAMPI_BUILD_BENCHMARKS=ON \ + -DDYNAMPI_MAX_MPI_RANK=8 \ -DCMAKE_CXX_FLAGS="-fsanitize=${{ matrix.sanitizer }} -fno-omit-frame-pointer" \ -DCMAKE_C_FLAGS="-fsanitize=${{ matrix.sanitizer }} -fno-omit-frame-pointer" \ -DCMAKE_EXE_LINKER_FLAGS="-fsanitize=${{ matrix.sanitizer }}" \ @@ -46,4 +48,5 @@ jobs: cd build echo "Testing with $MPI_TYPE and ${{ matrix.sanitizer }} sanitizer" export LSAN_OPTIONS=suppressions=${{ github.workspace }}/test/lsan.supp - ctest --output-on-failure --parallel -C Debug + export DYNAMPI_MAX_MPI_RANK=8 + ctest --output-on-failure --verbose -C Debug diff --git a/.github/workflows/smpi.yml b/.github/workflows/smpi.yml index d6a6b50..144b079 100644 --- a/.github/workflows/smpi.yml +++ b/.github/workflows/smpi.yml @@ -11,7 +11,7 @@ on: jobs: smpi: runs-on: ubuntu-latest - timeout-minutes: 10 + timeout-minutes: 5 steps: - name: Checkout Repository uses: actions/checkout@v4 @@ -46,7 +46,8 @@ jobs: -DMPIEXEC_EXECUTABLE=/usr/bin/smpirun \ -DMPIEXEC_PREFLAGS=-platform\;platform.xml \ -DDYNAMPI_BUILD_TESTS=ON \ - -DDYNAMPI_BUILD_BENCHMARKS=OFF" + -DDYNAMPI_BUILD_BENCHMARKS=OFF \ + -DDYNAMPI_MAX_MPI_RANK=8" echo "CMAKE_ARGS: $CMAKE_ARGS" @@ -81,7 +82,8 @@ jobs: run: | echo "Testing with $MPI_TYPE" cp platform.xml ${{ steps.strings.outputs.build-output-dir }}/test + export DYNAMPI_MAX_MPI_RANK=8 ctest --output-on-failure --parallel - echo "Additionally running MPI tests with 100 ranks..." 
- smpirun -np 100 -platform platform.xml ./test/mpi_test + echo "Additionally running MPI tests with 8 ranks..." + smpirun -np 8 -platform platform.xml ./test/mpi_test diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index 8556ea9..90d1fc5 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -19,6 +19,7 @@ jobs: windows-msmpi: name: Windows MS-MPI (MPI 3.1) runs-on: windows-latest + timeout-minutes: 10 strategy: matrix: build-type: [Debug, Release] @@ -34,7 +35,8 @@ jobs: cmake -B build ` -DCMAKE_BUILD_TYPE=${{ matrix.build-type }} ` -DDYNAMPI_BUILD_TESTS=ON ` - -DDYNAMPI_BUILD_BENCHMARKS=ON + -DDYNAMPI_BUILD_BENCHMARKS=ON ` + -DDYNAMPI_MAX_MPI_RANK=8 - name: Build (MS-MPI) shell: pwsh run: cmake --build build --config ${{ matrix.build-type }} --parallel @@ -43,10 +45,12 @@ jobs: run: | cd build echo "Testing with Microsoft MPI (MPI 3.1 features)" - ctest --output-on-failure --parallel -C ${{ matrix.build-type }} + $env:DYNAMPI_MAX_MPI_RANK = "8" + ctest --output-on-failure -j 1 --timeout 180 -C ${{ matrix.build-type }} windows-mingw: name: Windows MinGW runs-on: windows-latest + timeout-minutes: 10 strategy: matrix: build-type: [Debug, Release] @@ -97,7 +101,8 @@ jobs: -DMPI_C_LIBRARIES=/mingw64/lib/libmsmpi.a \ -DMPI_CXX_LIBRARIES=/mingw64/lib/libmsmpi.a \ -DDYNAMPI_BUILD_TESTS=ON \ - -DDYNAMPI_BUILD_BENCHMARKS=ON + -DDYNAMPI_BUILD_BENCHMARKS=ON \ + -DDYNAMPI_MAX_MPI_RANK=8 - name: Build (MinGW) shell: msys2 {0} run: cmake --build build --config ${{ matrix.build-type }} --parallel @@ -106,10 +111,12 @@ jobs: run: | cd build echo "Testing with $MPI_TYPE" - ctest --output-on-failure --parallel -C ${{ matrix.build-type }} + export DYNAMPI_MAX_MPI_RANK=8 + ctest --output-on-failure -j 1 --timeout 180 -C ${{ matrix.build-type }} windows-intel-mpi: name: Windows Intel MPI (MPI 4.0) runs-on: windows-latest + timeout-minutes: 10 strategy: matrix: build-type: [Debug, Release] @@ -126,7 +133,8 @@ jobs: cmake -B build ` 
-DCMAKE_BUILD_TYPE=${{ matrix.build-type }} ` -DDYNAMPI_BUILD_TESTS=ON ` - -DDYNAMPI_BUILD_BENCHMARKS=ON + -DDYNAMPI_BUILD_BENCHMARKS=ON ` + -DDYNAMPI_MAX_MPI_RANK=8 - name: Build (Intel MPI) shell: pwsh run: cmake --build build --config ${{ matrix.build-type }} --parallel @@ -135,4 +143,5 @@ jobs: run: | cd build echo "Testing with Intel MPI (MPI 4.0 support)" - ctest --output-on-failure --parallel -C ${{ matrix.build-type }} + $env:DYNAMPI_MAX_MPI_RANK = "8" + ctest --output-on-failure -j 1 --timeout 180 -C ${{ matrix.build-type }} diff --git a/.gitignore b/.gitignore index 649aad9..ef46468 100644 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,6 @@ bin Testing *.btr commands.txt +core.* +__pycache__/ +*.png diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 83c7b15..6b7858f 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -18,6 +18,14 @@ repos: rev: v20.1.8 hooks: - id: clang-format + - repo: local + hooks: + - id: cppcheck + name: cppcheck + entry: cppcheck --enable=warning --suppress=missingIncludeSystem -I include + --std=c++20 --inline-suppr + language: system + types_or: [c, c++] - repo: https://github.com/google/yamlfmt.git rev: v0.17.2 hooks: diff --git a/CMakeLists.txt b/CMakeLists.txt index d102c1c..51c19c6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -61,6 +61,9 @@ if(MSVC) else() add_compile_options(-Wall -Wextra -Wpedantic -Werror -fno-ms-extensions) endif() +if(CMAKE_CXX_COMPILER_ID STREQUAL "IntelLLVM") + add_compile_options(-diag-disable=10430 -Wno-unknown-warning-option) +endif() set(CMAKE_CXX_FLAGS_COVERAGE "-O0 -g -coverage -fprofile-arcs -ftest-coverage \ -fno-elide-constructors ${CMAKE_CXX_FLAGS_COVERAGE}") diff --git a/REUSE.toml b/REUSE.toml new file mode 100644 index 0000000..9e98fec --- /dev/null +++ b/REUSE.toml @@ -0,0 +1,13 @@ +version = 1 + +[[annotations]] +path = "benchmark/results/**.csv" +SPDX-FileCopyrightText = "2025 QDX Technologies" +SPDX-License-Identifier = "Apache-2.0" 
+SPDX-Comment = "Generated benchmark results." + +[[annotations]] +path = "**.png" +SPDX-FileCopyrightText = "2025 QDX Technologies" +SPDX-License-Identifier = "Apache-2.0" +SPDX-Comment = "Generated result plots." diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt index 6391c99..5b32eaa 100644 --- a/benchmark/CMakeLists.txt +++ b/benchmark/CMakeLists.txt @@ -9,11 +9,21 @@ FetchContent_Declare( ) FetchContent_MakeAvailable(cxxopts) -add_executable(asymptotic_distribution_throughput - asymptotic_distribution_throughput.cpp -) -target_link_libraries(asymptotic_distribution_throughput - PRIVATE - dynampi - cxxopts::cxxopts +set(benchmarks + asymptotic_distribution_throughput + strong_scaling_distribution_rate + pingpong + timer_resolution + naive_shutdown_time ) + +foreach(benchmark IN LISTS benchmarks) + add_executable(${benchmark} + ${benchmark}.cpp + ) + target_link_libraries(${benchmark} + PRIVATE + dynampi + cxxopts::cxxopts + ) +endforeach() diff --git a/benchmark/asymptotic_distribution_throughput.cpp b/benchmark/asymptotic_distribution_throughput.cpp index db92dfb..8467103 100644 --- a/benchmark/asymptotic_distribution_throughput.cpp +++ b/benchmark/asymptotic_distribution_throughput.cpp @@ -74,11 +74,12 @@ static double run_single_benchmark(const BenchmarkOptions& opts) { MPI_Barrier(MPI_COMM_WORLD); using Task = size_t; - using Result = std::vector; + // using Result = std::vector; + using Result = size_t; - auto worker_task = [&opts](Task task) -> Result { - return std::vector(opts.message_size, std::byte(task)); - }; + // auto worker_task = [&opts](Task task) -> Result { + // return std::vector(opts.message_size, std::byte(task)); + auto worker_task = [](Task task) -> Result { return task; }; dynampi::Timer dynamic_timer; auto dynamic_communicator = make_dynamic_communicator(opts.remove_root_from_distribution); @@ -103,8 +104,9 @@ static double run_single_benchmark(const BenchmarkOptions& opts) { if (work_distributer.is_root_manager()) { 
std::cout << "Dynamic task distribution completed successfully." << std::endl; const auto& stats = work_distributer.get_statistics(); - for (size_t i = 0; i < stats.worker_task_counts.size(); i++) { - std::cout << "Rank " << i << ": " << "Tasks: " << stats.worker_task_counts[i] << std::endl; + for (size_t i = 0; i < stats.worker_task_counts->size(); i++) { + std::cout << "Rank " << i << ": " << "Tasks: " << stats.worker_task_counts->at(i) + << std::endl; } std::cout << "Total messages sent: " << stats.comm_statistics.send_count << std::endl; std::cout << "Total messages received: " << stats.comm_statistics.recv_count << std::endl; diff --git a/benchmark/aurora/aurora_compile.sh b/benchmark/aurora/aurora_compile.sh new file mode 100755 index 0000000..7506333 --- /dev/null +++ b/benchmark/aurora/aurora_compile.sh @@ -0,0 +1,47 @@ +#!/usr/bin/env bash + +# SPDX-FileCopyrightText: 2025 QDX Technologies. Authored by Ryan Stocks +# SPDX-License-Identifier: Apache-2.0 + +set -euo pipefail + +module load cmake + +# Default values +BUILD_DIR="build" +BUILD_TYPE="Release" + +usage() { + echo "Usage: $0 [-d build_dir] [-t build_type] [--clean]" + echo + echo "Options:" + echo " -d DIR Build directory (default: build)" + echo " -t TYPE Build type: Release, Debug, RelWithDebInfo, MinSizeRel (default: Release)" + echo " --clean Remove build directory before configuring" + exit 1 +} + +# Parse args +CLEAN=0 +while [[ $# -gt 0 ]]; do + case "$1" in + -d) BUILD_DIR="$2"; shift 2 ;; + -t) BUILD_TYPE="$2"; shift 2 ;; + --clean) CLEAN=1; shift ;; + -h|--help) usage ;; + *) echo "Unknown option: $1"; usage ;; + esac +done + +if [[ $CLEAN -eq 1 && -d "$BUILD_DIR" ]]; then + echo "Cleaning $BUILD_DIR" + rm -rf "$BUILD_DIR" +fi + +echo "Configuring with CMake..." +cmake -DCMAKE_BUILD_TYPE="$BUILD_TYPE" -B "$BUILD_DIR" + +echo "Building..." 
+cmake --build "$BUILD_DIR" -- -j"$(nproc)" + +echo "✅ Build finished in $BUILD_DIR ($BUILD_TYPE)" diff --git a/benchmark/naive_shutdown_time.cpp b/benchmark/naive_shutdown_time.cpp new file mode 100644 index 0000000..ac3d27a --- /dev/null +++ b/benchmark/naive_shutdown_time.cpp @@ -0,0 +1,174 @@ +/* + * SPDX-FileCopyrightText: 2025 QDX Technologies. Authored by Ryan Stocks + * SPDX-License-Identifier: Apache-2.0 + */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +using Task = uint32_t; +using Result = uint32_t; + +struct BenchmarkOptions { + uint64_t nodes = 0; + std::string system; + std::string output_path; +}; + +struct BenchmarkResult { + uint64_t workers = 0; + uint64_t world_size = 0; + double time_per_shutdown_us = 0.0; + uint64_t iterations = 0; +}; + +static void write_csv_header(std::ostream& os) { + os << "system,nodes,world_size,workers,time_per_shutdown_us,iterations\n"; +} + +static void write_csv_row(std::ostream& os, const BenchmarkOptions& opts, + const BenchmarkResult& result) { + os << opts.system << "," << opts.nodes << "," << result.world_size << "," << result.workers << "," + << result.time_per_shutdown_us << "," << result.iterations << "\n"; +} + +static BenchmarkResult run_benchmark([[maybe_unused]] const BenchmarkOptions& opts, MPI_Comm comm) { + dynampi::MPICommunicator<> comm_wrapper(comm, dynampi::MPICommunicator<>::Ownership::Reference); + int rank = 0; + int size = 0; + MPI_Comm_rank(comm, &rank); + MPI_Comm_size(comm, &size); + + const uint64_t num_workers = (size == 1) ? 
1 : static_cast(size - 1); + + // Simple worker function that does nothing + auto worker_function = [](Task task) -> Result { return static_cast(task); }; + + MPI_Barrier(comm_wrapper); + + // Overall timer for 10-second duration + dynampi::Timer overall_timer(dynampi::Timer::AutoStart::Yes); + const double target_duration_s = 10.0; + + // Per-iteration timer + dynampi::Timer iteration_timer(dynampi::Timer::AutoStart::No); + + double total_shutdown_time = 0.0; + uint64_t iterations = 0; + + while (true) { + bool should_continue = overall_timer.elapsed().count() < target_duration_s; + comm_wrapper.broadcast(should_continue); + if (!should_continue) { + break; + } + // Ensure all workers are ready + MPI_Barrier(comm_wrapper); + + { + dynampi::NaiveMPIWorkDistributor distributor( + worker_function, {.comm = comm, .manager_rank = 0, .auto_run_workers = true}); + + if (distributor.is_root_manager()) { + iteration_timer.reset(dynampi::Timer::AutoStart::Yes); + auto _ = distributor.finish_remaining_tasks(); + (void)_; + iteration_timer.stop(); + total_shutdown_time += iteration_timer.elapsed().count(); + iterations++; + } + } + + // Barrier to ensure all processes complete shutdown before next iteration + MPI_Barrier(comm_wrapper); + } + + // Calculate average shutdown time in microseconds + const double avg_shutdown_time_us = + (iterations > 0) ? 
(total_shutdown_time / static_cast(iterations)) * 1'000'000.0 + : 0.0; + + return BenchmarkResult{num_workers, static_cast(size), avg_shutdown_time_us, + iterations}; +} + +int main(int argc, char** argv) { + MPI_Init(&argc, &argv); + int world_rank = 0; + MPI_Comm_rank(MPI_COMM_WORLD, &world_rank); + + cxxopts::Options options("naive_shutdown_time", + "Benchmark naive distributor shutdown time with no tasks"); + options.add_options()("n,nodes", "Number of nodes for labeling output (defaults to world size)", + cxxopts::value()->default_value("0"))( + "S,system", "System label for plotting (frontier, aurora, ...)", + cxxopts::value()->default_value(""))( + "o,output", "Append results to CSV file", cxxopts::value()->default_value(""))( + "h,help", "Print usage"); + + cxxopts::ParseResult args; + try { + args = options.parse(argc, argv); + } catch (const std::exception& e) { + if (world_rank == 0) { + std::cerr << "Error parsing options: " << e.what() << "\n" << options.help() << std::endl; + } + MPI_Finalize(); + return 1; + } + + if (args.count("help")) { + if (world_rank == 0) { + std::cout << options.help() << std::endl; + } + MPI_Finalize(); + return 0; + } + + BenchmarkOptions opts; + opts.nodes = args["nodes"].as(); + opts.system = args["system"].as(); + opts.output_path = args["output"].as(); + + { + MPI_Comm comm = MPI_COMM_WORLD; + int rank = 0; + int size = 0; + MPI_Comm_rank(comm, &rank); + MPI_Comm_size(comm, &size); + if (opts.nodes == 0) { + opts.nodes = static_cast(size); + } + + BenchmarkResult result = run_benchmark(opts, comm); + + if (rank == 0) { + std::cout << "RESULT" + << " nodes=" << opts.nodes << " world_size=" << result.world_size + << " workers=" << result.workers + << " time_per_shutdown_us=" << result.time_per_shutdown_us + << " iterations=" << result.iterations << std::endl; + if (!opts.output_path.empty()) { + std::ifstream check(opts.output_path); + const bool needs_header = + !check.good() || check.peek() == 
std::ifstream::traits_type::eof(); + check.close(); + std::ofstream out(opts.output_path, std::ios::app); + if (needs_header) { + write_csv_header(out); + } + write_csv_row(out, opts, result); + } + } + } + MPI_Finalize(); + return 0; +} diff --git a/benchmark/pingpong.cpp b/benchmark/pingpong.cpp new file mode 100644 index 0000000..daf72c3 --- /dev/null +++ b/benchmark/pingpong.cpp @@ -0,0 +1,474 @@ +// SPDX-FileCopyrightText: 2025 QDX Technologies. Authored by Ryan Stocks +// SPDX-License-Identifier: Apache-2.0 + +// mpi_pair_bench.cpp +#ifdef _WIN32 +#define _CRT_SECURE_NO_WARNINGS +#endif +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +enum class Method { SEND, ISEND, BSEND, SSEND }; + +struct Options { + std::size_t min_bytes = 1; + std::size_t max_bytes = 1u << 25; // 32 MiB + int factor = 2; // geometric progression; use 1 for linear + int warmup = 10; + int iters = 100; + int only_rank = -1; // if >=0, test only pairs involving this rank + std::vector methods; // default: all + std::string outfile = "mpi_pair_bench.csv"; +}; + +struct PingResult { + double avg_rtt_s; // average round-trip time (per message) + double send_call_total_s; // total time spent inside send() calls across timed iterations +}; + +static void die(int rank, const std::string &msg) { + if (rank == 0) std::cerr << "Error: " << msg << std::endl; + MPI_Abort(MPI_COMM_WORLD, 1); +} + +static std::string method_name(Method m) { + switch (m) { + case Method::SEND: + return "send"; + case Method::ISEND: + return "isend"; + case Method::BSEND: + return "bsend"; + case Method::SSEND: + return "ssend"; + } + return "?"; +} + +static std::optional parse_method(const std::string &s) { + if (s == "send") { + return Method::SEND; + } + if (s == "isend") { + return Method::ISEND; + } + if (s == "bsend") { + return Method::BSEND; + } + if (s == "ssend") { + return Method::SSEND; + } + return 
std::nullopt; +} + +static void parse_unsigned_arg(int rank, int &i, int argc, char **argv, const char *name, + std::size_t &out) { + if (i + 1 >= argc) die(rank, std::string("missing value for ") + name); + try { + out = std::stoull(argv[++i]); + } catch (const std::invalid_argument &e) { + die(rank, std::string("invalid value for ") + name + ": " + argv[i] + ": " + e.what()); + } catch (const std::out_of_range &e) { + die(rank, std::string("invalid value for ") + name + ": " + argv[i] + ": " + e.what()); + } +} + +static void parse_int_arg(int rank, int &i, int argc, char **argv, const char *name, int &out) { + if (i + 1 >= argc) die(rank, std::string("missing value for ") + name); + try { + out = std::stoi(argv[++i]); + } catch (const std::invalid_argument &e) { + die(rank, std::string("invalid value for ") + name + ": " + argv[i] + ": " + e.what()); + } catch (const std::out_of_range &e) { + die(rank, std::string("invalid value for ") + name + ": " + argv[i] + ": " + e.what()); + } +} + +static Options parse_args(int argc, char **argv, int rank) { + Options opt; + bool methods_specified = false; + + for (int i = 1; i < argc; ++i) { + std::string a = argv[i]; + if (a == "--min-bytes") { + parse_unsigned_arg(rank, i, argc, argv, "--min-bytes", opt.min_bytes); + } else if (a == "--max-bytes") { + parse_unsigned_arg(rank, i, argc, argv, "--max-bytes", opt.max_bytes); + } else if (a == "--factor") { + parse_int_arg(rank, i, argc, argv, "--factor", opt.factor); + } else if (a == "--warmup") { + parse_int_arg(rank, i, argc, argv, "--warmup", opt.warmup); + } else if (a == "--iters") { + parse_int_arg(rank, i, argc, argv, "--iters", opt.iters); + } else if (a == "--outfile") { + if (i + 1 >= argc) die(rank, "missing value for --outfile"); + opt.outfile = argv[++i]; + } else if (a == "--only-rank") { + parse_int_arg(rank, i, argc, argv, "--only-rank", opt.only_rank); + } else if (a == "--methods") { + if (i + 1 >= argc) die(rank, "missing value for --methods"); + ++i; + 
methods_specified = true; + opt.methods.clear(); + std::string list = argv[i]; + size_t start = 0; + while (start <= list.size()) { + size_t comma = list.find(',', start); + std::string tok = + (comma == std::string::npos) ? list.substr(start) : list.substr(start, comma - start); + auto m = parse_method(tok); + if (!m) die(rank, "unknown method in --methods: " + tok); + opt.methods.push_back(*m); + if (comma == std::string::npos) break; + start = comma + 1; + } + } else if (a == "-h" || a == "--help") { + if (rank == 0) { + std::cout + << "MPI pairwise bandwidth/latency benchmark\n\n" + "Usage: mpirun -n

./mpi_pair_bench [options]\n\n" + "Options:\n" + " --min-bytes N starting message size (default 1)\n" + " --max-bytes N maximum message size (default 33554432 = 32 MiB)\n" + " --factor K size multiplier per step (default 2; use 1 for linear)\n" + " --warmup W warmup iterations per size (default 10)\n" + " --iters I timed iterations per size (default 100)\n" + " --methods LIST subset of: send,isend,bsend,ssend (default: all)\n" + " --only-rank R only test pairs involving rank R (default: all pairs)\n" + " --outfile PATH CSV output file (default mpi_pair_bench.csv)\n"; + } + MPI_Finalize(); + std::exit(0); + } else { + die(rank, "unknown argument: " + a); + } + } + + if (!methods_specified) { + opt.methods = {Method::SEND, Method::ISEND, Method::BSEND, Method::SSEND}; + } + if (opt.min_bytes == 0) die(rank, "--min-bytes must be >= 1"); + if (opt.max_bytes < opt.min_bytes) die(rank, "--max-bytes must be >= --min-bytes"); + if (opt.max_bytes > INT_MAX) die(rank, "--max-bytes must be <= INT_MAX"); + if (opt.factor < 1) die(rank, "--factor must be >= 1"); + if (opt.iters <= 0 || opt.warmup < 0) die(rank, "iterations must be positive"); + return opt; +} + +// Helper to perform MPI send based on method, optionally tracking time +struct SendResult { + MPI_Request request; + double elapsed_time; +}; + +static SendResult do_send(Method method, const void *buf, int count, MPI_Datatype datatype, + int dest, int tag, MPI_Comm comm, bool track_time) { + SendResult res{MPI_REQUEST_NULL, 0.0}; + double t0 = track_time ? 
MPI_Wtime() : 0.0; + + if (method == Method::ISEND) { + MPI_Isend(buf, count, datatype, dest, tag, comm, &res.request); + } else if (method == Method::SEND) { + MPI_Send(buf, count, datatype, dest, tag, comm); + } else if (method == Method::BSEND) { + MPI_Bsend(buf, count, datatype, dest, tag, comm); + } else /* SSEND */ { + MPI_Ssend(buf, count, datatype, dest, tag, comm); + } + + if (track_time) { + res.elapsed_time = MPI_Wtime() - t0; + } + return res; +} + +// Measure one direction using the unified pattern: +// sender: for i: send(); recv(); then if isend -> Waitall +// receiver: for i: recv(); send(); then if isend -> Waitall +static PingResult ping_once(int sender, int receiver, int me, std::size_t bytes, int warmup, + int iters, std::vector &buf, Method method) { + const int tag = 42424; + + // ---- Warmup (no timing) ---- + if (warmup > 0) { + if (me == sender) { + std::vector sreq; + sreq.reserve(method == Method::ISEND ? warmup : 0); + for (int w = 0; w < warmup; ++w) { + auto send_res = + do_send(method, buf.data(), (int)bytes, MPI_CHAR, receiver, tag, MPI_COMM_WORLD, false); + if (method == Method::ISEND) { + sreq.push_back(send_res.request); + } + MPI_Recv(buf.data(), (int)bytes, MPI_CHAR, receiver, tag, MPI_COMM_WORLD, + MPI_STATUS_IGNORE); + } + if (method == Method::ISEND && !sreq.empty()) + MPI_Waitall((int)sreq.size(), sreq.data(), MPI_STATUSES_IGNORE); + } else if (me == receiver) { + std::vector sreq; + sreq.reserve(method == Method::ISEND ? 
warmup : 0); + for (int w = 0; w < warmup; ++w) { + MPI_Recv(buf.data(), (int)bytes, MPI_CHAR, sender, tag, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + auto send_res = + do_send(method, buf.data(), (int)bytes, MPI_CHAR, sender, tag, MPI_COMM_WORLD, false); + if (method == Method::ISEND) { + sreq.push_back(send_res.request); + } + } + if (method == Method::ISEND && !sreq.empty()) + MPI_Waitall((int)sreq.size(), sreq.data(), MPI_STATUSES_IGNORE); + } + } + + // ---- Timed phase ---- + PingResult res{-1.0, -1.0}; + if (me == sender) { + std::vector sreq; + sreq.reserve(method == Method::ISEND ? iters : 0); + double send_call_total = 0.0; + + double t0 = MPI_Wtime(); + for (int i = 0; i < iters; ++i) { + auto send_res = + do_send(method, buf.data(), (int)bytes, MPI_CHAR, receiver, tag, MPI_COMM_WORLD, true); + send_call_total += send_res.elapsed_time; + if (method == Method::ISEND) { + sreq.push_back(send_res.request); + } + + MPI_Recv(buf.data(), (int)bytes, MPI_CHAR, receiver, tag, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + } + if (method == Method::ISEND && !sreq.empty()) + MPI_Waitall((int)sreq.size(), sreq.data(), MPI_STATUSES_IGNORE); + double t1 = MPI_Wtime(); + + res.avg_rtt_s = (t1 - t0) / (double)iters; + res.send_call_total_s = send_call_total; + } else if (me == receiver) { + std::vector sreq; + sreq.reserve(method == Method::ISEND ? 
iters : 0); + for (int i = 0; i < iters; ++i) { + MPI_Recv(buf.data(), (int)bytes, MPI_CHAR, sender, tag, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + + auto send_res = + do_send(method, buf.data(), (int)bytes, MPI_CHAR, sender, tag, MPI_COMM_WORLD, false); + if (method == Method::ISEND) { + sreq.push_back(send_res.request); + } + } + if (method == Method::ISEND && !sreq.empty()) + MPI_Waitall((int)sreq.size(), sreq.data(), MPI_STATUSES_IGNORE); + } + return res; +} + +int main(int argc, char **argv) { + MPI_Init(&argc, &argv); + int world, me; + MPI_Comm_size(MPI_COMM_WORLD, &world); + MPI_Comm_rank(MPI_COMM_WORLD, &me); + + if (world < 2) { + if (me == 0) std::cerr << "Run with at least 2 ranks.\n"; + MPI_Finalize(); + return 1; + } + + Options opt = parse_args(argc, argv, me); + + if (opt.only_rank >= world) { + die(me, "--only-rank " + std::to_string(opt.only_rank) + " is out of range for world size " + + std::to_string(world)); + } + + // Gather processor names for locality classification + char myname[MPI_MAX_PROCESSOR_NAME] = {}; + int mylen = 0; + MPI_Get_processor_name(myname, &mylen); + std::vector allnames(world * MPI_MAX_PROCESSOR_NAME, 0); + MPI_Allgather(myname, MPI_MAX_PROCESSOR_NAME, MPI_CHAR, allnames.data(), MPI_MAX_PROCESSOR_NAME, + MPI_CHAR, MPI_COMM_WORLD); + auto rank_name = [&](int r) -> std::string { + const char *p = &allnames[r * MPI_MAX_PROCESSOR_NAME]; + return std::string(p); // buffer is zero-padded + }; + + // Prepare message sizes + std::vector sizes; + { + std::size_t s = opt.min_bytes; + if (opt.factor == 1) { + for (; s <= opt.max_bytes; ++s) sizes.push_back(s); + } else { + while (s <= opt.max_bytes) { + sizes.push_back(s); + if (s > opt.max_bytes / (std::size_t)opt.factor) break; + s *= (std::size_t)opt.factor; + } + } + } + + // Reusable buffer + std::vector buffer(opt.max_bytes, 0); + + // Attach a Bsend buffer if BSEND is in use (supports one outstanding bsend at a time) + std::vector bsend_storage; + bool have_bsend = + 
std::find(opt.methods.begin(), opt.methods.end(), Method::BSEND) != opt.methods.end(); + if (have_bsend) { + int pack_max = 0; + MPI_Pack_size((int)opt.max_bytes, MPI_CHAR, MPI_COMM_WORLD, &pack_max); + int bsz = pack_max + MPI_BSEND_OVERHEAD; + bsend_storage.resize((size_t)bsz); + if (MPI_Buffer_attach(bsend_storage.data(), bsz) != MPI_SUCCESS) { + die(me, "MPI_Buffer_attach failed"); + } + } + + // CSV accumulation (only lower rank logs) + std::ostringstream local_csv; + auto add_line = [&](int src, int dst, const char *direction, const char *locality, + std::size_t bytes, int iters, const PingResult &res, Method method) { + double latency_s = res.avg_rtt_s / 2.0; + double bw_MBps = (2.0 * (double)bytes / res.avg_rtt_s) / 1.0e6; // MB/s (1e6) + local_csv << src << ',' << dst << ',' << method_name(method) << ',' << direction << ',' + << locality << ',' << bytes << ',' << iters << ',' << std::setprecision(12) + << res.avg_rtt_s << ',' << std::setprecision(12) << latency_s << ',' + << std::setprecision(12) << bw_MBps << ',' << std::setprecision(12) + << res.send_call_total_s << '\n'; + }; + + auto pair_is_enabled = [&](int a, int b) -> bool { + if (opt.only_rank < 0) return true; + return (a == opt.only_rank) || (b == opt.only_rank); + }; + + const int TAG_B_TO_A_RESULT = 88001; + + // Main sweep: pairs × sizes × methods + for (int a = 0; a < world; ++a) { + for (int b = a + 1; b < world; ++b) { + if (!pair_is_enabled(a, b)) continue; + + const bool same_node = (rank_name(a) == rank_name(b)); + const char *locality = same_node ? 
"intranode" : "internode"; + + for (std::size_t bytes : sizes) { + for (Method m : opt.methods) { + // a->b + MPI_Barrier(MPI_COMM_WORLD); + PingResult rtt_ab = ping_once(a, b, me, bytes, opt.warmup, opt.iters, buffer, m); + + // b->a + MPI_Barrier(MPI_COMM_WORLD); + PingResult rtt_b_to_a = ping_once(b, a, me, bytes, opt.warmup, opt.iters, buffer, m); + + // Ship b->a sender's measurement to logger (rank a) + if (me == b) { + double payload[2] = {rtt_b_to_a.avg_rtt_s, rtt_b_to_a.send_call_total_s}; + MPI_Send(payload, 2, MPI_DOUBLE, a, TAG_B_TO_A_RESULT, MPI_COMM_WORLD); + } + + if (me == a) { + double payload[2]; + MPI_Recv(payload, 2, MPI_DOUBLE, b, TAG_B_TO_A_RESULT, MPI_COMM_WORLD, + MPI_STATUS_IGNORE); + PingResult rtt_b_to_a_from_b{payload[0], payload[1]}; + add_line(a, b, "a->b", locality, bytes, opt.iters, rtt_ab, m); // measured by a + add_line(a, b, "b->a", locality, bytes, opt.iters, rtt_b_to_a_from_b, + m); // measured by b + } + + MPI_Barrier(MPI_COMM_WORLD); + } + } + } + } + + if (have_bsend) { + void *bufptr = nullptr; + int size = 0; + MPI_Buffer_detach(&bufptr, &size); + } + + // Gather CSV chunks to rank 0 + std::string chunk = local_csv.str(); + long long local_len = (long long)chunk.size(); + std::vector all_lens(world, 0); + MPI_Gather(&local_len, 1, MPI_LONG_LONG, all_lens.data(), 1, MPI_LONG_LONG, 0, MPI_COMM_WORLD); + + std::vector recvcounts, displs; + std::vector recvbuf; + if (me == 0) { + recvcounts.resize(world); + displs.resize(world); + int64_t offset = 0; + for (int r = 0; r < world; ++r) { + if (all_lens[r] > INT_MAX) { + die(0, "CSV output too large for MPI_Gatherv"); + } + recvcounts[r] = static_cast(all_lens[r]); + displs[r] = static_cast(offset); + offset += recvcounts[r]; + } + if (offset > INT_MAX) { + die(0, "CSV output too large for MPI_Gatherv"); + } + recvbuf.resize(static_cast(offset)); + } + + MPI_Gatherv(chunk.data(), (int)local_len, MPI_CHAR, recvbuf.data(), recvcounts.data(), + displs.data(), MPI_CHAR, 0, 
MPI_COMM_WORLD); + + if (me == 0) { + std::string_view header = + "src_rank,dst_rank,method,direction,locality,msg_bytes,iters,avg_rtt_seconds,latency_" + "seconds,bandwidth_MBps,send_call_total_seconds\n"; + FILE *fp = std::fopen(opt.outfile.c_str(), "wb"); + if (!fp) { + std::cerr << "Failed to open output file: " << opt.outfile << std::endl; + MPI_Abort(MPI_COMM_WORLD, 2); + } + bool write_error = false; + if (std::fwrite(header.data(), 1, header.size(), fp) != header.size()) { + std::cerr << "Failed to write header to " << opt.outfile << std::endl; + write_error = true; + } + if (!write_error && !recvbuf.empty()) { + if (std::fwrite(recvbuf.data(), 1, recvbuf.size(), fp) != recvbuf.size()) { + std::cerr << "Failed to write data to " << opt.outfile << std::endl; + write_error = true; + } + } + if (!write_error && std::fflush(fp) != 0) { + std::cerr << "Failed to flush " << opt.outfile << std::endl; + write_error = true; + } + if (std::fclose(fp) != 0) { + std::cerr << "Failed to close " << opt.outfile << std::endl; + write_error = true; + } + if (write_error) { + MPI_Abort(MPI_COMM_WORLD, 2); + } + std::cout << "Wrote results to " << opt.outfile << std::endl; + } + + MPI_Finalize(); + return 0; +} diff --git a/benchmark/results/aurora/1-dynampi_shutdown_aurora_1-8273278.aurora/naive_shutdown_aurora.csv b/benchmark/results/aurora/1-dynampi_shutdown_aurora_1-8273278.aurora/naive_shutdown_aurora.csv new file mode 100644 index 0000000..dba4d7f --- /dev/null +++ b/benchmark/results/aurora/1-dynampi_shutdown_aurora_1-8273278.aurora/naive_shutdown_aurora.csv @@ -0,0 +1,2 @@ +system,nodes,world_size,workers,time_per_shutdown_us,iterations +aurora,1,102,101,56.2148,47544 diff --git a/benchmark/results/aurora/1-dynampi_ss_aurora_1-manual/strong_scaling_aurora.csv b/benchmark/results/aurora/1-dynampi_ss_aurora_1-manual/strong_scaling_aurora.csv new file mode 100644 index 0000000..7b4a711 --- /dev/null +++ 
b/benchmark/results/aurora/1-dynampi_ss_aurora_1-manual/strong_scaling_aurora.csv @@ -0,0 +1,29 @@ +system,distributor,mode,expected_us,duration_s,nodes,world_size,workers,total_tasks,elapsed_s,throughput_tasks_per_s +aurora,naive,fixed,1,10,1,102,101,3292035,10,329203 +aurora,naive,fixed,10,10,1,102,101,3038956,10.0132,303495 +aurora,naive,fixed,100,10,1,102,101,3046127,10.0791,302222 +aurora,naive,fixed,1000,10,1,102,101,697881,10.0214,69638.8 +aurora,naive,fixed,10000,10,1,102,101,72117,10.0723,7159.91 +aurora,naive,fixed,100000,10,1,102,101,8130,10.2931,789.851 +aurora,naive,fixed,1000000,10,1,102,101,998,10.0016,99.7837 +aurora,naive,random,1,10,1,102,101,3372090,10,337207 +aurora,naive,random,10,10,1,102,101,3538674,10,353866 +aurora,naive,random,100,10,1,102,101,3737838,10.0001,373780 +aurora,naive,random,1000,10,1,102,101,688195,10.0871,68225.1 +aurora,naive,random,10000,10,1,102,101,74008,10.0392,7371.88 +aurora,naive,random,100000,10,1,102,101,8253,10.263,804.148 +aurora,naive,random,1000000,10,1,102,101,1037,11.9386,86.8612 +aurora,hierarchical,fixed,1,10,1,102,101,25600214,10.0285,2.55275e+06 +aurora,hierarchical,fixed,10,10,1,102,101,19734672,10.1209,1.94989e+06 +aurora,hierarchical,fixed,100,10,1,102,101,5325657,10.0002,532556 +aurora,hierarchical,fixed,1000,10,1,102,101,631484,10.1692,62097.9 +aurora,hierarchical,fixed,10000,10,1,102,101,65076,10.0286,6489.06 +aurora,hierarchical,fixed,100000,10,1,102,101,7275,10.2691,708.435 +aurora,hierarchical,fixed,1000000,10,1,102,101,864,10.0021,86.3822 +aurora,hierarchical,random,1,10,1,102,101,26725337,10,2.67252e+06 +aurora,hierarchical,random,10,10,1,102,101,18693436,10,1.86933e+06 +aurora,hierarchical,random,100,10,1,102,101,4084427,10.0003,408431 +aurora,hierarchical,random,1000,10,1,102,101,455397,10.0021,45530.1 +aurora,hierarchical,random,10000,10,1,102,101,46179,10.0178,4609.71 +aurora,hierarchical,random,100000,10,1,102,101,5248,10.51,499.332 
+aurora,hierarchical,random,1000000,10,1,102,101,768,13.7015,56.0524 diff --git a/benchmark/results/aurora/128-dynampi_shutdown_aurora_128-8273285.aurora/naive_shutdown_aurora.csv b/benchmark/results/aurora/128-dynampi_shutdown_aurora_128-8273285.aurora/naive_shutdown_aurora.csv new file mode 100644 index 0000000..800c81a --- /dev/null +++ b/benchmark/results/aurora/128-dynampi_shutdown_aurora_128-8273285.aurora/naive_shutdown_aurora.csv @@ -0,0 +1,2 @@ +system,nodes,world_size,workers,time_per_shutdown_us,iterations +aurora,128,13056,13055,1.3148e+06,7 diff --git a/benchmark/results/aurora/128-dynampi_ss_aurora_128-manual/strong_scaling_aurora.csv b/benchmark/results/aurora/128-dynampi_ss_aurora_128-manual/strong_scaling_aurora.csv new file mode 100644 index 0000000..cbd49a9 --- /dev/null +++ b/benchmark/results/aurora/128-dynampi_ss_aurora_128-manual/strong_scaling_aurora.csv @@ -0,0 +1,29 @@ +system,distributor,mode,expected_us,duration_s,nodes,world_size,workers,total_tasks,elapsed_s,throughput_tasks_per_s +aurora,naive,fixed,1,10,128,13056,13055,3759459,10.1804,369286 +aurora,naive,fixed,10,10,128,13056,13055,4135601,10.1817,406181 +aurora,naive,fixed,100,10,128,13056,13055,3857471,10.1774,379025 +aurora,naive,fixed,1000,10,128,13056,13055,4391850,10.2552,428256 +aurora,naive,fixed,10000,10,128,13056,13055,862438,11.5385,74744.2 +aurora,naive,fixed,100000,10,128,13056,13055,1247116,10.3682,120283 +aurora,naive,fixed,1000000,10,128,13056,13055,137318,12.6656,10841.8 +aurora,naive,random,1,10,128,13056,13055,3185549,10.2509,310758 +aurora,naive,random,10,10,128,13056,13055,4089673,10.0479,407019 +aurora,naive,random,100,10,128,13056,13055,4510086,10.2189,441349 +aurora,naive,random,1000,10,128,13056,13055,4382619,10.2858,426084 +aurora,naive,random,10000,10,128,13056,13055,813690,10.1783,79943.7 +aurora,naive,random,100000,10,128,13056,13055,1162931,10.4364,111430 +aurora,naive,random,1000000,10,128,13056,13055,111302,12.1867,9133.04 
+aurora,hierarchical,fixed,1,10,128,13056,13055,143076556,10.2942,1.38988e+07 +aurora,hierarchical,fixed,10,10,128,13056,13055,138750651,10.2194,1.35772e+07 +aurora,hierarchical,fixed,100,10,128,13056,13055,139061615,10.2833,1.3523e+07 +aurora,hierarchical,fixed,1000,10,128,13056,13055,64695180,10.2085,6.33736e+06 +aurora,hierarchical,fixed,10000,10,128,13056,13055,8711435,10.2067,853504 +aurora,hierarchical,fixed,100000,10,128,13056,13055,1059270,10.4242,101616 +aurora,hierarchical,fixed,1000000,10,128,13056,13055,146950,12.3262,11921.8 +aurora,hierarchical,random,1,10,128,13056,13055,136173192,10.2541,1.32799e+07 +aurora,hierarchical,random,10,10,128,13056,13055,138530865,10.273,1.3485e+07 +aurora,hierarchical,random,100,10,128,13056,13055,138680080,10.3062,1.34559e+07 +aurora,hierarchical,random,1000,10,128,13056,13055,47825162,10.2542,4.66395e+06 +aurora,hierarchical,random,10000,10,128,13056,13055,5700593,10.2524,556028 +aurora,hierarchical,random,100000,10,128,13056,13055,759388,10.4392,72743.8 +aurora,hierarchical,random,1000000,10,128,13056,13055,101932,13.4684,7568.24 diff --git a/benchmark/results/aurora/16-dynampi_shutdown_aurora_16-8273282.aurora/naive_shutdown_aurora.csv b/benchmark/results/aurora/16-dynampi_shutdown_aurora_16-8273282.aurora/naive_shutdown_aurora.csv new file mode 100644 index 0000000..477a342 --- /dev/null +++ b/benchmark/results/aurora/16-dynampi_shutdown_aurora_16-8273282.aurora/naive_shutdown_aurora.csv @@ -0,0 +1,2 @@ +system,nodes,world_size,workers,time_per_shutdown_us,iterations +aurora,16,1632,1631,65732.5,52 diff --git a/benchmark/results/aurora/16-dynampi_ss_aurora_16-manual/strong_scaling_aurora.csv b/benchmark/results/aurora/16-dynampi_ss_aurora_16-manual/strong_scaling_aurora.csv new file mode 100644 index 0000000..4ab3dcc --- /dev/null +++ b/benchmark/results/aurora/16-dynampi_ss_aurora_16-manual/strong_scaling_aurora.csv @@ -0,0 +1,29 @@ 
+system,distributor,mode,expected_us,duration_s,nodes,world_size,workers,total_tasks,elapsed_s,throughput_tasks_per_s +aurora,naive,fixed,1,10,16,1632,1631,926576,10.1276,91490.6 +aurora,naive,fixed,10,10,16,1632,1631,1014478,10.1828,99626.4 +aurora,naive,fixed,100,10,16,1632,1631,970322,10.2285,94864.7 +aurora,naive,fixed,1000,10,16,1632,1631,899363,10.1916,88245.9 +aurora,naive,fixed,10000,10,16,1632,1631,710556,10.0606,70627.4 +aurora,naive,fixed,100000,10,16,1632,1631,155556,10.3381,15046.9 +aurora,naive,fixed,1000000,10,16,1632,1631,17505,11.1251,1573.46 +aurora,naive,random,1,10,16,1632,1631,958883,10.0515,95396.8 +aurora,naive,random,10,10,16,1632,1631,1057683,10.1523,104182 +aurora,naive,random,100,10,16,1632,1631,1100268,10.1276,108640 +aurora,naive,random,1000,10,16,1632,1631,921259,10.194,90372.8 +aurora,naive,random,10000,10,16,1632,1631,787837,10.2133,77138.1 +aurora,naive,random,100000,10,16,1632,1631,157827,10.4086,15163.2 +aurora,naive,random,1000000,10,16,1632,1631,17253,11.9111,1448.48 +aurora,hierarchical,fixed,1,10,16,1632,1631,95909487,10.0591,9.53461e+06 +aurora,hierarchical,fixed,10,10,16,1632,1631,96384034,10.4675,9.20791e+06 +aurora,hierarchical,fixed,100,10,16,1632,1631,62607757,10.2033,6.136e+06 +aurora,hierarchical,fixed,1000,10,16,1632,1631,10707158,10.2557,1.04402e+06 +aurora,hierarchical,fixed,10000,10,16,1632,1631,1173716,10.276,114219 +aurora,hierarchical,fixed,100000,10,16,1632,1631,132710,10.2848,12903.6 +aurora,hierarchical,fixed,1000000,10,16,1632,1631,17872,12.4028,1440.96 +aurora,hierarchical,random,1,10,16,1632,1631,97037464,10.1521,9.55841e+06 +aurora,hierarchical,random,10,10,16,1632,1631,94803173,10.2254,9.27134e+06 +aurora,hierarchical,random,100,10,16,1632,1631,54551098,10.1787,5.35933e+06 +aurora,hierarchical,random,1000,10,16,1632,1631,7264168,10.2942,705658 +aurora,hierarchical,random,10000,10,16,1632,1631,767200,10.2683,74715.7 +aurora,hierarchical,random,100000,10,16,1632,1631,94483,10.6022,8911.62 
+aurora,hierarchical,random,1000000,10,16,1632,1631,12728,13.4645,945.3 diff --git a/benchmark/results/aurora/2-dynampi_shutdown_aurora_2-8273279.aurora/naive_shutdown_aurora.csv b/benchmark/results/aurora/2-dynampi_shutdown_aurora_2-8273279.aurora/naive_shutdown_aurora.csv new file mode 100644 index 0000000..5a96d51 --- /dev/null +++ b/benchmark/results/aurora/2-dynampi_shutdown_aurora_2-8273279.aurora/naive_shutdown_aurora.csv @@ -0,0 +1,2 @@ +system,nodes,world_size,workers,time_per_shutdown_us,iterations +aurora,2,204,203,232.767,16092 diff --git a/benchmark/results/aurora/2-dynampi_ss_aurora_2-manual/strong_scaling_aurora.csv b/benchmark/results/aurora/2-dynampi_ss_aurora_2-manual/strong_scaling_aurora.csv new file mode 100644 index 0000000..0ed3c02 --- /dev/null +++ b/benchmark/results/aurora/2-dynampi_ss_aurora_2-manual/strong_scaling_aurora.csv @@ -0,0 +1,29 @@ +system,distributor,mode,expected_us,duration_s,nodes,world_size,workers,total_tasks,elapsed_s,throughput_tasks_per_s +aurora,naive,fixed,1,10,2,204,203,2320234,10,232023 +aurora,naive,fixed,10,10,2,204,203,2323683,10.0797,230532 +aurora,naive,fixed,100,10,2,204,203,2432285,10.1231,240271 +aurora,naive,fixed,1000,10,2,204,203,963304,10.052,95832.5 +aurora,naive,fixed,10000,10,2,204,203,122452,10.0034,12241 +aurora,naive,fixed,100000,10,2,204,203,16835,10.2284,1645.9 +aurora,naive,fixed,1000000,10,2,204,203,2200,11.0373,199.324 +aurora,naive,random,1,10,2,204,203,2558997,10.0916,253576 +aurora,naive,random,10,10,2,204,203,2852009,10,285201 +aurora,naive,random,100,10,2,204,203,2704860,10.0422,269349 +aurora,naive,random,1000,10,2,204,203,1015491,10.2629,98947.6 +aurora,naive,random,10000,10,2,204,203,122936,10.0804,12195.5 +aurora,naive,random,100000,10,2,204,203,17047,10.3119,1653.14 +aurora,naive,random,1000000,10,2,204,203,2137,11.8315,180.62 +aurora,hierarchical,fixed,1,10,2,204,203,35501354,10.1075,3.51236e+06 +aurora,hierarchical,fixed,10,10,2,204,203,33540165,10.203,3.28728e+06 
+aurora,hierarchical,fixed,100,10,2,204,203,10089910,10.0002,1.00897e+06 +aurora,hierarchical,fixed,1000,10,2,204,203,1213536,10.1795,119214 +aurora,hierarchical,fixed,10000,10,2,204,203,129458,10.1806,12716.2 +aurora,hierarchical,fixed,100000,10,2,204,203,14734,10.2973,1430.86 +aurora,hierarchical,fixed,1000000,10,2,204,203,1929,12.671,152.237 +aurora,hierarchical,random,1,10,2,204,203,37185358,10.2636,3.62302e+06 +aurora,hierarchical,random,10,10,2,204,203,31345762,10.1917,3.07562e+06 +aurora,hierarchical,random,100,10,2,204,203,7656038,10.019,764152 +aurora,hierarchical,random,1000,10,2,204,203,857171,10.0783,85050.8 +aurora,hierarchical,random,10000,10,2,204,203,89629,10.1354,8843.2 +aurora,hierarchical,random,100000,10,2,204,203,10820,10.5683,1023.82 +aurora,hierarchical,random,1000000,10,2,204,203,1568,13.527,115.916 diff --git a/benchmark/results/aurora/256-dynampi_shutdown_aurora_256-8273286.aurora/naive_shutdown_aurora.csv b/benchmark/results/aurora/256-dynampi_shutdown_aurora_256-8273286.aurora/naive_shutdown_aurora.csv new file mode 100644 index 0000000..9697998 --- /dev/null +++ b/benchmark/results/aurora/256-dynampi_shutdown_aurora_256-8273286.aurora/naive_shutdown_aurora.csv @@ -0,0 +1,2 @@ +system,nodes,world_size,workers,time_per_shutdown_us,iterations +aurora,256,26112,26111,3.22161e+06,4 diff --git a/benchmark/results/aurora/256-dynampi_ss_aurora_256-manual/strong_scaling_aurora.csv b/benchmark/results/aurora/256-dynampi_ss_aurora_256-manual/strong_scaling_aurora.csv new file mode 100644 index 0000000..72326d3 --- /dev/null +++ b/benchmark/results/aurora/256-dynampi_ss_aurora_256-manual/strong_scaling_aurora.csv @@ -0,0 +1,29 @@ +system,distributor,mode,expected_us,duration_s,nodes,world_size,workers,total_tasks,elapsed_s,throughput_tasks_per_s +aurora,naive,fixed,1,10,256,26112,26111,4619072,11.966,386017 +aurora,naive,fixed,10,10,256,26112,26111,4641837,12.4846,371806 +aurora,naive,fixed,100,10,256,26112,26111,3237586,10.3013,314288 
+aurora,naive,fixed,1000,10,256,26112,26111,4237900,10.2809,412212 +aurora,naive,fixed,10000,10,256,26112,26111,795236,10.2324,77717.2 +aurora,naive,fixed,100000,10,256,26112,26111,1280802,11.8403,108173 +aurora,naive,fixed,1000000,10,256,26112,26111,266610,11.6867,22813.1 +aurora,naive,random,1,10,256,26112,26111,4781788,13.4572,355334 +aurora,naive,random,10,10,256,26112,26111,5233230,12.882,406243 +aurora,naive,random,100,10,256,26112,26111,3550527,10.2433,346619 +aurora,naive,random,1000,10,256,26112,26111,4100379,10.2172,401320 +aurora,naive,random,10000,10,256,26112,26111,685419,11.7954,58108.9 +aurora,naive,random,100000,10,256,26112,26111,2266274,11.2013,202322 +aurora,naive,random,1000000,10,256,26112,26111,171232,11.9497,14329.4 +aurora,hierarchical,fixed,1,10,256,26112,26111,145731858,10.23,1.42456e+07 +aurora,hierarchical,fixed,10,10,256,26112,26111,144252187,10.248,1.40761e+07 +aurora,hierarchical,fixed,100,10,256,26112,26111,150907610,10.2015,1.47927e+07 +aurora,hierarchical,fixed,1000,10,256,26112,26111,100576835,10.2351,9.82667e+06 +aurora,hierarchical,fixed,10000,10,256,26112,26111,15747075,10.2446,1.53711e+06 +aurora,hierarchical,fixed,100000,10,256,26112,26111,2070254,10.5322,196564 +aurora,hierarchical,fixed,1000000,10,256,26112,26111,292292,12.2982,23767 +aurora,hierarchical,random,1,10,256,26112,26111,147387588,10.2316,1.44052e+07 +aurora,hierarchical,random,10,10,256,26112,26111,151057427,10.1957,1.48158e+07 +aurora,hierarchical,random,100,10,256,26112,26111,149040594,10.2656,1.45185e+07 +aurora,hierarchical,random,1000,10,256,26112,26111,78431231,10.2522,7.65015e+06 +aurora,hierarchical,random,10000,10,256,26112,26111,10525990,10.2869,1.02324e+06 +aurora,hierarchical,random,100000,10,256,26112,26111,1501539,10.4575,143585 +aurora,hierarchical,random,1000000,10,256,26112,26111,207806,15.6579,13271.6 diff --git a/benchmark/results/aurora/32-dynampi_shutdown_aurora_32-8273283.aurora/naive_shutdown_aurora.csv 
b/benchmark/results/aurora/32-dynampi_shutdown_aurora_32-8273283.aurora/naive_shutdown_aurora.csv new file mode 100644 index 0000000..a7ade02 --- /dev/null +++ b/benchmark/results/aurora/32-dynampi_shutdown_aurora_32-8273283.aurora/naive_shutdown_aurora.csv @@ -0,0 +1,2 @@ +system,nodes,world_size,workers,time_per_shutdown_us,iterations +aurora,32,3264,3263,173000,23 diff --git a/benchmark/results/aurora/32-dynampi_ss_aurora_32-manual/strong_scaling_aurora.csv b/benchmark/results/aurora/32-dynampi_ss_aurora_32-manual/strong_scaling_aurora.csv new file mode 100644 index 0000000..3819060 --- /dev/null +++ b/benchmark/results/aurora/32-dynampi_ss_aurora_32-manual/strong_scaling_aurora.csv @@ -0,0 +1,29 @@ +system,distributor,mode,expected_us,duration_s,nodes,world_size,workers,total_tasks,elapsed_s,throughput_tasks_per_s +aurora,naive,fixed,1,10,32,3264,3263,1971708,10.203,193248 +aurora,naive,fixed,10,10,32,3264,3263,1874286,10.2216,183366 +aurora,naive,fixed,100,10,32,3264,3263,1998433,10.2637,194709 +aurora,naive,fixed,1000,10,32,3264,3263,1780555,10.2429,173832 +aurora,naive,fixed,10000,10,32,3264,3263,1574554,10.3053,152791 +aurora,naive,fixed,100000,10,32,3264,3263,303456,10.4089,29153.6 +aurora,naive,fixed,1000000,10,32,3264,3263,34672,11.1478,3110.22 +aurora,naive,random,1,10,32,3264,3263,2053313,10.1821,201659 +aurora,naive,random,10,10,32,3264,3263,2051489,10.1645,201828 +aurora,naive,random,100,10,32,3264,3263,1986143,10.204,194643 +aurora,naive,random,1000,10,32,3264,3263,1648332,10.2367,161023 +aurora,naive,random,10000,10,32,3264,3263,1562902,10.2551,152402 +aurora,naive,random,100000,10,32,3264,3263,301658,10.4567,28848.4 +aurora,naive,random,1000000,10,32,3264,3263,34149,12.0491,2834.16 +aurora,hierarchical,fixed,1,10,32,3264,3263,109144885,10.268,1.06296e+07 +aurora,hierarchical,fixed,10,10,32,3264,3263,108544974,10.203,1.06385e+07 +aurora,hierarchical,fixed,100,10,32,3264,3263,89223525,10.189,8.75682e+06 
+aurora,hierarchical,fixed,1000,10,32,3264,3263,19786955,10.189,1.942e+06 +aurora,hierarchical,fixed,10000,10,32,3264,3263,2232458,10.3681,215319 +aurora,hierarchical,fixed,100000,10,32,3264,3263,264786,10.6206,24931.3 +aurora,hierarchical,fixed,1000000,10,32,3264,3263,35784,12.2509,2920.93 +aurora,hierarchical,random,1,10,32,3264,3263,110723722,10.2266,1.08271e+07 +aurora,hierarchical,random,10,10,32,3264,3263,107073128,10.0836,1.06186e+07 +aurora,hierarchical,random,100,10,32,3264,3263,84979742,10.2703,8.27432e+06 +aurora,hierarchical,random,1000,10,32,3264,3263,13545663,10.1751,1.33126e+06 +aurora,hierarchical,random,10000,10,32,3264,3263,1483126,10.3192,143725 +aurora,hierarchical,random,100000,10,32,3264,3263,189678,10.4472,18155.9 +aurora,hierarchical,random,1000000,10,32,3264,3263,25876,15.121,1711.27 diff --git a/benchmark/results/aurora/4-dynampi_shutdown_aurora_4-8273280.aurora/naive_shutdown_aurora.csv b/benchmark/results/aurora/4-dynampi_shutdown_aurora_4-8273280.aurora/naive_shutdown_aurora.csv new file mode 100644 index 0000000..cb008d4 --- /dev/null +++ b/benchmark/results/aurora/4-dynampi_shutdown_aurora_4-8273280.aurora/naive_shutdown_aurora.csv @@ -0,0 +1,2 @@ +system,nodes,world_size,workers,time_per_shutdown_us,iterations +aurora,4,408,407,415.061,11331 diff --git a/benchmark/results/aurora/4-dynampi_ss_aurora_4-manual/strong_scaling_aurora.csv b/benchmark/results/aurora/4-dynampi_ss_aurora_4-manual/strong_scaling_aurora.csv new file mode 100644 index 0000000..176597d --- /dev/null +++ b/benchmark/results/aurora/4-dynampi_ss_aurora_4-manual/strong_scaling_aurora.csv @@ -0,0 +1,29 @@ +system,distributor,mode,expected_us,duration_s,nodes,world_size,workers,total_tasks,elapsed_s,throughput_tasks_per_s +aurora,naive,fixed,1,10,4,408,407,2770958,10.1144,273961 +aurora,naive,fixed,10,10,4,408,407,2728827,10.0002,272878 +aurora,naive,fixed,100,10,4,408,407,2656957,10.0001,265693 +aurora,naive,fixed,1000,10,4,408,407,2319196,10.0363,231081 
+aurora,naive,fixed,10000,10,4,408,407,295337,10.0086,29508.4 +aurora,naive,fixed,100000,10,4,408,407,39834,10.159,3921.07 +aurora,naive,fixed,1000000,10,4,408,407,4409,11.0021,400.742 +aurora,naive,random,1,10,4,408,407,2775929,10,277593 +aurora,naive,random,10,10,4,408,407,3080768,10,308076 +aurora,naive,random,100,10,4,408,407,3260474,10.0029,325952 +aurora,naive,random,1000,10,4,408,407,2255570,10.0497,224442 +aurora,naive,random,10000,10,4,408,407,311848,10.0166,31133.3 +aurora,naive,random,100000,10,4,408,407,40522,10.1908,3976.34 +aurora,naive,random,1000000,10,4,408,407,4325,11.8484,365.027 +aurora,hierarchical,fixed,1,10,4,408,407,63171445,10.0368,6.29397e+06 +aurora,hierarchical,fixed,10,10,4,408,407,59782951,10.0001,5.97826e+06 +aurora,hierarchical,fixed,100,10,4,408,407,24326641,10.0249,2.42663e+06 +aurora,hierarchical,fixed,1000,10,4,408,407,3287894,10.0021,328721 +aurora,hierarchical,fixed,10000,10,4,408,407,340081,10.0203,33939.1 +aurora,hierarchical,fixed,100000,10,4,408,407,37755,10.2044,3699.88 +aurora,hierarchical,fixed,1000000,10,4,408,407,4017,12.0263,334.019 +aurora,hierarchical,random,1,10,4,408,407,62514471,10,6.25142e+06 +aurora,hierarchical,random,10,10,4,408,407,60507379,10.0001,6.0507e+06 +aurora,hierarchical,random,100,10,4,408,407,20013832,10.2073,1.96073e+06 +aurora,hierarchical,random,1000,10,4,408,407,2236408,10.0036,223560 +aurora,hierarchical,random,10000,10,4,408,407,233038,10.079,23121.2 +aurora,hierarchical,random,100000,10,4,408,407,25626,10.2339,2504.03 +aurora,hierarchical,random,1000000,10,4,408,407,3216,13.9369,230.755 diff --git a/benchmark/results/aurora/512-dynampi_shutdown_aurora_512-8273287.aurora/naive_shutdown_aurora.csv b/benchmark/results/aurora/512-dynampi_shutdown_aurora_512-8273287.aurora/naive_shutdown_aurora.csv new file mode 100644 index 0000000..c86ec4e --- /dev/null +++ b/benchmark/results/aurora/512-dynampi_shutdown_aurora_512-8273287.aurora/naive_shutdown_aurora.csv @@ -0,0 +1,2 @@ 
+system,nodes,world_size,workers,time_per_shutdown_us,iterations +aurora,512,52224,52223,1.32215e+07,1 diff --git a/benchmark/results/aurora/512-dynampi_ss_aurora_512-manual/strong_scaling_aurora.csv b/benchmark/results/aurora/512-dynampi_ss_aurora_512-manual/strong_scaling_aurora.csv new file mode 100644 index 0000000..30a709d --- /dev/null +++ b/benchmark/results/aurora/512-dynampi_ss_aurora_512-manual/strong_scaling_aurora.csv @@ -0,0 +1,29 @@ +system,distributor,mode,expected_us,duration_s,nodes,world_size,workers,total_tasks,elapsed_s,throughput_tasks_per_s +aurora,naive,fixed,1,10,512,52224,52223,4097144,31.191,131357 +aurora,naive,fixed,10,10,512,52224,52223,3723083,25.4862,146082 +aurora,naive,fixed,100,10,512,52224,52223,2141347,17.8665,119853 +aurora,naive,fixed,1000,10,512,52224,52223,209409,13.8726,15095.2 +aurora,naive,fixed,10000,10,512,52224,52223,209242,14.9043,14039.1 +aurora,naive,fixed,100000,10,512,52224,52223,209197,13.2934,15736.9 +aurora,naive,fixed,1000000,10,512,52224,52223,209095,29.7586,7026.37 +aurora,naive,random,1,10,512,52224,52223,4088630,29.857,136940 +aurora,naive,random,10,10,512,52224,52223,4582756,26.5137,172845 +aurora,naive,random,100,10,512,52224,52223,2340349,16.9888,137759 +aurora,naive,random,1000,10,512,52224,52223,209282,13.3642,15659.9 +aurora,naive,random,10000,10,512,52224,52223,209134,13.8665,15082 +aurora,naive,random,100000,10,512,52224,52223,209302,12.8027,16348.3 +aurora,naive,random,1000000,10,512,52224,52223,209204,51.0611,4097.13 +aurora,hierarchical,fixed,1,10,512,52224,52223,142287911,10.2518,1.38792e+07 +aurora,hierarchical,fixed,10,10,512,52224,52223,139946758,10.387,1.34733e+07 +aurora,hierarchical,fixed,100,10,512,52224,52223,139594506,10.2802,1.3579e+07 +aurora,hierarchical,fixed,1000,10,512,52224,52223,107690133,10.2897,1.04658e+07 +aurora,hierarchical,fixed,10000,10,512,52224,52223,24945068,10.3131,2.41878e+06 +aurora,hierarchical,fixed,100000,10,512,52224,52223,3685997,10.4885,351433 
+aurora,hierarchical,fixed,1000000,10,512,52224,52223,573943,12.435,46155.5 +aurora,hierarchical,random,1,10,512,52224,52223,138737200,10.2986,1.34715e+07 +aurora,hierarchical,random,10,10,512,52224,52223,139848889,10.2705,1.36165e+07 +aurora,hierarchical,random,100,10,512,52224,52223,141593060,10.3642,1.36617e+07 +aurora,hierarchical,random,1000,10,512,52224,52223,94863386,10.3386,9.17563e+06 +aurora,hierarchical,random,10000,10,512,52224,52223,17146858,10.2664,1.67019e+06 +aurora,hierarchical,random,100000,10,512,52224,52223,2780472,10.6128,261993 +aurora,hierarchical,random,1000000,10,512,52224,52223,416372,16.0025,26019.2 diff --git a/benchmark/results/aurora/64-dynampi_shutdown_aurora_64-8273284.aurora/naive_shutdown_aurora.csv b/benchmark/results/aurora/64-dynampi_shutdown_aurora_64-8273284.aurora/naive_shutdown_aurora.csv new file mode 100644 index 0000000..58b6b20 --- /dev/null +++ b/benchmark/results/aurora/64-dynampi_shutdown_aurora_64-8273284.aurora/naive_shutdown_aurora.csv @@ -0,0 +1,2 @@ +system,nodes,world_size,workers,time_per_shutdown_us,iterations +aurora,64,6528,6527,429295,12 diff --git a/benchmark/results/aurora/64-dynampi_ss_aurora_64-manual/strong_scaling_aurora.csv b/benchmark/results/aurora/64-dynampi_ss_aurora_64-manual/strong_scaling_aurora.csv new file mode 100644 index 0000000..82b0839 --- /dev/null +++ b/benchmark/results/aurora/64-dynampi_ss_aurora_64-manual/strong_scaling_aurora.csv @@ -0,0 +1,29 @@ +system,distributor,mode,expected_us,duration_s,nodes,world_size,workers,total_tasks,elapsed_s,throughput_tasks_per_s +aurora,naive,fixed,1,10,64,6528,6527,3153458,10.2011,309128 +aurora,naive,fixed,10,10,64,6528,6527,3070733,10.1625,302162 +aurora,naive,fixed,100,10,64,6528,6527,3012121,10.2078,295081 +aurora,naive,fixed,1000,10,64,6528,6527,3005823,10.2481,293306 +aurora,naive,fixed,10000,10,64,6528,6527,1140339,10.3641,110028 +aurora,naive,fixed,100000,10,64,6528,6527,373045,10.9557,34050.2 
+aurora,naive,fixed,1000000,10,64,6528,6527,69040,11.3344,6091.17 +aurora,naive,random,1,10,64,6528,6527,2972987,10.1104,294054 +aurora,naive,random,10,10,64,6528,6527,3188762,10.3839,307087 +aurora,naive,random,100,10,64,6528,6527,3508625,10.2335,342856 +aurora,naive,random,1000,10,64,6528,6527,3127984,10.2974,303764 +aurora,naive,random,10000,10,64,6528,6527,1206366,10.6337,113447 +aurora,naive,random,100000,10,64,6528,6527,289137,10.5432,27424.1 +aurora,naive,random,1000000,10,64,6528,6527,68086,12.1331,5611.58 +aurora,hierarchical,fixed,1,10,64,6528,6527,117185788,10.4487,1.12154e+07 +aurora,hierarchical,fixed,10,10,64,6528,6527,113120908,10.2455,1.1041e+07 +aurora,hierarchical,fixed,100,10,64,6528,6527,106274811,10.2038,1.04152e+07 +aurora,hierarchical,fixed,1000,10,64,6528,6527,33179042,10.257,3.23478e+06 +aurora,hierarchical,fixed,10000,10,64,6528,6527,3832028,10.269,373163 +aurora,hierarchical,fixed,100000,10,64,6528,6527,483621,10.5397,45885.6 +aurora,hierarchical,fixed,1000000,10,64,6528,6527,72473,12.2945,5894.73 +aurora,hierarchical,random,1,10,64,6528,6527,114058699,10.4063,1.09605e+07 +aurora,hierarchical,random,10,10,64,6528,6527,113958357,10.2227,1.11475e+07 +aurora,hierarchical,random,100,10,64,6528,6527,100452348,10.475,9.58975e+06 +aurora,hierarchical,random,1000,10,64,6528,6527,22216175,10.2638,2.16452e+06 +aurora,hierarchical,random,10000,10,64,6528,6527,2512161,10.2109,246028 +aurora,hierarchical,random,100000,10,64,6528,6527,358523,10.4799,34210.6 +aurora,hierarchical,random,1000000,10,64,6528,6527,48783,13.4007,3640.33 diff --git a/benchmark/results/aurora/8-dynampi_shutdown_aurora_8-8273281.aurora/naive_shutdown_aurora.csv b/benchmark/results/aurora/8-dynampi_shutdown_aurora_8-8273281.aurora/naive_shutdown_aurora.csv new file mode 100644 index 0000000..f08ac80 --- /dev/null +++ b/benchmark/results/aurora/8-dynampi_shutdown_aurora_8-8273281.aurora/naive_shutdown_aurora.csv @@ -0,0 +1,2 @@ 
+system,nodes,world_size,workers,time_per_shutdown_us,iterations +aurora,8,816,815,1716.62,1756 diff --git a/benchmark/results/aurora/8-dynampi_ss_aurora_8-manual/strong_scaling_aurora.csv b/benchmark/results/aurora/8-dynampi_ss_aurora_8-manual/strong_scaling_aurora.csv new file mode 100644 index 0000000..aed3c39 --- /dev/null +++ b/benchmark/results/aurora/8-dynampi_ss_aurora_8-manual/strong_scaling_aurora.csv @@ -0,0 +1,29 @@ +system,distributor,mode,expected_us,duration_s,nodes,world_size,workers,total_tasks,elapsed_s,throughput_tasks_per_s +aurora,naive,fixed,1,10,8,816,815,1228409,10.075,121926 +aurora,naive,fixed,10,10,8,816,815,1206907,10.0815,119715 +aurora,naive,fixed,100,10,8,816,815,1227390,10.0271,122407 +aurora,naive,fixed,1000,10,8,816,815,1092887,10.032,108940 +aurora,naive,fixed,10000,10,8,816,815,414988,10.0574,41262.2 +aurora,naive,fixed,100000,10,8,816,815,79431,10.0807,7879.55 +aurora,naive,fixed,1000000,10,8,816,815,8059,10.0985,798.039 +aurora,naive,random,1,10,8,816,815,1234459,10.0221,123174 +aurora,naive,random,10,10,8,816,815,1504660,10.1793,147815 +aurora,naive,random,100,10,8,816,815,1429906,10.1001,141574 +aurora,naive,random,1000,10,8,816,815,1164652,10.1162,115127 +aurora,naive,random,10000,10,8,816,815,434861,10.1444,42867.1 +aurora,naive,random,100000,10,8,816,815,79430,10.2573,7743.73 +aurora,naive,random,1000000,10,8,816,815,8670,12.031,720.641 +aurora,hierarchical,fixed,1,10,8,816,815,79011039,10.2284,7.72464e+06 +aurora,hierarchical,fixed,10,10,8,816,815,77370138,10.1479,7.62428e+06 +aurora,hierarchical,fixed,100,10,8,816,815,40474774,10.1982,3.9688e+06 +aurora,hierarchical,fixed,1000,10,8,816,815,6013964,10.2077,589158 +aurora,hierarchical,fixed,10000,10,8,816,815,634971,10.241,62002.7 +aurora,hierarchical,fixed,100000,10,8,816,815,70335,10.3061,6824.63 +aurora,hierarchical,fixed,1000000,10,8,816,815,8884,12.129,732.461 +aurora,hierarchical,random,1,10,8,816,815,77582110,10.151,7.64282e+06 
+aurora,hierarchical,random,10,10,8,816,815,75719678,10.1162,7.485e+06 +aurora,hierarchical,random,100,10,8,816,815,33532886,10.0429,3.33897e+06 +aurora,hierarchical,random,1000,10,8,816,815,4024376,10.0437,400685 +aurora,hierarchical,random,10000,10,8,816,815,422833,10.0998,41865.5 +aurora,hierarchical,random,100000,10,8,816,815,49633,10.3575,4791.96 +aurora,hierarchical,random,1000000,10,8,816,815,6340,15.0156,422.228 diff --git a/benchmark/results/frontier/1-dynampi_shutdown_frontier_1-4058166/naive_shutdown_frontier.csv b/benchmark/results/frontier/1-dynampi_shutdown_frontier_1-4058166/naive_shutdown_frontier.csv new file mode 100644 index 0000000..22c0f23 --- /dev/null +++ b/benchmark/results/frontier/1-dynampi_shutdown_frontier_1-4058166/naive_shutdown_frontier.csv @@ -0,0 +1,2 @@ +system,nodes,world_size,workers,time_per_shutdown_us,iterations +frontier,1,56,55,24.2909,165568 diff --git a/benchmark/results/frontier/1-dynampi_ss_frontier_1-4054467/strong_scaling_frontier.csv b/benchmark/results/frontier/1-dynampi_ss_frontier_1-4054467/strong_scaling_frontier.csv new file mode 100644 index 0000000..df6e1ad --- /dev/null +++ b/benchmark/results/frontier/1-dynampi_ss_frontier_1-4054467/strong_scaling_frontier.csv @@ -0,0 +1,29 @@ +system,distributor,mode,expected_ns,round_target_ms,duration_s,nodes,world_size,workers,total_tasks,total_subtasks,elapsed_s,throughput_tasks_per_s +frontier,naive,fixed,1000,200,10,1,56,55,23186674,23186674,18.7768,1.23486e+06 +frontier,naive,fixed,10000,200,10,1,56,55,13768572,13768572,10.8496,1.26904e+06 +frontier,naive,fixed,100000,200,10,1,56,55,5477490,5477490,10.1831,537900 +frontier,naive,fixed,1000000,200,10,1,56,55,547414,547414,10.1623,53867.1 +frontier,naive,fixed,10000000,200,10,1,56,55,46916,46916,10.1724,4612.08 +frontier,naive,fixed,100000000,200,10,1,56,55,2854,2854,10.1013,282.539 +frontier,naive,fixed,1000000000,200,10,1,56,55,550,550,10.0013,54.993 
+frontier,naive,poisson,1000,200,10,1,56,55,23350494,23350494,18.7675,1.2442e+06 +frontier,naive,poisson,10000,200,10,1,56,55,13699510,13699510,10.8446,1.26325e+06 +frontier,naive,poisson,100000,200,10,1,56,55,5440040,5440040,10.1841,534169 +frontier,naive,poisson,1000000,200,10,1,56,55,547428,547428,10.1661,53848.4 +frontier,naive,poisson,10000000,200,10,1,56,55,46950,46950,10.1725,4615.37 +frontier,naive,poisson,100000000,200,10,1,56,55,2910,2910,10.3,282.524 +frontier,naive,poisson,1000000000,200,10,1,56,55,550,550,10.0013,54.993 +frontier,hierarchical,fixed,1000,200,10,1,56,55,103808000,103808000,11.1631,9.29918e+06 +frontier,hierarchical,fixed,10000,200,10,1,56,55,35839000,35839000,10.2613,3.49263e+06 +frontier,hierarchical,fixed,100000,200,10,1,56,55,2787000,2787000,10.3762,268594 +frontier,hierarchical,fixed,1000000,200,10,1,56,55,110000,110000,10.0183,10979.9 +frontier,hierarchical,fixed,10000000,200,10,1,56,55,7700,7700,11.0621,696.071 +frontier,hierarchical,fixed,100000000,200,10,1,56,55,770,770,11.2013,68.7421 +frontier,hierarchical,fixed,1000000000,200,10,1,56,55,110,110,16.0005,6.87479 +frontier,hierarchical,poisson,1000,200,10,1,56,55,103554000,103554000,11.1279,9.30583e+06 +frontier,hierarchical,poisson,10000,200,10,1,56,55,34642000,34642000,10.2254,3.38783e+06 +frontier,hierarchical,poisson,100000,200,10,1,56,55,2744000,2744000,10.2129,268680 +frontier,hierarchical,poisson,1000000,200,10,1,56,55,110000,110000,10.0234,10974.3 +frontier,hierarchical,poisson,10000000,200,10,1,56,55,7700,7700,11.0625,696.047 +frontier,hierarchical,poisson,100000000,200,10,1,56,55,770,770,11.2009,68.7445 +frontier,hierarchical,poisson,1000000000,200,10,1,56,55,110,110,16.0004,6.87481 diff --git a/benchmark/results/frontier/1-dynampi_ss_frontier_1-4054787/strong_scaling_frontier.csv b/benchmark/results/frontier/1-dynampi_ss_frontier_1-4054787/strong_scaling_frontier.csv new file mode 100644 index 0000000..7761c24 --- /dev/null +++ 
b/benchmark/results/frontier/1-dynampi_ss_frontier_1-4054787/strong_scaling_frontier.csv @@ -0,0 +1,29 @@ +system,distributor,mode,expected_us,duration_s,nodes,world_size,workers,total_tasks,elapsed_s,throughput_tasks_per_s +frontier,naive,fixed,1,10,1,56,55,14009820,10.0002,1.40096e+06 +frontier,naive,fixed,10,10,1,56,55,14745060,10.0001,1.47449e+06 +frontier,naive,fixed,100,10,1,56,55,4609660,10.0003,460952 +frontier,naive,fixed,1000,10,1,56,55,539110,10.0021,53899.9 +frontier,naive,fixed,10000,10,1,56,55,55000,10.0248,5486.39 +frontier,naive,fixed,100000,10,1,56,55,2964,10.3015,287.725 +frontier,naive,fixed,1000000,10,1,56,55,444,13.0005,34.1525 +frontier,naive,random,1,10,1,56,55,13885960,10.0001,1.38858e+06 +frontier,naive,random,10,10,1,56,55,13543750,10.0002,1.35435e+06 +frontier,naive,random,100,10,1,56,55,3353020,10.0006,335281 +frontier,naive,random,1000,10,1,56,55,347930,10.0048,34776.2 +frontier,naive,random,10000,10,1,56,55,35090,10.0356,3496.57 +frontier,naive,random,100000,10,1,56,55,3523,10.5376,334.325 +frontier,naive,random,1000000,10,1,56,55,511,16.1204,31.699 +frontier,hierarchical,fixed,1,10,1,56,55,18667330,10.0001,1.86671e+06 +frontier,hierarchical,fixed,10,10,1,56,55,8643580,10.0002,864340 +frontier,hierarchical,fixed,100,10,1,56,55,1863620,10.0011,186342 +frontier,hierarchical,fixed,1000,10,1,56,55,183260,10.0093,18308.9 +frontier,hierarchical,fixed,10000,10,1,56,55,15730,10.1382,1551.56 +frontier,hierarchical,fixed,100000,10,1,56,55,2834,10.4018,272.453 +frontier,hierarchical,fixed,1000000,10,1,56,55,440,16.0006,27.499 +frontier,hierarchical,random,1,10,1,56,55,18438310,10.0001,1.84381e+06 +frontier,hierarchical,random,10,10,1,56,55,7449860,10.0003,744967 +frontier,hierarchical,random,100,10,1,56,55,951610,10.0017,95145.2 +frontier,hierarchical,random,1000,10,1,56,55,99550,10.0156,9939.47 +frontier,hierarchical,random,10000,10,1,56,55,11068,10.1014,1095.69 +frontier,hierarchical,random,100000,10,1,56,55,2410,10.7022,225.188 
+frontier,hierarchical,random,1000000,10,1,56,55,440,19.8574,22.158 diff --git a/benchmark/results/frontier/1-dynampi_ss_frontier_1-4058681/strong_scaling_frontier.csv b/benchmark/results/frontier/1-dynampi_ss_frontier_1-4058681/strong_scaling_frontier.csv new file mode 100644 index 0000000..0be314e --- /dev/null +++ b/benchmark/results/frontier/1-dynampi_ss_frontier_1-4058681/strong_scaling_frontier.csv @@ -0,0 +1,29 @@ +system,distributor,mode,expected_us,duration_s,nodes,world_size,workers,total_tasks,elapsed_s,throughput_tasks_per_s +frontier,naive,fixed,1,10,1,56,55,12040519,10,1.20405e+06 +frontier,naive,fixed,10,10,1,56,55,12620115,10,1.26201e+06 +frontier,naive,fixed,100,10,1,56,55,5395057,10.0001,539500 +frontier,naive,fixed,1000,10,1,56,55,548975,10.0009,54892.7 +frontier,naive,fixed,10000,10,1,56,55,55034,10.0132,5496.15 +frontier,naive,fixed,100000,10,1,56,55,5499,10.0021,549.786 +frontier,naive,fixed,1000000,10,1,56,55,543,10.0008,54.2955 +frontier,naive,random,1,10,1,56,55,12448955,10,1.24489e+06 +frontier,naive,random,10,10,1,56,55,13253391,10,1.32534e+06 +frontier,naive,random,100,10,1,56,55,5389311,10.0002,538922 +frontier,naive,random,1000,10,1,56,55,549678,10.0018,54957.7 +frontier,naive,random,10000,10,1,56,55,54805,10.0184,5470.42 +frontier,naive,random,100000,10,1,56,55,5575,10.1503,549.245 +frontier,naive,random,1000000,10,1,56,55,577,11.639,49.5747 +frontier,hierarchical,fixed,1,10,1,56,55,16132218,10,1.61322e+06 +frontier,hierarchical,fixed,10,10,1,56,55,16973914,10,1.69739e+06 +frontier,hierarchical,fixed,100,10,1,56,55,4543143,10.0001,454311 +frontier,hierarchical,fixed,1000,10,1,56,55,529400,10.0003,52938.4 +frontier,hierarchical,fixed,10000,10,1,56,55,53877,10.0028,5386.2 +frontier,hierarchical,fixed,100000,10,1,56,55,5397,10.0047,539.445 +frontier,hierarchical,fixed,1000000,10,1,56,55,539,10.0013,53.8932 +frontier,hierarchical,random,1,10,1,56,55,15936845,10,1.59368e+06 
+frontier,hierarchical,random,10,10,1,56,55,15838898,10,1.58389e+06 +frontier,hierarchical,random,100,10,1,56,55,3247533,10.0001,324751 +frontier,hierarchical,random,1000,10,1,56,55,341366,10.0002,34135.8 +frontier,hierarchical,random,10000,10,1,56,55,34134,10.0153,3408.17 +frontier,hierarchical,random,100000,10,1,56,55,3464,10.1463,341.404 +frontier,hierarchical,random,1000000,10,1,56,55,335,11.3834,29.4289 diff --git a/benchmark/results/frontier/1024-dynampi_shutdown_frontier_1024-4058261/naive_shutdown_frontier.csv b/benchmark/results/frontier/1024-dynampi_shutdown_frontier_1024-4058261/naive_shutdown_frontier.csv new file mode 100644 index 0000000..e59bb8c --- /dev/null +++ b/benchmark/results/frontier/1024-dynampi_shutdown_frontier_1024-4058261/naive_shutdown_frontier.csv @@ -0,0 +1,2 @@ +system,nodes,world_size,workers,time_per_shutdown_us,iterations +frontier,1024,57344,57343,498323,20 diff --git a/benchmark/results/frontier/1024-dynampi_ss_frontier_1024-4058697/strong_scaling_frontier.csv b/benchmark/results/frontier/1024-dynampi_ss_frontier_1024-4058697/strong_scaling_frontier.csv new file mode 100644 index 0000000..a0b041b --- /dev/null +++ b/benchmark/results/frontier/1024-dynampi_ss_frontier_1024-4058697/strong_scaling_frontier.csv @@ -0,0 +1,29 @@ +system,distributor,mode,expected_us,duration_s,nodes,world_size,workers,total_tasks,elapsed_s,throughput_tasks_per_s +frontier,naive,fixed,1,10,1024,57344,57343,12646757,10.2384,1.23523e+06 +frontier,naive,fixed,10,10,1024,57344,57343,12728450,10.2096,1.24672e+06 +frontier,naive,fixed,100,10,1024,57344,57343,3121891,10.0535,310529 +frontier,naive,fixed,1000,10,1024,57344,57343,1508945,10.0329,150399 +frontier,naive,fixed,10000,10,1024,57344,57343,1372316,10.0463,136599 +frontier,naive,fixed,100000,10,1024,57344,57343,1322242,10.3211,128111 +frontier,naive,fixed,1000000,10,1024,57344,57343,523136,11.0993,47132.5 +frontier,naive,random,1,10,1024,57344,57343,12534290,10.2092,1.22774e+06 
+frontier,naive,random,10,10,1024,57344,57343,12858648,10.2077,1.2597e+06 +frontier,naive,random,100,10,1024,57344,57343,3184408,10.0367,317275 +frontier,naive,random,1000,10,1024,57344,57343,1510168,10.0331,150519 +frontier,naive,random,10000,10,1024,57344,57343,1367983,10.0574,136018 +frontier,naive,random,100000,10,1024,57344,57343,1057967,10.1691,104037 +frontier,naive,random,1000000,10,1024,57344,57343,587912,11.9787,49079.8 +frontier,hierarchical,fixed,1,10,1024,57344,57343,103594651,10.0043,1.0355e+07 +frontier,hierarchical,fixed,10,10,1024,57344,57343,104492874,10.0042,1.04449e+07 +frontier,hierarchical,fixed,100,10,1024,57344,57343,102410818,10.0042,1.02368e+07 +frontier,hierarchical,fixed,1000,10,1024,57344,57343,100673795,10.0042,1.00632e+07 +frontier,hierarchical,fixed,10000,10,1024,57344,57343,56022791,10.0194,5.59145e+06 +frontier,hierarchical,fixed,100000,10,1024,57344,57343,5662051,10.2188,554083 +frontier,hierarchical,fixed,1000000,10,1024,57344,57343,584370,12.0055,48675 +frontier,hierarchical,random,1,10,1024,57344,57343,103763301,10.0043,1.03719e+07 +frontier,hierarchical,random,10,10,1024,57344,57343,104267975,10.0041,1.04225e+07 +frontier,hierarchical,random,100,10,1024,57344,57343,99933459,10.0043,9.98907e+06 +frontier,hierarchical,random,1000,10,1024,57344,57343,97689946,10.0043,9.7648e+06 +frontier,hierarchical,random,10000,10,1024,57344,57343,765278,18.8932,40505.5 +frontier,hierarchical,random,100000,10,1024,57344,57343,3634573,10.3729,350393 +frontier,hierarchical,random,1000000,10,1024,57344,57343,454428,15.3959,29516.1 diff --git a/benchmark/results/frontier/128-dynampi_shutdown_frontier_128-4058173/naive_shutdown_frontier.csv b/benchmark/results/frontier/128-dynampi_shutdown_frontier_128-4058173/naive_shutdown_frontier.csv new file mode 100644 index 0000000..1d77434 --- /dev/null +++ b/benchmark/results/frontier/128-dynampi_shutdown_frontier_128-4058173/naive_shutdown_frontier.csv @@ -0,0 +1,2 @@ 
+system,nodes,world_size,workers,time_per_shutdown_us,iterations +frontier,128,7168,7167,27280.3,329 diff --git a/benchmark/results/frontier/128-dynampi_ss_frontier_128-4054794/strong_scaling_frontier.csv b/benchmark/results/frontier/128-dynampi_ss_frontier_128-4054794/strong_scaling_frontier.csv new file mode 100644 index 0000000..b72bf5f --- /dev/null +++ b/benchmark/results/frontier/128-dynampi_ss_frontier_128-4054794/strong_scaling_frontier.csv @@ -0,0 +1,29 @@ +system,distributor,mode,expected_us,duration_s,nodes,world_size,workers,total_tasks,elapsed_s,throughput_tasks_per_s +frontier,naive,fixed,1,10,128,7168,7167,3683838,10.067,365933 +frontier,naive,fixed,10,10,128,7168,7167,3769842,10.0639,374591 +frontier,naive,fixed,100,10,128,7168,7167,3311154,10.058,329205 +frontier,naive,fixed,1000,10,128,7168,7167,2465448,10.0633,244994 +frontier,naive,fixed,10000,10,128,7168,7167,2092764,10.1019,207164 +frontier,naive,fixed,100000,10,128,7168,7167,379900,10.3373,36750.5 +frontier,naive,fixed,1000000,10,128,7168,7167,57340,13.0328,4399.66 +frontier,naive,random,1,10,128,7168,7167,3755508,10.0738,372800 +frontier,naive,random,10,10,128,7168,7167,3726840,10.0406,371177 +frontier,naive,random,100,10,128,7168,7167,3325488,10.0798,329916 +frontier,naive,random,1000,10,128,7168,7167,2465448,10.0884,244384 +frontier,naive,random,10000,10,128,7168,7167,2322108,10.0742,230500 +frontier,naive,random,100000,10,128,7168,7167,414730,10.4981,39505.2 +frontier,naive,random,1000000,10,128,7168,7167,58757,14.5895,4027.36 +frontier,hierarchical,fixed,1,10,128,7168,7167,71784672,10.0038,7.17575e+06 +frontier,hierarchical,fixed,10,10,128,7168,7167,94575732,10.003,9.45478e+06 +frontier,hierarchical,fixed,100,10,128,7168,7167,97657542,10.0015,9.76425e+06 +frontier,hierarchical,fixed,1000,10,128,7168,7167,29943726,10.0067,2.99237e+06 +frontier,hierarchical,fixed,10000,10,128,7168,7167,3540498,10.0705,351570 +frontier,hierarchical,fixed,100000,10,128,7168,7167,371388,10.4116,35670.5 
+frontier,hierarchical,fixed,1000000,10,128,7168,7167,57228,16.0056,3575.5 +frontier,hierarchical,random,1,10,128,7168,7167,70222266,10.0028,7.02028e+06 +frontier,hierarchical,random,10,10,128,7168,7167,65119362,10.0036,6.50959e+06 +frontier,hierarchical,random,100,10,128,7168,7167,28424322,10.0089,2.8399e+06 +frontier,hierarchical,random,1000,10,128,7168,7167,7425012,10.0551,738436 +frontier,hierarchical,random,10000,10,128,7168,7167,2221770,10.1115,219727 +frontier,hierarchical,random,100000,10,128,7168,7167,257148,10.6039,24250.2 +frontier,hierarchical,random,1000000,10,128,7168,7167,42948,17.984,2388.12 diff --git a/benchmark/results/frontier/128-dynampi_ss_frontier_128-4058688/strong_scaling_frontier.csv b/benchmark/results/frontier/128-dynampi_ss_frontier_128-4058688/strong_scaling_frontier.csv new file mode 100644 index 0000000..1c55eae --- /dev/null +++ b/benchmark/results/frontier/128-dynampi_ss_frontier_128-4058688/strong_scaling_frontier.csv @@ -0,0 +1,29 @@ +system,distributor,mode,expected_us,duration_s,nodes,world_size,workers,total_tasks,elapsed_s,throughput_tasks_per_s +frontier,naive,fixed,1,10,128,7168,7167,5679185,10.0082,567456 +frontier,naive,fixed,10,10,128,7168,7167,5710363,10.0157,570141 +frontier,naive,fixed,100,10,128,7168,7167,6428132,10.0282,641002 +frontier,naive,fixed,1000,10,128,7168,7167,2740884,10.0283,273315 +frontier,naive,fixed,10000,10,128,7168,7167,2397737,10.0278,239109 +frontier,naive,fixed,100000,10,128,7168,7167,716464,10.0321,71417 +frontier,naive,fixed,1000000,10,128,7168,7167,70868,10.0287,7066.49 +frontier,naive,random,1,10,128,7168,7167,5715286,10.0175,570529 +frontier,naive,random,10,10,128,7168,7167,5753891,10.0078,574939 +frontier,naive,random,100,10,128,7168,7167,6498880,10.0284,648049 +frontier,naive,random,1000,10,128,7168,7167,2730826,10.0286,272303 +frontier,naive,random,10000,10,128,7168,7167,2385026,10.0283,237828 +frontier,naive,random,100000,10,128,7168,7167,720111,10.1958,70627.9 
+frontier,naive,random,1000000,10,128,7168,7167,76502,11.9879,6381.58 +frontier,hierarchical,fixed,1,10,128,7168,7167,103336119,10.0006,1.0333e+07 +frontier,hierarchical,fixed,10,10,128,7168,7167,104483546,10.0005,1.04478e+07 +frontier,hierarchical,fixed,100,10,128,7168,7167,101868200,10.0006,1.01863e+07 +frontier,hierarchical,fixed,1000,10,128,7168,7167,67608419,10.0019,6.75959e+06 +frontier,hierarchical,fixed,10000,10,128,7168,7167,7014044,10.0215,699902 +frontier,hierarchical,fixed,100000,10,128,7168,7167,703107,10.0087,70249.7 +frontier,hierarchical,fixed,1000000,10,128,7168,7167,73127,12.0027,6092.54 +frontier,hierarchical,random,1,10,128,7168,7167,103058588,10.0006,1.03052e+07 +frontier,hierarchical,random,10,10,128,7168,7167,104183225,10.0005,1.04178e+07 +frontier,hierarchical,random,100,10,128,7168,7167,99624543,10.0006,9.96186e+06 +frontier,hierarchical,random,1000,10,128,7168,7167,44051991,10.0032,4.40379e+06 +frontier,hierarchical,random,10000,10,128,7168,7167,4469070,10.0327,445451 +frontier,hierarchical,random,100000,10,128,7168,7167,453946,10.3398,43902.8 +frontier,hierarchical,random,1000000,10,128,7168,7167,56714,15.0959,3756.91 diff --git a/benchmark/results/frontier/16-dynampi_shutdown_frontier_16-4058170/naive_shutdown_frontier.csv b/benchmark/results/frontier/16-dynampi_shutdown_frontier_16-4058170/naive_shutdown_frontier.csv new file mode 100644 index 0000000..ee52ddc --- /dev/null +++ b/benchmark/results/frontier/16-dynampi_shutdown_frontier_16-4058170/naive_shutdown_frontier.csv @@ -0,0 +1,2 @@ +system,nodes,world_size,workers,time_per_shutdown_us,iterations +frontier,16,896,895,3387.91,2616 diff --git a/benchmark/results/frontier/16-dynampi_ss_frontier_16-4054471/strong_scaling_frontier.csv b/benchmark/results/frontier/16-dynampi_ss_frontier_16-4054471/strong_scaling_frontier.csv new file mode 100644 index 0000000..64dddc5 --- /dev/null +++ b/benchmark/results/frontier/16-dynampi_ss_frontier_16-4054471/strong_scaling_frontier.csv @@ -0,0 
+1,29 @@ +system,distributor,mode,expected_ns,round_target_ms,duration_s,nodes,world_size,workers,total_tasks,total_subtasks,elapsed_s,throughput_tasks_per_s +frontier,naive,fixed,1000,200,10,16,896,895,190523267,190523267,144.346,1.31991e+06 +frontier,naive,fixed,10000,200,10,16,896,895,30305359,30305359,23.2854,1.30148e+06 +frontier,naive,fixed,100000,200,10,16,896,895,8057025,8057025,12.6688,635973 +frontier,naive,fixed,1000000,200,10,16,896,895,2935707,2935707,10.6162,276530 +frontier,naive,fixed,10000000,200,10,16,896,895,760916,760916,10.1801,74745.7 +frontier,naive,fixed,100000000,200,10,16,896,895,45694,45694,10.1061,4521.41 +frontier,naive,fixed,1000000000,200,10,16,896,895,8950,8950,10.0356,891.829 +frontier,naive,poisson,1000,200,10,16,896,895,190398492,190398492,146.189,1.30242e+06 +frontier,naive,poisson,10000,200,10,16,896,895,30366935,30366935,23.207,1.30852e+06 +frontier,naive,poisson,100000,200,10,16,896,895,8046895,8046895,12.6731,634960 +frontier,naive,poisson,1000000,200,10,16,896,895,2941516,2941516,10.6153,277101 +frontier,naive,poisson,10000000,200,10,16,896,895,760918,760918,10.1805,74743 +frontier,naive,poisson,100000000,200,10,16,896,895,45694,45694,10.1053,4521.79 +frontier,naive,poisson,1000000000,200,10,16,896,895,8950,8950,10.0353,891.849 +frontier,hierarchical,fixed,1000,200,10,16,896,895,214670000,214670000,38.1494,5.62708e+06 +frontier,hierarchical,fixed,10000,200,10,16,896,895,55310000,55310000,12.7119,4.35103e+06 +frontier,hierarchical,fixed,100000,200,10,16,896,895,38330000,38330000,10.3212,3.71373e+06 +frontier,hierarchical,fixed,1000000,200,10,16,896,895,1790000,1790000,10.1593,176194 +frontier,hierarchical,fixed,10000000,200,10,16,896,895,35800,35800,12.3791,2891.96 +frontier,hierarchical,fixed,100000000,200,10,16,896,895,3580,3580,12.4027,288.647 +frontier,hierarchical,fixed,1000000000,200,10,16,896,895,895,895,31.0012,28.8698 +frontier,hierarchical,poisson,1000,200,10,16,896,895,214670000,214670000,38.0652,5.63954e+06 
+frontier,hierarchical,poisson,10000,200,10,16,896,895,56180000,56180000,12.8342,4.37738e+06 +frontier,hierarchical,poisson,100000,200,10,16,896,895,38330000,38330000,10.3506,3.70315e+06 +frontier,hierarchical,poisson,1000000,200,10,16,896,895,1790000,1790000,10.166,176077 +frontier,hierarchical,poisson,10000000,200,10,16,896,895,35800,35800,12.38,2891.77 +frontier,hierarchical,poisson,100000000,200,10,16,896,895,3580,3580,12.403,288.64 +frontier,hierarchical,poisson,1000000000,200,10,16,896,895,895,895,36.0012,24.8603 diff --git a/benchmark/results/frontier/16-dynampi_ss_frontier_16-4054791/strong_scaling_frontier.csv b/benchmark/results/frontier/16-dynampi_ss_frontier_16-4054791/strong_scaling_frontier.csv new file mode 100644 index 0000000..5d1686b --- /dev/null +++ b/benchmark/results/frontier/16-dynampi_ss_frontier_16-4054791/strong_scaling_frontier.csv @@ -0,0 +1,29 @@ +system,distributor,mode,expected_us,duration_s,nodes,world_size,workers,total_tasks,elapsed_s,throughput_tasks_per_s +frontier,naive,fixed,1,10,16,896,895,3979170,10.0046,397735 +frontier,naive,fixed,10,10,16,896,895,3995280,10.0089,399173 +frontier,naive,fixed,100,10,16,896,895,3476180,10.0093,347295 +frontier,naive,fixed,1000,10,16,896,895,2373540,10.0151,236997 +frontier,naive,fixed,10000,10,16,896,895,762540,10.044,75920.2 +frontier,naive,fixed,100000,10,16,896,895,47484,10.3065,4607.19 +frontier,naive,fixed,1000000,10,16,896,895,7164,13.006,550.822 +frontier,naive,random,1,10,16,896,895,3984540,10.0067,398185 +frontier,naive,random,10,10,16,896,895,3989910,10.005,398790 +frontier,naive,random,100,10,16,896,895,3495870,10.0084,349293 +frontier,naive,random,1000,10,16,896,895,2597290,10.0091,259492 +frontier,naive,random,10000,10,16,896,895,537000,10.0455,53456.7 +frontier,naive,random,100000,10,16,896,895,53780,10.6237,5062.28 +frontier,naive,random,1000000,10,16,896,895,7348,14.3291,512.804 +frontier,hierarchical,fixed,1,10,16,896,895,32431220,10.0013,3.2427e+06 
+frontier,hierarchical,fixed,10,10,16,896,895,16992470,10.0016,1.69898e+06 +frontier,hierarchical,fixed,100,10,16,896,895,25949630,10.0008,2.59475e+06 +frontier,hierarchical,fixed,1000,10,16,896,895,4176070,10.0052,417389 +frontier,hierarchical,fixed,10000,10,16,896,895,447500,10.0652,44460.2 +frontier,hierarchical,fixed,100000,10,16,896,895,45340,10.4097,4355.54 +frontier,hierarchical,fixed,1000000,10,16,896,895,7060,16.0025,441.18 +frontier,hierarchical,random,1,10,16,896,895,22303400,10.0011,2.23008e+06 +frontier,hierarchical,random,10,10,16,896,895,16752610,10.002,1.67493e+06 +frontier,hierarchical,random,100,10,16,896,895,3887880,10.0069,388521 +frontier,hierarchical,random,1000,10,16,896,895,619340,10.0629,61546.9 +frontier,hierarchical,random,10000,10,16,896,895,201736,10.1296,19915.4 +frontier,hierarchical,random,100000,10,16,896,895,33160,10.71,3096.17 +frontier,hierarchical,random,1000000,10,16,896,895,5320,17.524,303.583 diff --git a/benchmark/results/frontier/16-dynampi_ss_frontier_16-4058685/strong_scaling_frontier.csv b/benchmark/results/frontier/16-dynampi_ss_frontier_16-4058685/strong_scaling_frontier.csv new file mode 100644 index 0000000..b1686c5 --- /dev/null +++ b/benchmark/results/frontier/16-dynampi_ss_frontier_16-4058685/strong_scaling_frontier.csv @@ -0,0 +1,29 @@ +system,distributor,mode,expected_us,duration_s,nodes,world_size,workers,total_tasks,elapsed_s,throughput_tasks_per_s +frontier,naive,fixed,1,10,16,896,895,5924765,10,592474 +frontier,naive,fixed,10,10,16,896,895,5969337,10.0018,596824 +frontier,naive,fixed,100,10,16,896,895,6565573,10.0034,656337 +frontier,naive,fixed,1000,10,16,896,895,2849830,10.0033,284888 +frontier,naive,fixed,10000,10,16,896,895,894147,10.0099,89326.7 +frontier,naive,fixed,100000,10,16,896,895,89478,10.0063,8942.16 +frontier,naive,fixed,1000000,10,16,896,895,8850,10.0049,884.57 +frontier,naive,random,1,10,16,896,895,5946061,10.0002,594593 +frontier,naive,random,10,10,16,896,895,6013833,10.0004,601357 
+frontier,naive,random,100,10,16,896,895,6631642,10.0034,662939 +frontier,naive,random,1000,10,16,896,895,2851279,10.0033,285033 +frontier,naive,random,10000,10,16,896,895,893936,10.0187,89226.5 +frontier,naive,random,100000,10,16,896,895,90058,10.1919,8836.25 +frontier,naive,random,1000000,10,16,896,895,9543,11.9244,800.295 +frontier,hierarchical,fixed,1,10,16,896,895,101244806,10.0002,1.01243e+07 +frontier,hierarchical,fixed,10,10,16,896,895,103394285,10.0002,1.03393e+07 +frontier,hierarchical,fixed,100,10,16,896,895,64932987,10.0003,6.49313e+06 +frontier,hierarchical,fixed,1000,10,16,896,895,8486748,10.0026,848454 +frontier,hierarchical,fixed,10000,10,16,896,895,875752,10.0193,87406.6 +frontier,hierarchical,fixed,100000,10,16,896,895,88622,10.2067,8682.72 +frontier,hierarchical,fixed,1000000,10,16,896,895,9158,12.0026,763 +frontier,hierarchical,random,1,10,16,896,895,101751609,10.0002,1.0175e+07 +frontier,hierarchical,random,10,10,16,896,895,103652771,10.0001,1.03652e+07 +frontier,hierarchical,random,100,10,16,896,895,49042617,10.0004,4.90409e+06 +frontier,hierarchical,random,1000,10,16,896,895,5510857,10.0027,550936 +frontier,hierarchical,random,10000,10,16,896,895,558116,10.0303,55642.8 +frontier,hierarchical,random,100000,10,16,896,895,56944,10.3491,5502.29 +frontier,hierarchical,random,1000000,10,16,896,895,6990,12.9946,537.914 diff --git a/benchmark/results/frontier/2-dynampi_shutdown_frontier_2-4058167/naive_shutdown_frontier.csv b/benchmark/results/frontier/2-dynampi_shutdown_frontier_2-4058167/naive_shutdown_frontier.csv new file mode 100644 index 0000000..d4ff365 --- /dev/null +++ b/benchmark/results/frontier/2-dynampi_shutdown_frontier_2-4058167/naive_shutdown_frontier.csv @@ -0,0 +1,2 @@ +system,nodes,world_size,workers,time_per_shutdown_us,iterations +frontier,2,112,111,241.62,32133 diff --git a/benchmark/results/frontier/2-dynampi_ss_frontier_2-4054468/strong_scaling_frontier.csv 
b/benchmark/results/frontier/2-dynampi_ss_frontier_2-4054468/strong_scaling_frontier.csv new file mode 100644 index 0000000..7a55094 --- /dev/null +++ b/benchmark/results/frontier/2-dynampi_ss_frontier_2-4054468/strong_scaling_frontier.csv @@ -0,0 +1,29 @@ +system,distributor,mode,expected_ns,round_target_ms,duration_s,nodes,world_size,workers,total_tasks,total_subtasks,elapsed_s,throughput_tasks_per_s +frontier,naive,fixed,1000,200,10,2,112,111,35298152,35298152,26.4428,1.33488e+06 +frontier,naive,fixed,10000,200,10,2,112,111,15634096,15634096,11.6174,1.34574e+06 +frontier,naive,fixed,100000,200,10,2,112,111,6789104,6789104,10.2955,659425 +frontier,naive,fixed,1000000,200,10,2,112,111,1101815,1101815,10.1538,108512 +frontier,naive,fixed,10000000,200,10,2,112,111,94516,94516,10.1749,9289.14 +frontier,naive,fixed,100000000,200,10,2,112,111,5710,5710,10.1012,565.277 +frontier,naive,fixed,1000000000,200,10,2,112,111,1110,1110,10.0039,110.957 +frontier,naive,poisson,1000,200,10,2,112,111,35176771,35176771,26.607,1.32209e+06 +frontier,naive,poisson,10000,200,10,2,112,111,15495802,15495802,11.6326,1.3321e+06 +frontier,naive,poisson,100000,200,10,2,112,111,6764652,6764652,10.2972,656939 +frontier,naive,poisson,1000000,200,10,2,112,111,1101344,1101344,10.154,108464 +frontier,naive,poisson,10000000,200,10,2,112,111,94517,94517,10.1752,9288.99 +frontier,naive,poisson,100000000,200,10,2,112,111,5710,5710,10.1015,565.261 +frontier,naive,poisson,1000000000,200,10,2,112,111,1110,1110,10.0039,110.957 +frontier,hierarchical,fixed,1000,200,10,2,112,111,67410000,67410000,13.2779,5.07687e+06 +frontier,hierarchical,fixed,10000,200,10,2,112,111,34410000,34410000,10.5455,3.26301e+06 +frontier,hierarchical,fixed,100000,200,10,2,112,111,5502000,5502000,10.3741,530359 +frontier,hierarchical,fixed,1000000,200,10,2,112,111,222000,222000,10.0937,21994 +frontier,hierarchical,fixed,10000000,200,10,2,112,111,2220,2220,10.0154,221.659 
+frontier,hierarchical,fixed,100000000,200,10,2,112,111,1110,1110,11.502,96.5049 +frontier,hierarchical,fixed,1000000000,200,10,2,112,111,111,111,12.0007,9.24947 +frontier,hierarchical,poisson,1000,200,10,2,112,111,66470000,66470000,13.3679,4.97237e+06 +frontier,hierarchical,poisson,10000,200,10,2,112,111,34410000,34410000,10.4962,3.27832e+06 +frontier,hierarchical,poisson,100000,200,10,2,112,111,5392000,5392000,10.2123,527993 +frontier,hierarchical,poisson,1000000,200,10,2,112,111,222000,222000,10.0446,22101.4 +frontier,hierarchical,poisson,10000000,200,10,2,112,111,11100,11100,11.1065,999.419 +frontier,hierarchical,poisson,100000000,200,10,2,112,111,222,222,22.204,9.99821 +frontier,hierarchical,poisson,1000000000,200,10,2,112,111,111,111,105.002,1.05712 diff --git a/benchmark/results/frontier/2-dynampi_ss_frontier_2-4054788/strong_scaling_frontier.csv b/benchmark/results/frontier/2-dynampi_ss_frontier_2-4054788/strong_scaling_frontier.csv new file mode 100644 index 0000000..d24e646 --- /dev/null +++ b/benchmark/results/frontier/2-dynampi_ss_frontier_2-4054788/strong_scaling_frontier.csv @@ -0,0 +1,29 @@ +system,distributor,mode,expected_us,duration_s,nodes,world_size,workers,total_tasks,elapsed_s,throughput_tasks_per_s +frontier,naive,fixed,1,10,2,112,111,5964918,10.0005,596460 +frontier,naive,fixed,10,10,2,112,111,6012204,10.0005,601190 +frontier,naive,fixed,100,10,2,112,111,4395156,10.0007,439485 +frontier,naive,fixed,1000,10,2,112,111,971028,10.0042,97061.6 +frontier,naive,fixed,10000,10,2,112,111,109668,10.0262,10938.1 +frontier,naive,fixed,100000,10,2,112,111,5932,10.3019,575.816 +frontier,naive,fixed,1000000,10,2,112,111,892,13.0023,68.6034 +frontier,naive,random,1,10,2,112,111,5974242,10.0005,597394 +frontier,naive,random,10,10,2,112,111,5954706,10.0005,595443 +frontier,naive,random,100,10,2,112,111,4828500,10.0006,482819 +frontier,naive,random,1000,10,2,112,111,682650,10.0067,68219.2 +frontier,naive,random,10000,10,2,112,111,69264,10.0634,6882.74 
+frontier,naive,random,100000,10,2,112,111,7004,10.4999,667.051 +frontier,naive,random,1000000,10,2,112,111,1035,16.194,63.9127 +frontier,hierarchical,fixed,1,10,2,112,111,11324664,10.0003,1.13243e+06 +frontier,hierarchical,fixed,10,10,2,112,111,7390602,10.0005,739024 +frontier,hierarchical,fixed,100,10,2,112,111,2929956,10.0012,292960 +frontier,hierarchical,fixed,1000,10,2,112,111,279276,10.0127,27892.1 +frontier,hierarchical,fixed,10000,10,2,112,111,55722,10.0573,5540.44 +frontier,hierarchical,fixed,100000,10,2,112,111,5724,10.4027,550.239 +frontier,hierarchical,fixed,1000000,10,2,112,111,884,16.0009,55.2468 +frontier,hierarchical,random,1,10,2,112,111,11346864,10.0004,1.13465e+06 +frontier,hierarchical,random,10,10,2,112,111,6774552,10.0004,677426 +frontier,hierarchical,random,100,10,2,112,111,1307136,10.0028,130676 +frontier,hierarchical,random,1000,10,2,112,111,140304,10.0254,13994.9 +frontier,hierarchical,random,10000,10,2,112,111,19826,10.2549,1933.31 +frontier,hierarchical,random,100000,10,2,112,111,4624,10.5091,440 +frontier,hierarchical,random,1000000,10,2,112,111,664,15.1035,43.9634 diff --git a/benchmark/results/frontier/2-dynampi_ss_frontier_2-4058682/strong_scaling_frontier.csv b/benchmark/results/frontier/2-dynampi_ss_frontier_2-4058682/strong_scaling_frontier.csv new file mode 100644 index 0000000..fd10791 --- /dev/null +++ b/benchmark/results/frontier/2-dynampi_ss_frontier_2-4058682/strong_scaling_frontier.csv @@ -0,0 +1,29 @@ +system,distributor,mode,expected_us,duration_s,nodes,world_size,workers,total_tasks,elapsed_s,throughput_tasks_per_s +frontier,naive,fixed,1,10,2,112,111,7893820,10,789379 +frontier,naive,fixed,10,10,2,112,111,8001886,10.0002,800175 +frontier,naive,fixed,100,10,2,112,111,6614880,10.0003,661471 +frontier,naive,fixed,1000,10,2,112,111,1101559,10.0011,110144 +frontier,naive,fixed,10000,10,2,112,111,110970,10.012,11083.7 +frontier,naive,fixed,100000,10,2,112,111,11097,10.0028,1109.39 
+frontier,naive,fixed,1000000,10,2,112,111,1097,10.0025,109.673 +frontier,naive,random,1,10,2,112,111,7953784,10,795377 +frontier,naive,random,10,10,2,112,111,8114285,10.0001,811417 +frontier,naive,random,100,10,2,112,111,6675135,10.0003,667496 +frontier,naive,random,1000,10,2,112,111,1102007,10.0019,110180 +frontier,naive,random,10000,10,2,112,111,111022,10.0195,11080.6 +frontier,naive,random,100000,10,2,112,111,11116,10.1779,1092.17 +frontier,naive,random,1000000,10,2,112,111,1182,11.9545,98.875 +frontier,hierarchical,fixed,1,10,2,112,111,21664284,10.0001,2.16642e+06 +frontier,hierarchical,fixed,10,10,2,112,111,21791375,10.0001,2.17911e+06 +frontier,hierarchical,fixed,100,10,2,112,111,8286109,10.0002,828596 +frontier,hierarchical,fixed,1000,10,2,112,111,1055918,10.0009,105582 +frontier,hierarchical,fixed,10000,10,2,112,111,108646,10.0139,10849.6 +frontier,hierarchical,fixed,100000,10,2,112,111,10891,10.0064,1088.4 +frontier,hierarchical,fixed,1000000,10,2,112,111,1062,10.0024,106.174 +frontier,hierarchical,random,1,10,2,112,111,21626884,10.0001,2.16267e+06 +frontier,hierarchical,random,10,10,2,112,111,21805222,10.0002,2.18049e+06 +frontier,hierarchical,random,100,10,2,112,111,6253514,10.0003,625335 +frontier,hierarchical,random,1000,10,2,112,111,686021,10.0007,68597.4 +frontier,hierarchical,random,10000,10,2,112,111,69032,10.0185,6890.42 +frontier,hierarchical,random,100000,10,2,112,111,6945,10.1851,681.875 +frontier,hierarchical,random,1000000,10,2,112,111,776,12.9328,60.0026 diff --git a/benchmark/results/frontier/2048-dynampi_shutdown_frontier_2048-4058262/naive_shutdown_frontier.csv b/benchmark/results/frontier/2048-dynampi_shutdown_frontier_2048-4058262/naive_shutdown_frontier.csv new file mode 100644 index 0000000..d14582a --- /dev/null +++ b/benchmark/results/frontier/2048-dynampi_shutdown_frontier_2048-4058262/naive_shutdown_frontier.csv @@ -0,0 +1,2 @@ +system,nodes,world_size,workers,time_per_shutdown_us,iterations 
+frontier,2048,114688,114687,1.80677e+06,6 diff --git a/benchmark/results/frontier/256-dynampi_shutdown_frontier_256-4058174/naive_shutdown_frontier.csv b/benchmark/results/frontier/256-dynampi_shutdown_frontier_256-4058174/naive_shutdown_frontier.csv new file mode 100644 index 0000000..7faf780 --- /dev/null +++ b/benchmark/results/frontier/256-dynampi_shutdown_frontier_256-4058174/naive_shutdown_frontier.csv @@ -0,0 +1,2 @@ +system,nodes,world_size,workers,time_per_shutdown_us,iterations +frontier,256,14336,14335,57377,158 diff --git a/benchmark/results/frontier/256-dynampi_ss_frontier_256-4054795/strong_scaling_frontier.csv b/benchmark/results/frontier/256-dynampi_ss_frontier_256-4054795/strong_scaling_frontier.csv new file mode 100644 index 0000000..3280998 --- /dev/null +++ b/benchmark/results/frontier/256-dynampi_ss_frontier_256-4054795/strong_scaling_frontier.csv @@ -0,0 +1,29 @@ +system,distributor,mode,expected_us,duration_s,nodes,world_size,workers,total_tasks,elapsed_s,throughput_tasks_per_s +frontier,naive,fixed,1,10,256,14336,14335,3641090,10.1474,358820 +frontier,naive,fixed,10,10,256,14336,14335,3641090,10.1216,359734 +frontier,naive,fixed,100,10,256,14336,14335,3211040,10.1722,315668 +frontier,naive,fixed,1000,10,256,14336,14335,2348163,10.1686,230924 +frontier,naive,fixed,10000,10,256,14336,14335,2144825,10.1629,211044 +frontier,naive,fixed,100000,10,256,14336,14335,759804,10.3724,73252.5 +frontier,naive,fixed,1000000,10,256,14336,14335,114684,13.0679,8776 +frontier,naive,random,1,10,256,14336,14335,3612420,10.083,358270 +frontier,naive,random,10,10,256,14336,14335,3641090,10.1019,360435 +frontier,naive,random,100,10,256,14336,14335,3182370,10.1686,312962 +frontier,naive,random,1000,10,256,14336,14335,2348381,10.2191,229803 +frontier,naive,random,10000,10,256,14336,14335,2201986,10.2039,215798 +frontier,naive,random,100000,10,256,14336,14335,820751,10.5145,78058.6 +frontier,naive,random,1000000,10,256,14336,14335,117328,14.6075,8032.06 
+frontier,hierarchical,fixed,1,10,256,14336,14335,38303120,10.0181,3.8234e+06 +frontier,hierarchical,fixed,10,10,256,14336,14335,36869620,10.0111,3.68287e+06 +frontier,hierarchical,fixed,100,10,256,14336,14335,27437190,10.0386,2.73318e+06 +frontier,hierarchical,fixed,1000,10,256,14336,14335,48423630,10.0116,4.83676e+06 +frontier,hierarchical,fixed,10000,10,256,14336,14335,7081490,10.0751,702871 +frontier,hierarchical,fixed,100000,10,256,14336,14335,742780,10.4188,71292.1 +frontier,hierarchical,fixed,1000000,10,256,14336,14335,114460,16.007,7150.62 +frontier,hierarchical,random,1,10,256,14336,14335,37529030,10.0192,3.7457e+06 +frontier,hierarchical,random,10,10,256,14336,14335,36525580,10.0133,3.6477e+06 +frontier,hierarchical,random,100,10,256,14336,14335,25602310,10.0402,2.54998e+06 +frontier,hierarchical,random,1000,10,256,14336,14335,16169880,10.0606,1.60725e+06 +frontier,hierarchical,random,10000,10,256,14336,14335,4501190,10.1248,444573 +frontier,hierarchical,random,100000,10,256,14336,14335,514300,10.9927,46785.5 +frontier,hierarchical,random,1000000,10,256,14336,14335,85900,18.5246,4637.09 diff --git a/benchmark/results/frontier/256-dynampi_ss_frontier_256-4058689/strong_scaling_frontier.csv b/benchmark/results/frontier/256-dynampi_ss_frontier_256-4058689/strong_scaling_frontier.csv new file mode 100644 index 0000000..837d89a --- /dev/null +++ b/benchmark/results/frontier/256-dynampi_ss_frontier_256-4058689/strong_scaling_frontier.csv @@ -0,0 +1,29 @@ +system,distributor,mode,expected_us,duration_s,nodes,world_size,workers,total_tasks,elapsed_s,throughput_tasks_per_s +frontier,naive,fixed,1,10,256,14336,14335,7591021,10.0474,755523 +frontier,naive,fixed,10,10,256,14336,14335,8784967,10.0542,873762 +frontier,naive,fixed,100,10,256,14336,14335,6293517,10.0575,625755 +frontier,naive,fixed,1000,10,256,14336,14335,2566865,10.0564,255248 +frontier,naive,fixed,10000,10,256,14336,14335,2244467,10.0564,223188 
+frontier,naive,fixed,100000,10,256,14336,14335,1432879,10.061,142419 +frontier,naive,fixed,1000000,10,256,14336,14335,141744,10.056,14095.4 +frontier,naive,random,1,10,256,14336,14335,8762174,10.0575,871208 +frontier,naive,random,10,10,256,14336,14335,8914463,10.0481,887176 +frontier,naive,random,100,10,256,14336,14335,6361136,10.0574,632485 +frontier,naive,random,1000,10,256,14336,14335,2543940,10.0565,252966 +frontier,naive,random,10000,10,256,14336,14335,2225075,10.0565,221258 +frontier,naive,random,100000,10,256,14336,14335,1437946,10.1976,141009 +frontier,naive,random,1000000,10,256,14336,14335,152165,11.9725,12709.6 +frontier,hierarchical,fixed,1,10,256,14336,14335,102776227,10.0011,1.02765e+07 +frontier,hierarchical,fixed,10,10,256,14336,14335,104438745,10.0011,1.04427e+07 +frontier,hierarchical,fixed,100,10,256,14336,14335,102038288,10.0013,1.02025e+07 +frontier,hierarchical,fixed,1000,10,256,14336,14335,99991272,10.0022,9.99695e+06 +frontier,hierarchical,fixed,10000,10,256,14336,14335,14029522,10.0223,1.39983e+06 +frontier,hierarchical,fixed,100000,10,256,14336,14335,1406124,10.0113,140454 +frontier,hierarchical,fixed,1000000,10,256,14336,14335,146216,12.0033,12181.4 +frontier,hierarchical,random,1,10,256,14336,14335,102651296,10.0011,1.0264e+07 +frontier,hierarchical,random,10,10,256,14336,14335,103828615,10.001,1.03818e+07 +frontier,hierarchical,random,100,10,256,14336,14335,99609765,10.0011,9.95988e+06 +frontier,hierarchical,random,1000,10,256,14336,14335,87244278,10.0034,8.7215e+06 +frontier,hierarchical,random,10000,10,256,14336,14335,8937662,10.0328,890845 +frontier,hierarchical,random,100000,10,256,14336,14335,909211,10.3464,87877 +frontier,hierarchical,random,1000000,10,256,14336,14335,113106,15.0599,7510.41 diff --git a/benchmark/results/frontier/32-dynampi_shutdown_frontier_32-4058171/naive_shutdown_frontier.csv b/benchmark/results/frontier/32-dynampi_shutdown_frontier_32-4058171/naive_shutdown_frontier.csv new file mode 100644 index 
0000000..017eef8 --- /dev/null +++ b/benchmark/results/frontier/32-dynampi_shutdown_frontier_32-4058171/naive_shutdown_frontier.csv @@ -0,0 +1,2 @@ +system,nodes,world_size,workers,time_per_shutdown_us,iterations +frontier,32,1792,1791,6760.42,1318 diff --git a/benchmark/results/frontier/32-dynampi_ss_frontier_32-4054472/strong_scaling_frontier.csv b/benchmark/results/frontier/32-dynampi_ss_frontier_32-4054472/strong_scaling_frontier.csv new file mode 100644 index 0000000..0d9bc15 --- /dev/null +++ b/benchmark/results/frontier/32-dynampi_ss_frontier_32-4054472/strong_scaling_frontier.csv @@ -0,0 +1,20 @@ +system,distributor,mode,expected_ns,round_target_ms,duration_s,nodes,world_size,workers,total_tasks,total_subtasks,elapsed_s,throughput_tasks_per_s +frontier,naive,fixed,1000,200,10,32,1792,1791,367958397,367958397,289.072,1.2729e+06 +frontier,naive,fixed,10000,200,10,32,1792,1791,47254631,47254631,36.9228,1.27982e+06 +frontier,naive,fixed,100000,200,10,32,1792,1791,9557970,9557970,15.4676,617935 +frontier,naive,fixed,1000000,200,10,32,1792,1791,3103406,3103406,11.2552,275730 +frontier,naive,fixed,10000000,200,10,32,1792,1791,1522516,1522516,10.1892,149424 +frontier,naive,fixed,100000000,200,10,32,1792,1791,91390,91390,10.1104,9039.25 +frontier,naive,fixed,1000000000,200,10,32,1792,1791,17910,17910,10.0724,1778.13 +frontier,naive,poisson,1000,200,10,32,1792,1791,368041162,368041162,286.459,1.2848e+06 +frontier,naive,poisson,10000,200,10,32,1792,1791,47195308,47195308,36.9883,1.27595e+06 +frontier,naive,poisson,100000,200,10,32,1792,1791,9544842,9544842,15.4791,616628 +frontier,naive,poisson,1000000,200,10,32,1792,1791,3099899,3099899,11.2648,275185 +frontier,naive,poisson,10000000,200,10,32,1792,1791,1522516,1522516,10.1869,149458 +frontier,naive,poisson,100000000,200,10,32,1792,1791,91390,91390,10.1095,9040 +frontier,naive,poisson,1000000000,200,10,32,1792,1791,17910,17910,10.072,1778.19 
+frontier,hierarchical,fixed,1000,200,10,32,1792,1791,437019000,437019000,44.8812,9.73723e+06 +frontier,hierarchical,fixed,10000,200,10,32,1792,1791,123579000,123579000,13.5003,9.15383e+06 +frontier,hierarchical,fixed,100000,200,10,32,1792,1791,74622000,74622000,10.4339,7.15189e+06 +frontier,hierarchical,fixed,1000000,200,10,32,1792,1791,3582000,3582000,10.1708,352183 +frontier,hierarchical,fixed,10000000,200,10,32,1792,1791,71640,71640,17.0866,4192.76 diff --git a/benchmark/results/frontier/32-dynampi_ss_frontier_32-4054792/strong_scaling_frontier.csv b/benchmark/results/frontier/32-dynampi_ss_frontier_32-4054792/strong_scaling_frontier.csv new file mode 100644 index 0000000..c6158d8 --- /dev/null +++ b/benchmark/results/frontier/32-dynampi_ss_frontier_32-4054792/strong_scaling_frontier.csv @@ -0,0 +1,29 @@ +system,distributor,mode,expected_us,duration_s,nodes,world_size,workers,total_tasks,elapsed_s,throughput_tasks_per_s +frontier,naive,fixed,1,10,32,1792,1791,3864978,10.0167,385854 +frontier,naive,fixed,10,10,32,1792,1791,3900798,10.0098,389696 +frontier,naive,fixed,100,10,32,1792,1791,3406482,10.0207,339944 +frontier,naive,fixed,1000,10,32,1792,1791,2493072,10.0179,248861 +frontier,naive,fixed,10000,10,32,1792,1791,1318176,10.0496,131166 +frontier,naive,fixed,100000,10,32,1792,1791,94972,10.311,9210.77 +frontier,naive,fixed,1000000,10,32,1792,1791,14332,13.0091,1101.69 +frontier,naive,random,1,10,32,1792,1791,3882888,10.0106,387878 +frontier,naive,random,10,10,32,1792,1791,3875724,10.0126,387085 +frontier,naive,random,100,10,32,1792,1791,3420810,10.0194,341419 +frontier,naive,random,1000,10,32,1792,1791,2532474,10.0271,252563 +frontier,naive,random,10000,10,32,1792,1791,1063854,10.0585,105767 +frontier,naive,random,100000,10,32,1792,1791,104501,10.4119,10036.7 +frontier,naive,random,1000000,10,32,1792,1791,14678,14.4154,1018.22 +frontier,hierarchical,fixed,1,10,32,1792,1791,87418710,10.0008,8.74118e+06 
+frontier,hierarchical,fixed,10,10,32,1792,1791,86935140,10.0004,8.69315e+06 +frontier,hierarchical,fixed,100,10,32,1792,1791,46444212,10.001,4.64398e+06 +frontier,hierarchical,fixed,1000,10,32,1792,1791,8245764,10.0071,823995 +frontier,hierarchical,fixed,10000,10,32,1792,1791,891918,10.0464,88779.8 +frontier,hierarchical,fixed,100000,10,32,1792,1791,92562,10.4094,8892.19 +frontier,hierarchical,fixed,1000000,10,32,1792,1791,14328,16.0028,895.344 +frontier,hierarchical,random,1,10,32,1792,1791,87411546,10.0005,8.74074e+06 +frontier,hierarchical,random,10,10,32,1792,1791,87741090,10.0005,8.77367e+06 +frontier,hierarchical,random,100,10,32,1792,1791,13092210,10.0032,1.3088e+06 +frontier,hierarchical,random,1000,10,32,1792,1791,1855476,10.0476,184669 +frontier,hierarchical,random,10000,10,32,1792,1791,489948,10.1839,48110.3 +frontier,hierarchical,random,100000,10,32,1792,1791,67998,11.1207,6114.54 +frontier,hierarchical,random,1000000,10,32,1792,1791,10746,17.0003,632.108 diff --git a/benchmark/results/frontier/32-dynampi_ss_frontier_32-4058686/strong_scaling_frontier.csv b/benchmark/results/frontier/32-dynampi_ss_frontier_32-4058686/strong_scaling_frontier.csv new file mode 100644 index 0000000..3881a00 --- /dev/null +++ b/benchmark/results/frontier/32-dynampi_ss_frontier_32-4058686/strong_scaling_frontier.csv @@ -0,0 +1,29 @@ +system,distributor,mode,expected_us,duration_s,nodes,world_size,workers,total_tasks,elapsed_s,throughput_tasks_per_s +frontier,naive,fixed,1,10,32,1792,1791,5889064,10.0008,588862 +frontier,naive,fixed,10,10,32,1792,1791,5936899,10.0017,593586 +frontier,naive,fixed,100,10,32,1792,1791,6554789,10.0069,655026 +frontier,naive,fixed,1000,10,32,1792,1791,2836200,10.0069,283425 +frontier,naive,fixed,10000,10,32,1792,1791,1788319,10.0095,178662 +frontier,naive,fixed,100000,10,32,1792,1791,179056,10.01,17887.7 +frontier,naive,fixed,1000000,10,32,1792,1791,17709,10.008,1769.48 +frontier,naive,random,1,10,32,1792,1791,5906985,10.0029,590527 
+frontier,naive,random,10,10,32,1792,1791,5985074,10.0036,598293 +frontier,naive,random,100,10,32,1792,1791,6623444,10.0069,661888 +frontier,naive,random,1000,10,32,1792,1791,2837172,10.0068,283524 +frontier,naive,random,10000,10,32,1792,1791,1788483,10.0194,178502 +frontier,naive,random,100000,10,32,1792,1791,180255,10.1969,17677.4 +frontier,naive,random,1000000,10,32,1792,1791,19076,11.9674,1594 +frontier,hierarchical,fixed,1,10,32,1792,1791,102595667,10.0001,1.02594e+07 +frontier,hierarchical,fixed,10,10,32,1792,1791,104528982,10.0002,1.04527e+07 +frontier,hierarchical,fixed,100,10,32,1792,1791,101830529,10.0003,1.01828e+07 +frontier,hierarchical,fixed,1000,10,32,1792,1791,16973417,10.0026,1.6969e+06 +frontier,hierarchical,fixed,10000,10,32,1792,1791,1753021,10.0208,174938 +frontier,hierarchical,fixed,100000,10,32,1792,1791,177339,10.2067,17374.8 +frontier,hierarchical,fixed,1000000,10,32,1792,1791,18298,12.0026,1524.5 +frontier,hierarchical,random,1,10,32,1792,1791,102172549,10.0002,1.02171e+07 +frontier,hierarchical,random,10,10,32,1792,1791,104224311,10.0002,1.04223e+07 +frontier,hierarchical,random,100,10,32,1792,1791,94917455,10.0003,9.49141e+06 +frontier,hierarchical,random,1000,10,32,1792,1791,11023487,10.0034,1.10197e+06 +frontier,hierarchical,random,10000,10,32,1792,1791,1115757,10.0299,111243 +frontier,hierarchical,random,100000,10,32,1792,1791,113342,10.3054,10998.3 +frontier,hierarchical,random,1000000,10,32,1792,1791,14090,13.6719,1030.58 diff --git a/benchmark/results/frontier/4-dynampi_shutdown_frontier_4-4058168/naive_shutdown_frontier.csv b/benchmark/results/frontier/4-dynampi_shutdown_frontier_4-4058168/naive_shutdown_frontier.csv new file mode 100644 index 0000000..2000758 --- /dev/null +++ b/benchmark/results/frontier/4-dynampi_shutdown_frontier_4-4058168/naive_shutdown_frontier.csv @@ -0,0 +1,2 @@ +system,nodes,world_size,workers,time_per_shutdown_us,iterations +frontier,4,224,223,672.43,12561 diff --git 
a/benchmark/results/frontier/4-dynampi_ss_frontier_4-4054469/strong_scaling_frontier.csv b/benchmark/results/frontier/4-dynampi_ss_frontier_4-4054469/strong_scaling_frontier.csv new file mode 100644 index 0000000..c36f4c3 --- /dev/null +++ b/benchmark/results/frontier/4-dynampi_ss_frontier_4-4054469/strong_scaling_frontier.csv @@ -0,0 +1,29 @@ +system,distributor,mode,expected_ns,round_target_ms,duration_s,nodes,world_size,workers,total_tasks,total_subtasks,elapsed_s,throughput_tasks_per_s +frontier,naive,fixed,1000,200,10,4,224,223,57163885,57163885,43.7275,1.30728e+06 +frontier,naive,fixed,10000,200,10,4,224,223,17713679,17713679,13.2581,1.33606e+06 +frontier,naive,fixed,100000,200,10,4,224,223,7005558,7005558,10.6738,656331 +frontier,naive,fixed,1000000,200,10,4,224,223,2194351,2194351,10.1571,216041 +frontier,naive,fixed,10000000,200,10,4,224,223,189716,189716,10.1758,18643.9 +frontier,naive,fixed,100000000,200,10,4,224,223,11422,11422,10.1027,1130.59 +frontier,naive,fixed,1000000000,200,10,4,224,223,2230,2230,10.0086,222.809 +frontier,naive,poisson,1000,200,10,4,224,223,57156290,57156290,43.9642,1.30006e+06 +frontier,naive,poisson,10000,200,10,4,224,223,17576611,17576611,13.2932,1.32222e+06 +frontier,naive,poisson,100000,200,10,4,224,223,6989526,6989526,10.6756,654719 +frontier,naive,poisson,1000000,200,10,4,224,223,2193603,2193603,10.157,215970 +frontier,naive,poisson,10000000,200,10,4,224,223,189718,189718,10.1766,18642.5 +frontier,naive,poisson,100000000,200,10,4,224,223,11422,11422,10.1021,1130.66 +frontier,naive,poisson,1000000000,200,10,4,224,223,2230,2230,10.0083,222.814 +frontier,hierarchical,fixed,1000,200,10,4,224,223,96134000,96134000,17.9539,5.3545e+06 +frontier,hierarchical,fixed,10000,200,10,4,224,223,29702000,29702000,11.0214,2.69494e+06 +frontier,hierarchical,fixed,100000,200,10,4,224,223,10526000,10526000,10.4088,1.01126e+06 +frontier,hierarchical,fixed,1000000,200,10,4,224,223,446000,446000,10.1488,43946.3 
+frontier,hierarchical,fixed,10000000,200,10,4,224,223,17840,17840,12.7677,1397.28 +frontier,hierarchical,fixed,100000000,200,10,4,224,223,1784,1784,12.8029,139.344 +frontier,hierarchical,fixed,1000000000,200,10,4,224,223,223,223,16.0009,13.9367 +frontier,hierarchical,poisson,1000,200,10,4,224,223,96036000,96036000,17.9532,5.34925e+06 +frontier,hierarchical,poisson,10000,200,10,4,224,223,45606000,45606000,11.0327,4.13373e+06 +frontier,hierarchical,poisson,100000,200,10,4,224,223,10316000,10316000,10.2543,1.00602e+06 +frontier,hierarchical,poisson,1000000,200,10,4,224,223,446000,446000,10.159,43902 +frontier,hierarchical,poisson,10000000,200,10,4,224,223,13380,13380,10.307,1298.15 +frontier,hierarchical,poisson,100000000,200,10,4,224,223,1784,1784,12.8031,139.341 +frontier,hierarchical,poisson,1000000000,200,10,4,224,223,223,223,17.0007,13.1171 diff --git a/benchmark/results/frontier/4-dynampi_ss_frontier_4-4054789/strong_scaling_frontier.csv b/benchmark/results/frontier/4-dynampi_ss_frontier_4-4054789/strong_scaling_frontier.csv new file mode 100644 index 0000000..8be8e38 --- /dev/null +++ b/benchmark/results/frontier/4-dynampi_ss_frontier_4-4054789/strong_scaling_frontier.csv @@ -0,0 +1,29 @@ +system,distributor,mode,expected_us,duration_s,nodes,world_size,workers,total_tasks,elapsed_s,throughput_tasks_per_s +frontier,naive,fixed,1,10,4,224,223,4547862,10.0013,454728 +frontier,naive,fixed,10,10,4,224,223,4590232,10.0013,458964 +frontier,naive,fixed,100,10,4,224,223,3794122,10.0023,379327 +frontier,naive,fixed,1000,10,4,224,223,1621656,10.0036,162108 +frontier,naive,fixed,10000,10,4,224,223,215418,10.0275,21482.8 +frontier,naive,fixed,100000,10,4,224,223,11868,10.3026,1151.94 +frontier,naive,fixed,1000000,10,4,224,223,1788,13.0022,137.515 +frontier,naive,random,1,10,4,224,223,4574176,10.0019,457331 +frontier,naive,random,10,10,4,224,223,4588448,10.0013,458784 +frontier,naive,random,100,10,4,224,223,3928814,10.0012,392836 
+frontier,naive,random,1000,10,4,224,223,1337108,10.0046,133649 +frontier,naive,random,10000,10,4,224,223,136922,10.0662,13602.1 +frontier,naive,random,100000,10,4,224,223,13407,10.3749,1292.25 +frontier,naive,random,1000000,10,4,224,223,1835,13.9344,131.689 +frontier,hierarchical,fixed,1,10,4,224,223,14149350,10.0004,1.41487e+06 +frontier,hierarchical,fixed,10,10,4,224,223,9828948,10.0008,982815 +frontier,hierarchical,fixed,100,10,4,224,223,8409330,10.001,840852 +frontier,hierarchical,fixed,1000,10,4,224,223,1080212,10.0062,107955 +frontier,hierarchical,fixed,10000,10,4,224,223,111946,10.0774,11108.7 +frontier,hierarchical,fixed,100000,10,4,224,223,10972,10.4031,1054.68 +frontier,hierarchical,fixed,1000000,10,4,224,223,1732,16.0011,108.243 +frontier,hierarchical,random,1,10,4,224,223,13635112,10.0005,1.36344e+06 +frontier,hierarchical,random,10,10,4,224,223,9365108,10.0007,936447 +frontier,hierarchical,random,100,10,4,224,223,1833952,10.0044,183315 +frontier,hierarchical,random,1000,10,4,224,223,221662,10.0397,22078.5 +frontier,hierarchical,random,10000,10,4,224,223,44568,10.2315,4355.97 +frontier,hierarchical,random,100000,10,4,224,223,8872,10.8119,820.581 +frontier,hierarchical,random,1000000,10,4,224,223,1312,16.1668,81.1539 diff --git a/benchmark/results/frontier/4-dynampi_ss_frontier_4-4058683/strong_scaling_frontier.csv b/benchmark/results/frontier/4-dynampi_ss_frontier_4-4058683/strong_scaling_frontier.csv new file mode 100644 index 0000000..6d56b94 --- /dev/null +++ b/benchmark/results/frontier/4-dynampi_ss_frontier_4-4058683/strong_scaling_frontier.csv @@ -0,0 +1,29 @@ +system,distributor,mode,expected_us,duration_s,nodes,world_size,workers,total_tasks,elapsed_s,throughput_tasks_per_s +frontier,naive,fixed,1,10,4,224,223,6717086,10.0004,671682 +frontier,naive,fixed,10,10,4,224,223,6774977,10.0005,677466 +frontier,naive,fixed,100,10,4,224,223,6577294,10.0007,657682 +frontier,naive,fixed,1000,10,4,224,223,2201640,10.001,220141 
+frontier,naive,fixed,10000,10,4,224,223,222888,10.015,22255.4 +frontier,naive,fixed,100000,10,4,224,223,22295,10.0034,2228.74 +frontier,naive,fixed,1000000,10,4,224,223,2205,10.0026,220.442 +frontier,naive,random,1,10,4,224,223,6764440,10.0003,676421 +frontier,naive,random,10,10,4,224,223,6795236,10,679521 +frontier,naive,random,100,10,4,224,223,6659825,10.0007,665936 +frontier,naive,random,1000,10,4,224,223,2196061,10.002,219562 +frontier,naive,random,10000,10,4,224,223,223015,10.0188,22259.7 +frontier,naive,random,100000,10,4,224,223,22357,10.185,2195.09 +frontier,naive,random,1000000,10,4,224,223,2384,11.8718,200.812 +frontier,hierarchical,fixed,1,10,4,224,223,41529685,10.0001,4.15292e+06 +frontier,hierarchical,fixed,10,10,4,224,223,41717861,10.0001,4.17175e+06 +frontier,hierarchical,fixed,100,10,4,224,223,16354178,10.0001,1.63539e+06 +frontier,hierarchical,fixed,1000,10,4,224,223,2116933,10.0016,211659 +frontier,hierarchical,fixed,10000,10,4,224,223,218263,10.0136,21796.6 +frontier,hierarchical,fixed,100000,10,4,224,223,21879,10.0066,2186.46 +frontier,hierarchical,fixed,1000000,10,4,224,223,2304,12.0027,191.957 +frontier,hierarchical,random,1,10,4,224,223,41343892,10.0001,4.13435e+06 +frontier,hierarchical,random,10,10,4,224,223,41410367,10.0001,4.14098e+06 +frontier,hierarchical,random,100,10,4,224,223,12398434,10.0002,1.23981e+06 +frontier,hierarchical,random,1000,10,4,224,223,1375607,10.002,137533 +frontier,hierarchical,random,10000,10,4,224,223,139093,10.0195,13882.2 +frontier,hierarchical,random,100000,10,4,224,223,14076,10.1846,1382.08 +frontier,hierarchical,random,1000000,10,4,224,223,1499,12.7281,117.771 diff --git a/benchmark/results/frontier/4096-dynampi_shutdown_frontier_4096-4058263/naive_shutdown_frontier.csv b/benchmark/results/frontier/4096-dynampi_shutdown_frontier_4096-4058263/naive_shutdown_frontier.csv new file mode 100644 index 0000000..cc88034 --- /dev/null +++ 
b/benchmark/results/frontier/4096-dynampi_shutdown_frontier_4096-4058263/naive_shutdown_frontier.csv @@ -0,0 +1,2 @@ +system,nodes,world_size,workers,time_per_shutdown_us,iterations +frontier,4096,229376,229375,8.7203e+06,2 diff --git a/benchmark/results/frontier/512-dynampi_shutdown_frontier_512-4058175/naive_shutdown_frontier.csv b/benchmark/results/frontier/512-dynampi_shutdown_frontier_512-4058175/naive_shutdown_frontier.csv new file mode 100644 index 0000000..35fe6f1 --- /dev/null +++ b/benchmark/results/frontier/512-dynampi_shutdown_frontier_512-4058175/naive_shutdown_frontier.csv @@ -0,0 +1,2 @@ +system,nodes,world_size,workers,time_per_shutdown_us,iterations +frontier,512,28672,28671,147610,63 diff --git a/benchmark/results/frontier/512-dynampi_ss_frontier_512-4054796/strong_scaling_frontier.csv b/benchmark/results/frontier/512-dynampi_ss_frontier_512-4054796/strong_scaling_frontier.csv new file mode 100644 index 0000000..b74af59 --- /dev/null +++ b/benchmark/results/frontier/512-dynampi_ss_frontier_512-4054796/strong_scaling_frontier.csv @@ -0,0 +1,29 @@ +system,distributor,mode,expected_us,duration_s,nodes,world_size,workers,total_tasks,elapsed_s,throughput_tasks_per_s +frontier,naive,fixed,1,10,512,28672,28671,1593917,10.2563,155408 +frontier,naive,fixed,10,10,512,28672,28671,1595820,10.2644,155471 +frontier,naive,fixed,100,10,512,28672,28671,1607061,10.4485,153808 +frontier,naive,fixed,1000,10,512,28672,28671,1589578,10.6948,148631 +frontier,naive,fixed,10000,10,512,28672,28671,1691253,10.6247,159181 +frontier,naive,fixed,100000,10,512,28672,28671,1165716,10.5506,110488 +frontier,naive,fixed,1000000,10,512,28672,28671,230474,14.3831,16023.9 +frontier,naive,random,1,10,512,28672,28671,1595884,10.2749,155319 +frontier,naive,random,10,10,512,28672,28671,1614193,10.3073,156606 +frontier,naive,random,100,10,512,28672,28671,1608566,10.4461,153987 +frontier,naive,random,1000,10,512,28672,28671,1555596,10.715,145179 
+frontier,naive,random,10000,10,512,28672,28671,1649861,10.6681,154654 +frontier,naive,random,100000,10,512,28672,28671,1591839,10.5636,150691 +frontier,naive,random,1000000,10,512,28672,28671,222850,14.6966,15163.4 +frontier,hierarchical,fixed,1,10,512,28672,28671,58144788,10.0262,5.79928e+06 +frontier,hierarchical,fixed,10,10,512,28672,28671,54532242,10.0341,5.43471e+06 +frontier,hierarchical,fixed,100,10,512,28672,28671,53614770,10.0315,5.34463e+06 +frontier,hierarchical,fixed,1000,10,512,28672,28671,100864578,10.0109,1.00755e+07 +frontier,hierarchical,fixed,10000,10,512,28672,28671,13876764,10.0646,1.37877e+06 +frontier,hierarchical,fixed,100000,10,512,28672,28671,1488060,10.4317,142648 +frontier,hierarchical,fixed,1000000,10,512,28672,28671,229368,16.0094,14327.1 +frontier,hierarchical,random,1,10,512,28672,28671,60209100,10.0223,6.00754e+06 +frontier,hierarchical,random,10,10,512,28672,28671,54417558,10.0345,5.42302e+06 +frontier,hierarchical,random,100,10,512,28672,28671,53156034,10.0623,5.28268e+06 +frontier,hierarchical,random,1000,10,512,28672,28671,41171556,10.0471,4.09785e+06 +frontier,hierarchical,random,10000,10,512,28672,28671,9174720,10.0892,909362 +frontier,hierarchical,random,100000,10,512,28672,28671,1032038,11.2,92145.9 +frontier,hierarchical,random,1000000,10,512,28672,28671,172026,18.6474,9225.22 diff --git a/benchmark/results/frontier/512-dynampi_ss_frontier_512-4058690/strong_scaling_frontier.csv b/benchmark/results/frontier/512-dynampi_ss_frontier_512-4058690/strong_scaling_frontier.csv new file mode 100644 index 0000000..791aa64 --- /dev/null +++ b/benchmark/results/frontier/512-dynampi_ss_frontier_512-4058690/strong_scaling_frontier.csv @@ -0,0 +1,29 @@ +system,distributor,mode,expected_us,duration_s,nodes,world_size,workers,total_tasks,elapsed_s,throughput_tasks_per_s +frontier,naive,fixed,1,10,512,28672,28671,8789361,10.0652,873239 +frontier,naive,fixed,10,10,512,28672,28671,9261687,10.0858,918294 
+frontier,naive,fixed,100,10,512,28672,28671,3197232,10.0818,317128 +frontier,naive,fixed,1000,10,512,28672,28671,1553875,10.0182,155105 +frontier,naive,fixed,10000,10,512,28672,28671,1398919,10.0312,139457 +frontier,naive,fixed,100000,10,512,28672,28671,2319601,10.1141,229344 +frontier,naive,fixed,1000000,10,512,28672,28671,301158,11.1981,26893.7 +frontier,naive,random,1,10,512,28672,28671,8129334,10.1092,804154 +frontier,naive,random,10,10,512,28672,28671,9481135,10.084,940216 +frontier,naive,random,100,10,512,28672,28671,3279858,10.0788,325421 +frontier,naive,random,1000,10,512,28672,28671,1564771,10.0017,156450 +frontier,naive,random,10000,10,512,28672,28671,1424268,10.0149,142214 +frontier,naive,random,100000,10,512,28672,28671,2328254,10.1959,228352 +frontier,naive,random,1000000,10,512,28672,28671,302850,11.986,25267 +frontier,hierarchical,fixed,1,10,512,28672,28671,103676556,10.0021,1.03654e+07 +frontier,hierarchical,fixed,10,10,512,28672,28671,104640022,10.0022,1.04617e+07 +frontier,hierarchical,fixed,100,10,512,28672,28671,102250175,10.0022,1.02228e+07 +frontier,hierarchical,fixed,1000,10,512,28672,28671,100203438,10.0021,1.00183e+07 +frontier,hierarchical,fixed,10000,10,512,28672,28671,28054792,10.0233,2.79897e+06 +frontier,hierarchical,fixed,100000,10,512,28672,28671,2811605,10.0166,280696 +frontier,hierarchical,fixed,1000000,10,512,28672,28671,267492,10.0065,26731.7 +frontier,hierarchical,random,1,10,512,28672,28671,103940686,10.0022,1.03918e+07 +frontier,hierarchical,random,10,10,512,28672,28671,104776703,10.0021,1.04755e+07 +frontier,hierarchical,random,100,10,512,28672,28671,100195281,10.0021,1.00174e+07 +frontier,hierarchical,random,1000,10,512,28672,28671,97940942,10.0032,9.79096e+06 +frontier,hierarchical,random,10000,10,512,28672,28671,17868532,10.0335,1.78088e+06 +frontier,hierarchical,random,100000,10,512,28672,28671,1816371,10.3556,175400 +frontier,hierarchical,random,1000000,10,512,28672,28671,226880,15.1788,14947.2 diff --git 
a/benchmark/results/frontier/64-dynampi_shutdown_frontier_64-4058172/naive_shutdown_frontier.csv b/benchmark/results/frontier/64-dynampi_shutdown_frontier_64-4058172/naive_shutdown_frontier.csv new file mode 100644 index 0000000..ffc5a16 --- /dev/null +++ b/benchmark/results/frontier/64-dynampi_shutdown_frontier_64-4058172/naive_shutdown_frontier.csv @@ -0,0 +1,2 @@ +system,nodes,world_size,workers,time_per_shutdown_us,iterations +frontier,64,3584,3583,14580.9,618 diff --git a/benchmark/results/frontier/64-dynampi_ss_frontier_64-4054473/strong_scaling_frontier.csv b/benchmark/results/frontier/64-dynampi_ss_frontier_64-4054473/strong_scaling_frontier.csv new file mode 100644 index 0000000..7246e50 --- /dev/null +++ b/benchmark/results/frontier/64-dynampi_ss_frontier_64-4054473/strong_scaling_frontier.csv @@ -0,0 +1,8 @@ +system,distributor,mode,expected_ns,round_target_ms,duration_s,nodes,world_size,workers,total_tasks,total_subtasks,elapsed_s,throughput_tasks_per_s +frontier,naive,fixed,1000,200,10,64,3584,3583,724123159,724123159,555.985,1.30242e+06 +frontier,naive,fixed,10000,200,10,64,3584,3583,81807696,81807696,63.4709,1.2889e+06 +frontier,naive,fixed,100000,200,10,64,3584,3583,12619811,12619811,20.968,601861 +frontier,naive,fixed,1000000,200,10,64,3584,3583,3369470,3369470,12.5931,267565 +frontier,naive,fixed,10000000,200,10,64,3584,3583,2129961,2129961,10.2558,207684 +frontier,naive,fixed,100000000,200,10,64,3584,3583,182782,182782,10.119,18063.2 +frontier,naive,fixed,1000000000,200,10,64,3584,3583,35830,35830,10.1455,3531.63 diff --git a/benchmark/results/frontier/64-dynampi_ss_frontier_64-4054793/strong_scaling_frontier.csv b/benchmark/results/frontier/64-dynampi_ss_frontier_64-4054793/strong_scaling_frontier.csv new file mode 100644 index 0000000..6641545 --- /dev/null +++ b/benchmark/results/frontier/64-dynampi_ss_frontier_64-4054793/strong_scaling_frontier.csv @@ -0,0 +1,29 @@ 
+system,distributor,mode,expected_us,duration_s,nodes,world_size,workers,total_tasks,elapsed_s,throughput_tasks_per_s +frontier,naive,fixed,1,10,64,3584,3583,3769316,10.0241,376024 +frontier,naive,fixed,10,10,64,3584,3583,3819478,10.0346,380629 +frontier,naive,fixed,100,10,64,3584,3583,3360854,10.0344,334934 +frontier,naive,fixed,1000,10,64,3584,3583,2508100,10.0548,249443 +frontier,naive,fixed,10000,10,64,3584,3583,1891824,10.0659,187943 +frontier,naive,fixed,100000,10,64,3584,3583,189948,10.3198,18406.1 +frontier,naive,fixed,1000000,10,64,3584,3583,28668,13.0166,2202.42 +frontier,naive,random,1,10,64,3584,3583,3783648,10.0193,377636 +frontier,naive,random,10,10,64,3584,3583,3790814,10.0365,377703 +frontier,naive,random,100,10,64,3584,3583,3368020,10.0229,336032 +frontier,naive,random,1000,10,64,3584,3583,2500934,10.0419,249049 +frontier,naive,random,10000,10,64,3584,3583,1992148,10.0493,198238 +frontier,naive,random,100000,10,64,3584,3583,208382,10.4557,19929.9 +frontier,naive,random,1000000,10,64,3584,3583,29371,14.5497,2018.67 +frontier,hierarchical,fixed,1,10,64,3584,3583,25768936,10.0039,2.57588e+06 +frontier,hierarchical,fixed,10,10,64,3584,3583,25797600,10.0054,2.57836e+06 +frontier,hierarchical,fixed,100,10,64,3584,3583,37600002,10.0085,3.7568e+06 +frontier,hierarchical,fixed,1000,10,64,3584,3583,10870822,10.0084,1.08617e+06 +frontier,hierarchical,fixed,10000,10,64,3584,3583,1777168,10.0764,176369 +frontier,hierarchical,fixed,100000,10,64,3584,3583,184252,10.4152,17690.7 +frontier,hierarchical,fixed,1000000,10,64,3584,3583,28492,16.0038,1780.33 +frontier,hierarchical,random,1,10,64,3584,3583,25991082,10.0051,2.59779e+06 +frontier,hierarchical,random,10,10,64,3584,3583,26550030,10.0043,2.65388e+06 +frontier,hierarchical,random,100,10,64,3584,3583,9322966,10.0112,931254 +frontier,hierarchical,random,1000,10,64,3584,3583,2780408,10.1031,275204 +frontier,hierarchical,random,10000,10,64,3584,3583,1053402,10.1678,103601 
+frontier,hierarchical,random,100000,10,64,3584,3583,127612,10.7208,11903.2 +frontier,hierarchical,random,1000000,10,64,3584,3583,21412,17.7611,1205.56 diff --git a/benchmark/results/frontier/64-dynampi_ss_frontier_64-4058687/strong_scaling_frontier.csv b/benchmark/results/frontier/64-dynampi_ss_frontier_64-4058687/strong_scaling_frontier.csv new file mode 100644 index 0000000..9a1b85f --- /dev/null +++ b/benchmark/results/frontier/64-dynampi_ss_frontier_64-4058687/strong_scaling_frontier.csv @@ -0,0 +1,29 @@ +system,distributor,mode,expected_us,duration_s,nodes,world_size,workers,total_tasks,elapsed_s,throughput_tasks_per_s +frontier,naive,fixed,1,10,64,3584,3583,5808317,10.0098,580261 +frontier,naive,fixed,10,10,64,3584,3583,5803394,10.0008,580294 +frontier,naive,fixed,100,10,64,3584,3583,6500955,10.0141,649181 +frontier,naive,fixed,1000,10,64,3584,3583,2811407,10.014,280748 +frontier,naive,fixed,10000,10,64,3584,3583,2446281,10.0134,244300 +frontier,naive,fixed,100000,10,64,3584,3583,358202,10.0176,35757.2 +frontier,naive,fixed,1000000,10,64,3584,3583,35429,10.0151,3537.54 +frontier,naive,random,1,10,64,3584,3583,5818141,10.0004,581793 +frontier,naive,random,10,10,64,3584,3583,5843557,10.0034,584155 +frontier,naive,random,100,10,64,3584,3583,6593945,10.014,658474 +frontier,naive,random,1000,10,64,3584,3583,2817589,10.014,281365 +frontier,naive,random,10000,10,64,3584,3583,2450596,10.0199,244573 +frontier,naive,random,100000,10,64,3584,3583,360218,10.1953,35331.8 +frontier,naive,random,1000000,10,64,3584,3583,38099,11.9544,3187.03 +frontier,hierarchical,fixed,1,10,64,3584,3583,102416518,10.0003,1.02413e+07 +frontier,hierarchical,fixed,10,10,64,3584,3583,103691289,10.0003,1.03688e+07 +frontier,hierarchical,fixed,100,10,64,3584,3583,102024761,10.0003,1.02022e+07 +frontier,hierarchical,fixed,1000,10,64,3584,3583,33945141,10.002,3.39382e+06 +frontier,hierarchical,fixed,10000,10,64,3584,3583,3505327,10.0201,349828 
+frontier,hierarchical,fixed,100000,10,64,3584,3583,351531,10.0073,35127.4 +frontier,hierarchical,fixed,1000000,10,64,3584,3583,36575,12.0023,3047.34 +frontier,hierarchical,random,1,10,64,3584,3583,102870506,10.0003,1.02867e+07 +frontier,hierarchical,random,10,10,64,3584,3583,104275771,10.0003,1.04273e+07 +frontier,hierarchical,random,100,10,64,3584,3583,99362749,10.0004,9.93592e+06 +frontier,hierarchical,random,1000,10,64,3584,3583,22053877,10.0035,2.20461e+06 +frontier,hierarchical,random,10000,10,64,3584,3583,2232854,10.0331,222548 +frontier,hierarchical,random,100000,10,64,3584,3583,227356,10.3569,21952.1 +frontier,hierarchical,random,1000000,10,64,3584,3583,28298,14.8682,1903.26 diff --git a/benchmark/results/frontier/8-dynampi_shutdown_frontier_8-4058169/naive_shutdown_frontier.csv b/benchmark/results/frontier/8-dynampi_shutdown_frontier_8-4058169/naive_shutdown_frontier.csv new file mode 100644 index 0000000..f985193 --- /dev/null +++ b/benchmark/results/frontier/8-dynampi_shutdown_frontier_8-4058169/naive_shutdown_frontier.csv @@ -0,0 +1,2 @@ +system,nodes,world_size,workers,time_per_shutdown_us,iterations +frontier,8,448,447,1550.36,5607 diff --git a/benchmark/results/frontier/8-dynampi_ss_frontier_8-4054470/strong_scaling_frontier.csv b/benchmark/results/frontier/8-dynampi_ss_frontier_8-4054470/strong_scaling_frontier.csv new file mode 100644 index 0000000..f658708 --- /dev/null +++ b/benchmark/results/frontier/8-dynampi_ss_frontier_8-4054470/strong_scaling_frontier.csv @@ -0,0 +1,29 @@ +system,distributor,mode,expected_ns,round_target_ms,duration_s,nodes,world_size,workers,total_tasks,total_subtasks,elapsed_s,throughput_tasks_per_s +frontier,naive,fixed,1000,200,10,8,448,447,101776305,101776305,77.0221,1.32139e+06 +frontier,naive,fixed,10000,200,10,8,448,447,21899094,21899094,16.565,1.32201e+06 +frontier,naive,fixed,100000,200,10,8,448,447,7336212,7336212,11.3312,647436 +frontier,naive,fixed,1000000,200,10,8,448,447,2867904,2867904,10.2727,279177 
+frontier,naive,fixed,10000000,200,10,8,448,447,380116,380116,10.1772,37349.6 +frontier,naive,fixed,100000000,200,10,8,448,447,22846,22846,10.104,2261.09 +frontier,naive,fixed,1000000000,200,10,8,448,447,4470,4470,10.0177,446.208 +frontier,naive,poisson,1000,200,10,8,448,447,101587556,101587556,78.056,1.30147e+06 +frontier,naive,poisson,10000,200,10,8,448,447,21799432,21799432,16.592,1.31385e+06 +frontier,naive,poisson,100000,200,10,8,448,447,7326251,7326251,11.3322,646498 +frontier,naive,poisson,1000000,200,10,8,448,447,2864316,2864316,10.2738,278799 +frontier,naive,poisson,10000000,200,10,8,448,447,380116,380116,10.1773,37349.5 +frontier,naive,poisson,100000000,200,10,8,448,447,22846,22846,10.1032,2261.26 +frontier,naive,poisson,1000000000,200,10,8,448,447,4470,4470,10.0175,446.218 +frontier,hierarchical,fixed,1000,200,10,8,448,447,145209000,145209000,22.6738,6.40425e+06 +frontier,hierarchical,fixed,10000,200,10,8,448,447,62094000,62094000,11.3898,5.45172e+06 +frontier,hierarchical,fixed,100000,200,10,8,448,447,21903000,21903000,10.2302,2.14101e+06 +frontier,hierarchical,fixed,1000000,200,10,8,448,447,894000,894000,10.1507,88073.1 +frontier,hierarchical,fixed,10000000,200,10,8,448,447,26820,26820,12.8001,2095.3 +frontier,hierarchical,fixed,100000000,200,10,8,448,447,2682,2682,12.9028,207.862 +frontier,hierarchical,fixed,1000000000,200,10,8,448,447,447,447,22.0012,20.3171 +frontier,hierarchical,poisson,1000,200,10,8,448,447,145209000,145209000,22.6182,6.42001e+06 +frontier,hierarchical,poisson,10000,200,10,8,448,447,62604000,62604000,11.4339,5.47531e+06 +frontier,hierarchical,poisson,100000,200,10,8,448,447,21903000,21903000,10.2694,2.13285e+06 +frontier,hierarchical,poisson,1000000,200,10,8,448,447,894000,894000,10.1579,88010.1 +frontier,hierarchical,poisson,10000000,200,10,8,448,447,26820,26820,12.8009,2095.16 +frontier,hierarchical,poisson,100000000,200,10,8,448,447,2682,2682,12.9029,207.86 
+frontier,hierarchical,poisson,1000000000,200,10,8,448,447,447,447,22.0009,20.3174 diff --git a/benchmark/results/frontier/8-dynampi_ss_frontier_8-4054790/strong_scaling_frontier.csv b/benchmark/results/frontier/8-dynampi_ss_frontier_8-4054790/strong_scaling_frontier.csv new file mode 100644 index 0000000..2956ca4 --- /dev/null +++ b/benchmark/results/frontier/8-dynampi_ss_frontier_8-4054790/strong_scaling_frontier.csv @@ -0,0 +1,29 @@ +system,distributor,mode,expected_us,duration_s,nodes,world_size,workers,total_tasks,elapsed_s,throughput_tasks_per_s +frontier,naive,fixed,1,10,8,448,447,4159782,10.0037,415825 +frontier,naive,fixed,10,10,8,448,447,4188390,10.0026,418730 +frontier,naive,fixed,100,10,8,448,447,3606396,10.0036,360509 +frontier,naive,fixed,1000,10,8,448,447,2225166,10.0048,222410 +frontier,naive,fixed,10000,10,8,448,447,413922,10.0388,41232.1 +frontier,naive,fixed,100000,10,8,448,447,23740,10.304,2303.97 +frontier,naive,fixed,1000000,10,8,448,447,3580,13.0029,275.323 +frontier,naive,random,1,10,8,448,447,4169616,10.003,416838 +frontier,naive,random,10,10,8,448,447,4181238,10.0025,418017 +frontier,naive,random,100,10,8,448,447,3650202,10.0041,364871 +frontier,naive,random,1000,10,8,448,447,2414694,10.0064,241316 +frontier,naive,random,10000,10,8,448,447,270882,10.0408,26978.2 +frontier,naive,random,100000,10,8,448,447,27023,10.5424,2563.28 +frontier,naive,random,1000000,10,8,448,447,4143,16.294,254.266 +frontier,hierarchical,fixed,1,10,8,448,447,44793870,10.0003,4.47926e+06 +frontier,hierarchical,fixed,10,10,8,448,447,26263932,10.0007,2.62622e+06 +frontier,hierarchical,fixed,100,10,8,448,447,16459434,10.0009,1.64579e+06 +frontier,hierarchical,fixed,1000,10,8,448,447,2182254,10.0042,218133 +frontier,hierarchical,fixed,10000,10,8,448,447,224394,10.0671,22289.7 +frontier,hierarchical,fixed,100000,10,8,448,447,22584,10.4036,2170.78 +frontier,hierarchical,fixed,1000000,10,8,448,447,3546,16.0012,221.608 
+frontier,hierarchical,random,1,10,8,448,447,44670498,10.0002,4.46696e+06 +frontier,hierarchical,random,10,10,8,448,447,26506206,10.0004,2.65051e+06 +frontier,hierarchical,random,100,10,8,448,447,6727350,10.0024,672572 +frontier,hierarchical,random,1000,10,8,448,447,714306,10.0235,71262.8 +frontier,hierarchical,random,10000,10,8,448,447,113538,10.1494,11186.7 +frontier,hierarchical,random,100000,10,8,448,447,17790,10.9489,1624.82 +frontier,hierarchical,random,1000000,10,8,448,447,2682,16.0222,167.393 diff --git a/benchmark/results/frontier/8-dynampi_ss_frontier_8-4058684/strong_scaling_frontier.csv b/benchmark/results/frontier/8-dynampi_ss_frontier_8-4058684/strong_scaling_frontier.csv new file mode 100644 index 0000000..d71cd79 --- /dev/null +++ b/benchmark/results/frontier/8-dynampi_ss_frontier_8-4058684/strong_scaling_frontier.csv @@ -0,0 +1,29 @@ +system,distributor,mode,expected_us,duration_s,nodes,world_size,workers,total_tasks,elapsed_s,throughput_tasks_per_s +frontier,naive,fixed,1,10,8,448,447,6177757,10.0006,617737 +frontier,naive,fixed,10,10,8,448,447,6248681,10.0003,624852 +frontier,naive,fixed,100,10,8,448,447,6571127,10.0015,657011 +frontier,naive,fixed,1000,10,8,448,447,2866258,10.0016,286581 +frontier,naive,fixed,10000,10,8,448,447,446655,10.015,44598.5 +frontier,naive,fixed,100000,10,8,448,447,44690,10.0044,4467.02 +frontier,naive,fixed,1000000,10,8,448,447,4420,10.0033,441.854 +frontier,naive,random,1,10,8,448,447,6195082,10.0002,619495 +frontier,naive,random,10,10,8,448,447,6278178,10.001,627753 +frontier,naive,random,100,10,8,448,447,6646979,10.0016,664593 +frontier,naive,random,1000,10,8,448,447,2868032,10.0018,286752 +frontier,naive,random,10000,10,8,448,447,446647,10.0193,44578.5 +frontier,naive,random,100000,10,8,448,447,44964,10.1863,4414.17 +frontier,naive,random,1000000,10,8,448,447,4781,11.9527,399.992 +frontier,hierarchical,fixed,1,10,8,448,447,80386914,10.0001,8.03859e+06 
+frontier,hierarchical,fixed,10,10,8,448,447,80806349,10.0001,8.08052e+06 +frontier,hierarchical,fixed,100,10,8,448,447,32512772,10.0002,3.2512e+06 +frontier,hierarchical,fixed,1000,10,8,448,447,4237689,10.0029,423648 +frontier,hierarchical,fixed,10000,10,8,448,447,437522,10.0198,43665.9 +frontier,hierarchical,fixed,100000,10,8,448,447,43858,10.0067,4382.86 +frontier,hierarchical,fixed,1000000,10,8,448,447,4589,12.0025,382.337 +frontier,hierarchical,random,1,10,8,448,447,80384681,10.0002,8.03834e+06 +frontier,hierarchical,random,10,10,8,448,447,80660773,10.0001,8.06596e+06 +frontier,hierarchical,random,100,10,8,448,447,24679835,10.0003,2.4679e+06 +frontier,hierarchical,random,1000,10,8,448,447,2752429,10.003,275161 +frontier,hierarchical,random,10000,10,8,448,447,278803,10.0319,27791.6 +frontier,hierarchical,random,100000,10,8,448,447,28160,10.2183,2755.84 +frontier,hierarchical,random,1000000,10,8,448,447,3437,12.9098,266.231 diff --git a/benchmark/results/frontier/8192-dynampi_shutdown_frontier_8192-4058264/naive_shutdown_frontier.csv b/benchmark/results/frontier/8192-dynampi_shutdown_frontier_8192-4058264/naive_shutdown_frontier.csv new file mode 100644 index 0000000..befcc98 --- /dev/null +++ b/benchmark/results/frontier/8192-dynampi_shutdown_frontier_8192-4058264/naive_shutdown_frontier.csv @@ -0,0 +1,2 @@ +system,nodes,world_size,workers,time_per_shutdown_us,iterations +frontier,8192,458752,458751,3.55936e+07,1 diff --git a/benchmark/results/naive_shutdown_local.csv b/benchmark/results/naive_shutdown_local.csv new file mode 100644 index 0000000..518d327 --- /dev/null +++ b/benchmark/results/naive_shutdown_local.csv @@ -0,0 +1,45 @@ +system,nodes,world_size,workers,shutdown_time_s +local,1,1,1,5.48e-07 +local,1,1,1,4e-07 +local,1,1,1,3.14e-07 +local,1,1,1,1.577e-06 +local,1,1,1,1.127e-06 +local,1,1,1,1.332e-06 +local,1,2,1,3.322e-05 +local,1,4,3,1.4165e-05 +local,1,8,7,7.699e-05 +local,1,12,11,9.5137e-05 +local,1,1,1,1.316e-06 +local,1,2,1,4.623e-06 
+local,1,4,3,1.4413e-05 +local,1,8,7,0.0120517 +local,1,12,11,0.000663585 +local,1,1,1,3.029e-06 +local,1,2,1,4.582e-06 +local,1,4,3,0.0113531 +local,1,8,7,0.0159826 +local,1,12,11,9.6134e-05 +local,1,1,1,1.075e-06,0 +local,1,2,1,6.343e-06,0 +local,1,4,3,1.8276e-05,0 +local,1,8,7,6.1534e-05,0 +local,1,12,11,9.8477e-05,0 +local,1,1,1,0.214144,1422282 +local,1,2,1,0.514655,587015 +local,1,4,3,0.565046,490827 +local,1,1,1,190888,1573402 +local,1,2,1,464309,653690 +local,1,4,3,472562,374580 +local,1,1,1,0.235832,1551905 +local,1,2,1,1.64211,605344 +local,1,4,3,3.47449,414542 +local,1,1,1,0.361011,457441 +local,1,2,1,1.32542,434603 +local,1,4,3,2.58194,452985 +local,1,8,7,4.15406,310733 +local,1,12,11,10.4031,138796 +local,1,1,1,0.245685,1545135 +local,1,2,1,1.20228,776076 +local,1,4,3,3.01309,412864 +local,1,8,7,3.61101,381144 +local,1,12,11,5.33916,288527 diff --git a/benchmark/results/strong_scaling_local.csv b/benchmark/results/strong_scaling_local.csv new file mode 100644 index 0000000..48d7ffa --- /dev/null +++ b/benchmark/results/strong_scaling_local.csv @@ -0,0 +1,148 @@ +system,distributor,mode,expected_us,duration_s,nodes,world_size,workers,total_tasks,elapsed_s,throughput_tasks_per_s +local,naive,fixed,1,10,1,1,1,4936879,10,493688 +local,naive,fixed,10,10,1,1,1,820802,10,82080.2 +local,naive,fixed,100,10,1,1,1,88892,10,8889.2 +local,naive,fixed,1000,10,1,1,1,8975,10,897.5 +local,naive,fixed,10000,10,1,1,1,900,10,90 +local,naive,fixed,100000,10,1,1,1,90,10,9 +local,naive,fixed,1000000,10,1,1,1,9,10,0.9 +local,naive,random,1,10,1,1,1,4842714,10,484271 +local,naive,random,10,10,1,1,1,820360,10,82036 +local,naive,random,100,10,1,1,1,89034,10,8903.4 +local,naive,random,1000,10,1,1,1,8928,10,892.8 +local,naive,random,10000,10,1,1,1,895,10,89.5 +local,naive,random,100000,10,1,1,1,76,10,7.6 +local,naive,random,1000000,10,1,1,1,11,13.2314,0.831355 +local,hierarchical,fixed,1,10,1,1,1,4700619,10,470062 +local,hierarchical,fixed,10,10,1,1,1,814999,10,81499.9 
+local,hierarchical,fixed,100,10,1,1,1,88762,10,8876.2 +local,hierarchical,fixed,1000,10,1,1,1,8973,10,897.3 +local,hierarchical,fixed,10000,10,1,1,1,900,10,90 +local,hierarchical,fixed,100000,10,1,1,1,90,10,9 +local,hierarchical,fixed,1000000,10,1,1,1,9,10,0.9 +local,hierarchical,random,1,10,1,1,1,4638143,10,463814 +local,hierarchical,random,10,10,1,1,1,814497,10,81449.7 +local,hierarchical,random,100,10,1,1,1,88578,10,8857.8 +local,hierarchical,random,1000,10,1,1,1,9035,10,903.499 +local,hierarchical,random,10000,10,1,1,1,889,10,88.9 +local,hierarchical,random,100000,10,1,1,1,93,10,9.3 +local,hierarchical,random,1000000,10,1,1,1,18,15.0561,1.19553 +local,naive,fixed,1,10,2,2,1,3492706,10,349271 +local,naive,fixed,10,10,2,2,1,758488,10,75848.8 +local,naive,fixed,100,10,2,2,1,87606,10,8760.6 +local,naive,fixed,1000,10,2,2,1,8945,10,894.5 +local,naive,fixed,10000,10,2,2,1,900,10,90 +local,naive,fixed,100000,10,2,2,1,90,10,9 +local,naive,fixed,1000000,10,2,2,1,9,10,0.9 +local,naive,random,1,10,2,2,1,3357918,10,335792 +local,naive,random,10,10,2,2,1,758674,10,75867.4 +local,naive,random,100,10,2,2,1,87810,10,8781 +local,naive,random,1000,10,2,2,1,8875,10,887.5 +local,naive,random,10000,10,2,2,1,865,10,86.5 +local,naive,random,100000,10,2,2,1,97,10,9.7 +local,naive,random,1000000,10,2,2,1,10,10.0505,0.994972 +local,hierarchical,fixed,1,10,2,2,1,3354387,10,335439 +local,hierarchical,fixed,10,10,2,2,1,748999,10,74899.9 +local,hierarchical,fixed,100,10,2,2,1,87505,10,8750.5 +local,hierarchical,fixed,1000,10,2,2,1,8943,10,894.3 +local,hierarchical,fixed,10000,10,2,2,1,900,10,90 +local,hierarchical,fixed,100000,10,2,2,1,90,10,9 +local,hierarchical,fixed,1000000,10,2,2,1,9,10,0.9 +local,hierarchical,random,1,10,2,2,1,3190185,10,319018 +local,hierarchical,random,10,10,2,2,1,753267,10,75326.7 +local,hierarchical,random,100,10,2,2,1,86942,10,8694.2 +local,hierarchical,random,1000,10,2,2,1,9063,10,906.3 +local,hierarchical,random,10000,10,2,2,1,857,10,85.7 
+local,hierarchical,random,100000,10,2,2,1,85,10,8.5 +local,hierarchical,random,1000000,10,2,2,1,14,13.7515,1.01807 +local,naive,fixed,1,10,4,4,3,8343450,10,834343 +local,naive,fixed,10,10,4,4,3,2315964,10,231595 +local,naive,fixed,100,10,4,4,3,289128,10.0004,28911.8 +local,naive,fixed,1000,10,4,4,3,29790,10.0034,2978 +local,naive,fixed,10000,10,4,4,3,3006,10.0315,299.656 +local,naive,fixed,100000,10,4,4,3,208,10.3024,20.1894 +local,naive,fixed,1000000,10,4,4,3,28,13.0005,2.15377 +local,naive,random,1,10,4,4,3,8392890,10,839288 +local,naive,random,10,10,4,4,3,2020056,10,202005 +local,naive,random,100,10,4,4,3,234588,10.0005,23457.7 +local,naive,random,1000,10,4,4,3,23946,10.0031,2393.86 +local,naive,random,10000,10,4,4,3,2430,10.0364,242.119 +local,naive,random,100000,10,4,4,3,237,10.2858,23.0415 +local,naive,random,1000000,10,4,4,3,33,13.5085,2.4429 +local,hierarchical,fixed,1,10,4,4,3,3501084,10,350108 +local,hierarchical,fixed,10,10,4,4,3,823800,10.0001,82379.1 +local,hierarchical,fixed,100,10,4,4,3,96918,10.0007,9691.11 +local,hierarchical,fixed,1000,10,4,4,3,11922,10.0052,1191.58 +local,hierarchical,fixed,10000,10,4,4,3,1206,10.0622,119.855 +local,hierarchical,fixed,100000,10,4,4,3,126,10.5028,11.9968 +local,hierarchical,fixed,1000000,10,4,4,3,18,18.0008,0.999957 +local,hierarchical,random,1,10,4,4,3,3967542,10,396753 +local,hierarchical,random,10,10,4,4,3,976926,10.0001,97691.7 +local,hierarchical,random,100,10,4,4,3,116142,10.0007,11613.4 +local,hierarchical,random,1000,10,4,4,3,11832,10.007,1182.37 +local,hierarchical,random,10000,10,4,4,3,1194,10.0701,118.569 +local,hierarchical,random,100000,10,4,4,3,120,11.0632,10.8468 +local,hierarchical,random,1000000,10,4,4,3,18,17.9428,1.00319 +local,naive,fixed,1,10,8,8,7,11848858,10,1.18488e+06 +local,naive,fixed,10,10,8,8,7,4682244,10.0001,468222 +local,naive,fixed,100,10,8,8,7,655046,10.0003,65502.6 +local,naive,fixed,1000,10,8,8,7,69244,10.0031,6922.22 +local,naive,fixed,10000,10,8,8,7,7014,10.037,698.817 
+local,naive,fixed,100000,10,8,8,7,420,10.3031,40.7645 +local,naive,fixed,1000000,10,8,8,7,60,13.0006,4.61519 +local,naive,random,1,10,8,8,7,11858854,10,1.18588e+06 +local,naive,random,10,10,8,8,7,4099536,10,409952 +local,naive,random,100,10,8,8,7,493136,10.0005,49311 +local,naive,random,1000,10,8,8,7,50358,10.0041,5033.73 +local,naive,random,10000,10,8,8,7,5124,10.0377,510.477 +local,naive,random,100000,10,8,8,7,533,10.4457,51.0258 +local,naive,random,1000000,10,8,8,7,72,15.6575,4.59843 +local,hierarchical,fixed,1,10,8,8,7,3546732,10.0001,354671 +local,hierarchical,fixed,10,10,8,8,7,811972,10.0002,81195.6 +local,hierarchical,fixed,100,10,8,8,7,192766,10.0008,19275 +local,hierarchical,fixed,1000,10,8,8,7,19866,10.0097,1984.67 +local,hierarchical,fixed,10000,10,8,8,7,1092,10.1567,107.516 +local,hierarchical,fixed,100000,10,8,8,7,126,11.7033,10.7662 +local,hierarchical,fixed,1000000,10,8,8,7,28,26.0097,1.07652 +local,hierarchical,random,1,10,8,8,7,3300248,10,330023 +local,hierarchical,random,10,10,8,8,7,809284,10.0003,80926 +local,hierarchical,random,100,10,8,8,7,96670,10.0017,9665.38 +local,hierarchical,random,1000,10,8,8,7,9870,10.0117,985.849 +local,hierarchical,random,10000,10,8,8,7,1876,10.1227,185.326 +local,hierarchical,random,100000,10,8,8,7,224,11.0743,20.2269 +local,hierarchical,random,1000000,10,8,8,7,42,34.674,1.21128 +local,naive,fixed,1,10,12,12,11,12340922,10.0001,1.23409e+06 +local,naive,fixed,10,10,12,12,11,6491958,10.0001,649193 +local,naive,fixed,100,10,12,12,11,1009448,10.0003,100942 +local,naive,fixed,1000,10,12,12,11,108724,10.0023,10869.9 +local,naive,fixed,10000,10,12,12,11,11022,10.04,1097.81 +local,naive,fixed,100000,10,12,12,11,632,10.3033,61.3398 +local,naive,fixed,1000000,10,12,12,11,92,13.0007,7.07652 +local,naive,random,1,10,12,12,11,12441572,10,1.24415e+06 +local,naive,random,10,10,12,12,11,5863704,10.0001,586367 +local,naive,random,100,10,12,12,11,738232,10.0005,73819.8 +local,naive,random,1000,10,12,12,11,76098,10.0058,7605.43 
+local,naive,random,10000,10,12,12,11,7678,10.0415,764.628 +local,naive,random,100000,10,12,12,11,756,10.3338,73.1582 +local,naive,random,1000000,10,12,12,11,107,15.4668,6.91806 +local,hierarchical,fixed,1,10,12,12,11,8097496,10.0001,809744 +local,hierarchical,fixed,10,10,12,12,11,1604834,10.0002,160479 +local,hierarchical,fixed,100,10,12,12,11,265936,10.0016,26589.3 +local,hierarchical,fixed,1000,10,12,12,11,27346,10.0155,2730.37 +local,hierarchical,fixed,10000,10,12,12,11,2772,10.0941,274.615 +local,hierarchical,fixed,100000,10,12,12,11,308,11.2037,27.4909 +local,hierarchical,fixed,1000000,10,12,12,11,66,24.001,2.74988 +local,hierarchical,random,1,10,12,12,11,7607182,10,760715 +local,hierarchical,random,10,10,12,12,11,2208470,10.0002,220844 +local,hierarchical,random,100,10,12,12,11,266860,10.0013,26682.4 +local,hierarchical,random,1000,10,12,12,11,27896,10.0129,2786.02 +local,hierarchical,random,10000,10,12,12,11,2838,10.1515,279.565 +local,hierarchical,random,100000,10,12,12,11,308,10.7896,28.546 +local,hierarchical,random,1000000,10,12,12,11,66,19.8541,3.32425 +local,naive,fixed,1,10,1,1,1,4508789,10,450879 +local,naive,fixed,10,10,1,1,1,825894,10,82589 +local,naive,fixed,1,10,1,1,1,4512569,10,451257 +local,naive,fixed,10000,10,1,8,7,6998,10.0104,699.073 +local,naive,random,10000,10,1,8,7,6509,10.0115,650.153 +local,hierarchical,fixed,10000,10,1,8,7,3995,10.0167,398.833 +local,hierarchical,random,10000,10,1,8,7,3439,10.0041,343.761 diff --git a/benchmark/scripts/aurora_queue_utils.sh b/benchmark/scripts/aurora_queue_utils.sh new file mode 100755 index 0000000..51f969d --- /dev/null +++ b/benchmark/scripts/aurora_queue_utils.sh @@ -0,0 +1,53 @@ +#!/usr/bin/env bash +# SPDX-FileCopyrightText: 2025 QDX Technologies. Authored by Ryan Stocks +# SPDX-License-Identifier: Apache-2.0 +# +# Aurora PBS queue helpers: enforce "only 1 job <256 nodes in queue" and "at most 2 running". +# Source this from submit_aurora_*.sh. Set SKIP_QUEUE_POLL=1 to disable waiting. 
+ +# Poll interval in seconds. Override with AURORA_QUEUE_POLL_INTERVAL. +AURORA_QUEUE_POLL_INTERVAL="${AURORA_QUEUE_POLL_INTERVAL:-60}" + +# Count my jobs: running (state R). Assumes qstat -u output has state as second-to-last column. +_aurora_running_count() { + qstat -u "${USER}" 2>/dev/null | awk ' + NR > 5 && NF >= 2 && $(NF-1) == "R" { n++ } + END { print 0 + n } + ' +} + +# Count my jobs in debug-scaling (queued + running). Queue name is last column. +_aurora_debug_scaling_count() { + qstat -u "${USER}" 2>/dev/null | awk ' + NR > 5 && NF >= 2 && $NF == "debug-scaling" { n++ } + END { print 0 + n } + ' +} + +# Block until we are allowed to submit a job with this many nodes. +# Rules: only 1 job <256 nodes (debug-scaling) at a time; at most 2 jobs running. +wait_for_aurora_queue_space() { + local nodes="${1:?}" + if [[ -n "${SKIP_QUEUE_POLL:-}" ]]; then + return 0 + fi + while true; do + local running + running="$(_aurora_running_count)" + if [[ "${running}" -ge 2 ]]; then + echo "Aurora: ${running} jobs running (max 2); waiting ${AURORA_QUEUE_POLL_INTERVAL}s ..." + sleep "${AURORA_QUEUE_POLL_INTERVAL}" + continue + fi + if [[ "${nodes}" -lt 256 ]]; then + local in_debug + in_debug="$(_aurora_debug_scaling_count)" + if [[ "${in_debug}" -ge 1 ]]; then + echo "Aurora: ${in_debug} job(s) already in debug-scaling (max 1); waiting ${AURORA_QUEUE_POLL_INTERVAL}s ..." + sleep "${AURORA_QUEUE_POLL_INTERVAL}" + continue + fi + fi + return 0 + done +} diff --git a/benchmark/scripts/check_timer_resolution.sh b/benchmark/scripts/check_timer_resolution.sh new file mode 100755 index 0000000..9d747da --- /dev/null +++ b/benchmark/scripts/check_timer_resolution.sh @@ -0,0 +1,18 @@ +#!/usr/bin/env bash +# SPDX-FileCopyrightText: 2025 QDX Technologies. Authored by Ryan Stocks +# SPDX-License-Identifier: Apache-2.0 +set -euo pipefail + +# Example usage: +# ./benchmark/scripts/check_timer_resolution.sh + +ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." 
&& pwd)" +APP="${APP:-${ROOT_DIR}/build/benchmark/timer_resolution}" + +if [[ ! -f "${APP}" ]]; then + echo "Error: ${APP} not found. Please build the benchmark first." >&2 + echo "Run: cmake --build build --target timer_resolution" >&2 + exit 1 +fi + +"${APP}" diff --git a/benchmark/scripts/launch_aurora_naive_shutdown.sh b/benchmark/scripts/launch_aurora_naive_shutdown.sh new file mode 100755 index 0000000..5669844 --- /dev/null +++ b/benchmark/scripts/launch_aurora_naive_shutdown.sh @@ -0,0 +1,87 @@ +#!/usr/bin/env bash +# SPDX-FileCopyrightText: 2025 QDX Technologies. Authored by Ryan Stocks +# SPDX-License-Identifier: Apache-2.0 +set -euo pipefail + +# Example usage (PBS, qsub): +# qsub -l select=512:ncpus=102:mpiprocs=102 -l walltime=00:15:00 launch_aurora_naive_shutdown.sh +# Or use the submit script: ./benchmark/scripts/submit_aurora_naive_shutdown.sh + +ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)" +APP="${APP:-${ROOT_DIR}/build/benchmark/naive_shutdown_time}" +OUTPUT_DIR="${OUTPUT_DIR:-${ROOT_DIR}/benchmark/results}" +SYSTEM="aurora" + +IFS=' ' read -r -a NODE_LIST <<< "${NODE_LIST:-1 2 4 8 16 32 64 128 256 512 1024 2048}" +IFS=' ' read -r -a RANKS_PER_NODE_LIST <<< "${RANKS_PER_NODE_LIST:-core}" +LAUNCHER="${LAUNCHER:-}" +IFS=' ' read -r -a LAUNCHER_ARGS <<< "${LAUNCHER_ARGS:-}" +if [[ -z "${LAUNCHER}" ]]; then + if command -v srun >/dev/null 2>&1; then + LAUNCHER="srun" + elif command -v mpiexec >/dev/null 2>&1; then + LAUNCHER="mpiexec" + elif command -v mpirun >/dev/null 2>&1; then + LAUNCHER="mpirun" + else + echo "No launcher found. Install srun, mpiexec, or mpirun." 
>&2 + exit 1 + fi +fi + +get_allocated_cores_per_node() { + if [[ -n "${PBS_NCPUS:-}" ]]; then + echo "${PBS_NCPUS}" + return + fi + if [[ -n "${CORES_PER_NODE:-}" ]]; then + echo "${CORES_PER_NODE}" + return + fi + if [[ -n "${NCPUS_PER_NODE:-}" ]]; then + echo "${NCPUS_PER_NODE}" + return + fi + echo 102 +} + +ALLOC_CORES_PER_NODE="$(get_allocated_cores_per_node)" +echo "Allocated cores per node: ${ALLOC_CORES_PER_NODE}" + +export FI_CXI_RX_MATCH_MODE=software + +mkdir -p "${OUTPUT_DIR}" +CSV="${OUTPUT_DIR}/naive_shutdown_${SYSTEM}.csv" + +for nodes in "${NODE_LIST[@]}"; do + for rpn in "${RANKS_PER_NODE_LIST[@]}"; do + if [[ "${rpn}" == "core" || "${rpn}" == "cores" ]]; then + ranks_per_node="${CORES_PER_NODE:-102}" + else + ranks_per_node="${rpn}" + fi + if [[ "${ranks_per_node}" -gt "${ALLOC_CORES_PER_NODE}" ]]; then + echo "Requested ranks_per_node=${ranks_per_node} exceeds allocation ${ALLOC_CORES_PER_NODE}" >&2 + exit 1 + fi + total_ranks=$((nodes * ranks_per_node)) + echo "Running ${SYSTEM} nodes=${nodes} ranks_per_node=${ranks_per_node}" + launcher_base="$(basename "${LAUNCHER}")" + if [[ "${launcher_base}" == mpiexec || "${launcher_base}" == mpirun ]]; then + "${LAUNCHER}" "${LAUNCHER_ARGS[@]}" -n "${total_ranks}" --ppn "${ranks_per_node}" \ + "${APP}" \ + --nodes "${nodes}" \ + --system "${SYSTEM}" \ + --output "${CSV}" + else + "${LAUNCHER}" "${LAUNCHER_ARGS[@]}" -N "${nodes}" -n "${total_ranks}" \ + --ntasks-per-node="${ranks_per_node}" \ + "${APP}" \ + --nodes "${nodes}" \ + --system "${SYSTEM}" \ + --output "${CSV}" + fi + done +done + +echo "Results written to ${CSV}" diff --git a/benchmark/scripts/launch_aurora_strong_scaling.sh b/benchmark/scripts/launch_aurora_strong_scaling.sh new file mode 100755 index 0000000..af93865 --- /dev/null +++ b/benchmark/scripts/launch_aurora_strong_scaling.sh @@ -0,0 +1,107 @@ +#!/usr/bin/env bash +# SPDX-FileCopyrightText: 2025 QDX Technologies. 
Authored by Ryan Stocks +# SPDX-License-Identifier: Apache-2.0 +set -euo pipefail + +# Example usage (PBS, qsub): +# qsub -l select=1:ncpus=102:mpiprocs=102 -l walltime=02:00:00 launch_aurora_strong_scaling.sh +# Or use the submit script: ./benchmark/scripts/submit_aurora_strong_scaling.sh + +ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)" +APP="${APP:-${ROOT_DIR}/build/benchmark/strong_scaling_distribution_rate}" +OUTPUT_DIR="${OUTPUT_DIR:-${ROOT_DIR}/benchmark/results}" +SYSTEM="aurora" + +IFS=' ' read -r -a NODE_LIST <<< "${NODE_LIST:-1 2 4 8 16 32 64 128 256 512}" +IFS=' ' read -r -a TASK_US_LIST <<< "${TASK_US_LIST:-1 10 100 1000 10000 100000 1000000}" +IFS=' ' read -r -a DISTRIBUTIONS <<< "${DISTRIBUTIONS:-naive hierarchical}" +IFS=' ' read -r -a MODES <<< "${MODES:-fixed random}" +DURATION_S="${DURATION_S:-10}" +IFS=' ' read -r -a RANKS_PER_NODE_LIST <<< "${RANKS_PER_NODE_LIST:-core}" +LAUNCHER="${LAUNCHER:-}" +IFS=' ' read -r -a LAUNCHER_ARGS <<< "${LAUNCHER_ARGS:-}" +if [[ -z "${LAUNCHER}" ]]; then + if command -v srun >/dev/null 2>&1; then + LAUNCHER="srun" + elif command -v mpiexec >/dev/null 2>&1; then + LAUNCHER="mpiexec" + elif command -v mpirun >/dev/null 2>&1; then + LAUNCHER="mpirun" + else + echo "No launcher found. Install srun, mpiexec, or mpirun." 
>&2 + exit 1 + fi +fi + +get_allocated_cores_per_node() { + if [[ -n "${PBS_NCPUS:-}" ]]; then + echo "${PBS_NCPUS}" + return + fi + if [[ -n "${CORES_PER_NODE:-}" ]]; then + echo "${CORES_PER_NODE}" + return + fi + if [[ -n "${NCPUS_PER_NODE:-}" ]]; then + echo "${NCPUS_PER_NODE}" + return + fi + echo 102 +} + +ALLOC_CORES_PER_NODE="$(get_allocated_cores_per_node)" +echo "Allocated cores per node: ${ALLOC_CORES_PER_NODE}" + +export FI_CXI_RX_MATCH_MODE=software + +mkdir -p "${OUTPUT_DIR}" +CSV="${OUTPUT_DIR}/strong_scaling_${SYSTEM}.csv" + +for nodes in "${NODE_LIST[@]}"; do + for rpn in "${RANKS_PER_NODE_LIST[@]}"; do + if [[ "${rpn}" == "core" || "${rpn}" == "cores" ]]; then + ranks_per_node="${CORES_PER_NODE:-102}" + else + ranks_per_node="${rpn}" + fi + if [[ "${ranks_per_node}" -gt "${ALLOC_CORES_PER_NODE}" ]]; then + echo "Requested ranks_per_node=${ranks_per_node} exceeds allocation ${ALLOC_CORES_PER_NODE}" >&2 + exit 1 + fi + total_ranks=$((nodes * ranks_per_node)) + for dist in "${DISTRIBUTIONS[@]}"; do + # For Aurora, restrict to hierarchical distributor on 2048 nodes and above + if [[ "${SYSTEM}" == "aurora" && "${nodes}" -ge 2048 && "${dist}" != "hierarchical" ]]; then + continue + fi + for mode in "${MODES[@]}"; do + for expected_us in "${TASK_US_LIST[@]}"; do + echo "Running ${SYSTEM} nodes=${nodes} ranks_per_node=${ranks_per_node} dist=${dist} mode=${mode} expected_us=${expected_us}" + launcher_base="$(basename "${LAUNCHER}")" + if [[ "${launcher_base}" == mpiexec || "${launcher_base}" == mpirun ]]; then + "${LAUNCHER}" "${LAUNCHER_ARGS[@]}" -n "${total_ranks}" --ppn "${ranks_per_node}" \ + "${APP}" \ + --distribution "${dist}" \ + --mode "${mode}" \ + --expected_us "${expected_us}" \ + --duration_s "${DURATION_S}" \ + --nodes "${nodes}" \ + --system "${SYSTEM}" \ + --output "${CSV}" + else + "${LAUNCHER}" "${LAUNCHER_ARGS[@]}" -N "${nodes}" -n "${total_ranks}" \ + --ntasks-per-node="${ranks_per_node}" \ + "${APP}" \ + --distribution "${dist}" \ + 
--mode "${mode}" \ + --expected_us "${expected_us}" \ + --duration_s "${DURATION_S}" \ + --nodes "${nodes}" \ + --system "${SYSTEM}" \ + --output "${CSV}" + fi + done + done + done + done +done diff --git a/benchmark/scripts/launch_frontier_naive_shutdown.sh b/benchmark/scripts/launch_frontier_naive_shutdown.sh new file mode 100755 index 0000000..50145b8 --- /dev/null +++ b/benchmark/scripts/launch_frontier_naive_shutdown.sh @@ -0,0 +1,66 @@ +#!/usr/bin/env bash +# SPDX-FileCopyrightText: 2025 QDX Technologies. Authored by Ryan Stocks +# SPDX-License-Identifier: Apache-2.0 +set -euo pipefail + +# Example usage: +# sbatch --nodes=512 --time=00:15:00 launch_frontier_naive_shutdown.sh + +ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)" +APP="${APP:-${ROOT_DIR}/build/benchmark/naive_shutdown_time}" +OUTPUT_DIR="${OUTPUT_DIR:-${ROOT_DIR}/benchmark/results}" +SYSTEM="frontier" + +IFS=' ' read -r -a NODE_LIST <<< "${NODE_LIST:-1 2 4 8 16 32 64 128 256 512 1024 2048 4096 8192}" +IFS=' ' read -r -a RANKS_PER_NODE_LIST <<< "${RANKS_PER_NODE_LIST:-core}" +LAUNCHER="${LAUNCHER:-}" +IFS=' ' read -r -a LAUNCHER_ARGS <<< "${LAUNCHER_ARGS:-}" +if [[ -z "${LAUNCHER}" ]]; then + if command -v srun >/dev/null 2>&1; then + LAUNCHER="srun" + elif command -v mpiexec >/dev/null 2>&1; then + LAUNCHER="mpiexec" + elif command -v mpirun >/dev/null 2>&1; then + LAUNCHER="mpirun" + else + echo "No launcher found. Install srun, mpiexec, or mpirun." 
>&2 + exit 1 + fi +fi + +mkdir -p "${OUTPUT_DIR}" +CSV="${OUTPUT_DIR}/naive_shutdown_${SYSTEM}.csv" + +for nodes in "${NODE_LIST[@]}"; do + for rpn in "${RANKS_PER_NODE_LIST[@]}"; do + if [[ "${rpn}" == "core" || "${rpn}" == "cores" ]]; then + if [[ -n "${SLURM_JOB_CPUS_PER_NODE:-}" ]]; then + ranks_per_node="${SLURM_JOB_CPUS_PER_NODE%%(*}" + ranks_per_node="${ranks_per_node%%,*}" + else + ranks_per_node="${CORES_PER_NODE:-56}" + fi + else + ranks_per_node="${rpn}" + fi + total_ranks=$((nodes * ranks_per_node)) + echo "Running ${SYSTEM} nodes=${nodes} ranks_per_node=${ranks_per_node}" + launcher_base="$(basename "${LAUNCHER}")" + if [[ "${launcher_base}" == mpiexec || "${launcher_base}" == mpirun ]]; then + "${LAUNCHER}" "${LAUNCHER_ARGS[@]}" -n "${total_ranks}" --ppn "${ranks_per_node}" \ + "${APP}" \ + --nodes "${nodes}" \ + --system "${SYSTEM}" \ + --output "${CSV}" + else + "${LAUNCHER}" "${LAUNCHER_ARGS[@]}" -N "${nodes}" -n "${total_ranks}" \ + --ntasks-per-node="${ranks_per_node}" \ + "${APP}" \ + --nodes "${nodes}" \ + --system "${SYSTEM}" \ + --output "${CSV}" + fi + done +done + +echo "Results written to ${CSV}" diff --git a/benchmark/scripts/launch_frontier_strong_scaling.sh b/benchmark/scripts/launch_frontier_strong_scaling.sh new file mode 100755 index 0000000..7179eb9 --- /dev/null +++ b/benchmark/scripts/launch_frontier_strong_scaling.sh @@ -0,0 +1,82 @@ +#!/usr/bin/env bash +# SPDX-FileCopyrightText: 2025 QDX Technologies. Authored by Ryan Stocks +# SPDX-License-Identifier: Apache-2.0 +set -euo pipefail + +# Example usage: +# sbatch --nodes=8096 --time=02:00:00 launch_frontier_strong_scaling.sh + +ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." 
&& pwd)" +APP="${APP:-${ROOT_DIR}/build/benchmark/strong_scaling_distribution_rate}" +OUTPUT_DIR="${OUTPUT_DIR:-${ROOT_DIR}/benchmark/results}" +SYSTEM="frontier" + +IFS=' ' read -r -a NODE_LIST <<< "${NODE_LIST:-1 2 4 8 16 32 64 128 256 512}" +IFS=' ' read -r -a TASK_US_LIST <<< "${TASK_US_LIST:-1 10 100 1000 10000 100000 1000000}" +IFS=' ' read -r -a DISTRIBUTIONS <<< "${DISTRIBUTIONS:-naive hierarchical}" +IFS=' ' read -r -a MODES <<< "${MODES:-fixed random}" +DURATION_S="${DURATION_S:-10}" +IFS=' ' read -r -a RANKS_PER_NODE_LIST <<< "${RANKS_PER_NODE_LIST:-core}" +LAUNCHER="${LAUNCHER:-}" +IFS=' ' read -r -a LAUNCHER_ARGS <<< "${LAUNCHER_ARGS:-}" +if [[ -z "${LAUNCHER}" ]]; then + if command -v srun >/dev/null 2>&1; then + LAUNCHER="srun" + elif command -v mpiexec >/dev/null 2>&1; then + LAUNCHER="mpiexec" + elif command -v mpirun >/dev/null 2>&1; then + LAUNCHER="mpirun" + else + echo "No launcher found. Install srun, mpiexec, or mpirun." >&2 + exit 1 + fi +fi + +mkdir -p "${OUTPUT_DIR}" +CSV="${OUTPUT_DIR}/strong_scaling_${SYSTEM}.csv" + +for nodes in "${NODE_LIST[@]}"; do + for rpn in "${RANKS_PER_NODE_LIST[@]}"; do + if [[ "${rpn}" == "core" || "${rpn}" == "cores" ]]; then + if [[ -n "${SLURM_JOB_CPUS_PER_NODE:-}" ]]; then + ranks_per_node="${SLURM_JOB_CPUS_PER_NODE%%(*}" + ranks_per_node="${ranks_per_node%%,*}" + else + ranks_per_node="${CORES_PER_NODE:-56}" + fi + else + ranks_per_node="${rpn}" + fi + total_ranks=$((nodes * ranks_per_node)) + for dist in "${DISTRIBUTIONS[@]}"; do + for mode in "${MODES[@]}"; do + for expected_us in "${TASK_US_LIST[@]}"; do + echo "Running ${SYSTEM} nodes=${nodes} ranks_per_node=${ranks_per_node} dist=${dist} mode=${mode} expected_us=${expected_us}" + launcher_base="$(basename "${LAUNCHER}")" + if [[ "${launcher_base}" == mpiexec || "${launcher_base}" == mpirun ]]; then + "${LAUNCHER}" "${LAUNCHER_ARGS[@]}" -n "${total_ranks}" --ppn "${ranks_per_node}" \ + "${APP}" \ + --distribution "${dist}" \ + --mode "${mode}" \ + 
--expected_us "${expected_us}" \ + --duration_s "${DURATION_S}" \ + --nodes "${nodes}" \ + --system "${SYSTEM}" \ + --output "${CSV}" + else + "${LAUNCHER}" "${LAUNCHER_ARGS[@]}" -N "${nodes}" -n "${total_ranks}" \ + --ntasks-per-node="${ranks_per_node}" \ + "${APP}" \ + --distribution "${dist}" \ + --mode "${mode}" \ + --expected_us "${expected_us}" \ + --duration_s "${DURATION_S}" \ + --nodes "${nodes}" \ + --system "${SYSTEM}" \ + --output "${CSV}" + fi + done + done + done + done +done diff --git a/benchmark/scripts/launch_local_naive_shutdown.sh b/benchmark/scripts/launch_local_naive_shutdown.sh new file mode 100755 index 0000000..e731a82 --- /dev/null +++ b/benchmark/scripts/launch_local_naive_shutdown.sh @@ -0,0 +1,50 @@ +#!/usr/bin/env bash +# SPDX-FileCopyrightText: 2025 QDX Technologies. Authored by Ryan Stocks +# SPDX-License-Identifier: Apache-2.0 +set -euo pipefail + +# Example usage: +# ./benchmark/scripts/launch_local_naive_shutdown.sh + +ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)" +APP="${APP:-${ROOT_DIR}/build/benchmark/naive_shutdown_time}" +OUTPUT_DIR="${OUTPUT_DIR:-${ROOT_DIR}/benchmark/results}" +SYSTEM="local" + +IFS=' ' read -r -a RANK_LIST <<< "${RANK_LIST:-1 2 4 8 12}" +LAUNCHER="${LAUNCHER:-}" +IFS=' ' read -r -a LAUNCHER_ARGS <<< "${LAUNCHER_ARGS:-}" + +if [[ -z "${LAUNCHER}" ]]; then + if command -v mpirun >/dev/null 2>&1; then + LAUNCHER="mpirun" + elif command -v mpiexec >/dev/null 2>&1; then + LAUNCHER="mpiexec" + else + echo "No launcher found. Install mpirun or mpiexec." 
>&2 + exit 1 + fi +fi + +mkdir -p "${OUTPUT_DIR}" +CSV="${OUTPUT_DIR}/naive_shutdown_${SYSTEM}.csv" + +for ranks in "${RANK_LIST[@]}"; do + echo "Running ${SYSTEM} ranks=${ranks}" + launcher_base="$(basename "${LAUNCHER}")" + if [[ "${launcher_base}" == mpiexec ]]; then + "${LAUNCHER}" ${LAUNCHER_ARGS[@]+"${LAUNCHER_ARGS[@]}"} -n "${ranks}" \ + "${APP}" \ + --nodes 1 \ + --system "${SYSTEM}" \ + --output "${CSV}" + else + "${LAUNCHER}" ${LAUNCHER_ARGS[@]+"${LAUNCHER_ARGS[@]}"} -np "${ranks}" \ + "${APP}" \ + --nodes 1 \ + --system "${SYSTEM}" \ + --output "${CSV}" + fi +done + +echo "Results written to ${CSV}" diff --git a/benchmark/scripts/launch_local_strong_scaling.sh b/benchmark/scripts/launch_local_strong_scaling.sh new file mode 100755 index 0000000..dae075c --- /dev/null +++ b/benchmark/scripts/launch_local_strong_scaling.sh @@ -0,0 +1,68 @@ +#!/usr/bin/env bash +# SPDX-FileCopyrightText: 2025 QDX Technologies. Authored by Ryan Stocks +# SPDX-License-Identifier: Apache-2.0 +set -euo pipefail + +# Example usage: +# ./benchmark/scripts/launch_local_strong_scaling.sh + +ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)" +APP="${APP:-${ROOT_DIR}/build/benchmark/strong_scaling_distribution_rate}" +OUTPUT_DIR="${OUTPUT_DIR:-${ROOT_DIR}/benchmark/results}" +SYSTEM="local" + +IFS=' ' read -r -a RANK_LIST <<< "${RANK_LIST:-1 2 4 8 12}" +IFS=' ' read -r -a TASK_US_LIST <<< "${TASK_US_LIST:-1 10 100 1000 10000 100000 1000000}" +IFS=' ' read -r -a DISTRIBUTIONS <<< "${DISTRIBUTIONS:-naive hierarchical}" +IFS=' ' read -r -a MODES <<< "${MODES:-fixed random}" +DURATION_S="${DURATION_S:-10}" +LAUNCHER="${LAUNCHER:-}" +IFS=' ' read -r -a LAUNCHER_ARGS <<< "${LAUNCHER_ARGS:-}" + +if [[ -z "${LAUNCHER}" ]]; then + if command -v mpirun >/dev/null 2>&1; then + LAUNCHER="mpirun" + elif command -v mpiexec >/dev/null 2>&1; then + LAUNCHER="mpiexec" + else + echo "No launcher found. Install mpirun or mpiexec." 
>&2 + exit 1 + fi +fi + +mkdir -p "${OUTPUT_DIR}" +CSV="${OUTPUT_DIR}/strong_scaling_${SYSTEM}.csv" + +for ranks in "${RANK_LIST[@]}"; do + for dist in "${DISTRIBUTIONS[@]}"; do + for mode in "${MODES[@]}"; do + for expected_us in "${TASK_US_LIST[@]}"; do + echo "Running ${SYSTEM} ranks=${ranks} dist=${dist} mode=${mode} expected_us=${expected_us}" + launcher_base="$(basename "${LAUNCHER}")" + if [[ "${launcher_base}" == mpiexec ]]; then + "${LAUNCHER}" ${LAUNCHER_ARGS[@]+"${LAUNCHER_ARGS[@]}"} -n "${ranks}" \ + "${APP}" \ + --distribution "${dist}" \ + --mode "${mode}" \ + --expected_us "${expected_us}" \ + --duration_s "${DURATION_S}" \ + --nodes 1 \ + --system "${SYSTEM}" \ + --output "${CSV}" + else + "${LAUNCHER}" ${LAUNCHER_ARGS[@]+"${LAUNCHER_ARGS[@]}"} -np "${ranks}" \ + "${APP}" \ + --distribution "${dist}" \ + --mode "${mode}" \ + --expected_us "${expected_us}" \ + --duration_s "${DURATION_S}" \ + --nodes 1 \ + --system "${SYSTEM}" \ + --output "${CSV}" + fi + done + done + done +done + +echo "Results written to ${CSV}" diff --git a/benchmark/scripts/plot_shutdown_time.py b/benchmark/scripts/plot_shutdown_time.py new file mode 100755 index 0000000..a8e4a71 --- /dev/null +++ b/benchmark/scripts/plot_shutdown_time.py @@ -0,0 +1,177 @@ +#!/usr/bin/env python3 +# SPDX-FileCopyrightText: 2025 QDX Technologies. 
Authored by Ryan Stocks +# SPDX-License-Identifier: Apache-2.0 +import argparse +import csv +import os +from collections import defaultdict + +import matplotlib.pyplot as plt +from matplotlib.ticker import FixedLocator, FuncFormatter +import scienceplots # noqa: F401 # registers matplotlib styles + +# IEEE styling parameters +IEEE_FIG_WIDTH = 3.5 # Single column width in inches +IEEE_FIG_HEIGHT = 3.5 # Height in inches + +# Hollow marker shapes for different series +MARKER_SHAPES = ['o', 's', '^', 'v', 'D', 'p', '*', 'h', 'X', '<', '>', 'd'] + + +def collect_csv_paths(inputs): + paths = [] + for raw in inputs: + for entry in raw.split(","): + entry = entry.strip() + if not entry: + continue + if os.path.isdir(entry): + for root, _, files in os.walk(entry): + for name in files: + # Only collect shutdown-related CSV files + if name.endswith(".csv") and "shutdown" in name.lower(): + paths.append(os.path.join(root, name)) + else: + # Only add if it's a shutdown CSV file + if "shutdown" in os.path.basename(entry).lower(): + paths.append(entry) + return paths + + +def parse_rows(paths): + rows = [] + for path in paths: + file_mtime = os.path.getmtime(path) + with open(path, "r", encoding="utf-8") as handle: + reader = csv.DictReader(handle) + for row in reader: + # Skip rows that don't have the time_per_shutdown_us column + if "time_per_shutdown_us" not in row: + continue + nodes = int(float(row.get("nodes", 0))) + world_size = int(float(row.get("world_size", 0))) + workers = int(float(row.get("workers", 0))) + time_per_shutdown_us = float(row.get("time_per_shutdown_us", 0.0)) + # Skip rows with zero or invalid shutdown times + if time_per_shutdown_us <= 0.0: + continue + rows.append( + { + "system": row.get("system", "").strip() or "unknown", + "nodes": nodes, + "world_size": world_size, + "workers": workers, + "time_per_shutdown_us": time_per_shutdown_us, + "file_mtime": file_mtime, + } + ) + return rows + + +def group_rows(rows): + # First, filter to keep only newest 
results for each unique configuration + # Key: (system, nodes) + # Value: (time_per_shutdown_us, file_mtime) + newest_by_config = {} + for row in rows: + config_key = ( + row["system"], + row["nodes"], + ) + if config_key not in newest_by_config: + newest_by_config[config_key] = (row["time_per_shutdown_us"], row["file_mtime"]) + else: + # Keep the one from the newest file + _, existing_mtime = newest_by_config[config_key] + if row["file_mtime"] > existing_mtime: + newest_by_config[config_key] = (row["time_per_shutdown_us"], row["file_mtime"]) + + # Now group by system for plotting + grouped = defaultdict(list) + for (system, nodes), (time_per_shutdown_us, _) in newest_by_config.items(): + grouped[system].append((nodes, time_per_shutdown_us)) + return grouped + + +def plot_all_systems(grouped, output_dir, image_format): + # Use scienceplots IEEE style + with plt.style.context(['science', 'ieee']): + fig, ax = plt.subplots(figsize=(IEEE_FIG_WIDTH, IEEE_FIG_HEIGHT)) + + # Filter out "local" system + systems = sorted([s for s in grouped.keys() if s != "local"]) + all_nodes = set() + handles = [] + labels = [] + + # Plot each system with different markers/colors + for idx, system in enumerate(systems): + points = grouped[system] + points_sorted = sorted(points, key=lambda x: x[0]) + nodes = [p[0] for p in points_sorted] + time_per_shutdown_us = [p[1] for p in points_sorted] + # Convert microseconds to seconds + time_per_shutdown_s = [t / 1_000_000.0 for t in time_per_shutdown_us] + + all_nodes.update(nodes) + + marker = MARKER_SHAPES[idx % len(MARKER_SHAPES)] + color = plt.cm.tab10(idx % 10) + + # Plot data + line, = ax.plot(nodes, time_per_shutdown_s, marker=marker, fillstyle='none', + markeredgewidth=1.0, linewidth=1.0, color=color, label=system.capitalize()) + handles.append(line) + labels.append(system.capitalize()) + + ax.set_xlabel("Nodes") + ax.set_ylabel("Shutdown time (s)") + ax.set_xscale("log", base=2) + ax.set_yscale("log") + + # Show actual node counts (1, 
2, 4, 8, 16, ...) rather than 2^n formatting. + # Keep the log2 spacing but format ticks as plain integers. + if all_nodes: + node_ticks = sorted(all_nodes) + ax.xaxis.set_major_locator(FixedLocator(node_ticks)) + ax.xaxis.set_major_formatter(FuncFormatter(lambda x, pos: f"{int(x)}")) + + # Add very light grey underlying grid + ax.grid(True, which="both", linestyle="-", linewidth=0.5, color='lightgrey', alpha=0.5, zorder=0) + + # Add legend + ax.legend(handles, labels, frameon=False, loc='best') + + filename = f"shutdown_time_combined.{image_format}" + fig.tight_layout() + fig.savefig(os.path.join(output_dir, filename), dpi=300, bbox_inches='tight') + plt.close(fig) + + +def main(): + parser = argparse.ArgumentParser(description="Plot shutdown time vs number of nodes.") + parser.add_argument( + "--input", + required=True, + action="append", + help="CSV file or directory (can be passed multiple times)", + ) + parser.add_argument( + "--output-dir", required=True, help="Directory to write output plots" + ) + parser.add_argument( + "--format", default="png", choices=["png", "pdf", "svg"], help="Output image format" + ) + args = parser.parse_args() + + os.makedirs(args.output_dir, exist_ok=True) + input_paths = collect_csv_paths(args.input) + rows = parse_rows(input_paths) + grouped = group_rows(rows) + + # Plot all systems on the same figure + plot_all_systems(grouped, args.output_dir, args.format) + + +if __name__ == "__main__": + main() diff --git a/benchmark/scripts/plot_strong_scaling.py b/benchmark/scripts/plot_strong_scaling.py new file mode 100755 index 0000000..26ace5c --- /dev/null +++ b/benchmark/scripts/plot_strong_scaling.py @@ -0,0 +1,267 @@ +#!/usr/bin/env python3 +# SPDX-FileCopyrightText: 2025 QDX Technologies. 
Authored by Ryan Stocks +# SPDX-License-Identifier: Apache-2.0 +import argparse +import csv +import os +from collections import defaultdict + +import matplotlib.pyplot as plt +from matplotlib.ticker import FixedLocator, FuncFormatter +import scienceplots # noqa: F401 # registers matplotlib styles + +# IEEE styling parameters +IEEE_FIG_WIDTH = 3.5 # Single column width in inches +IEEE_FIG_HEIGHT = 3.5 # Height in inches (increased for bottom legend) + +# Hollow marker shapes for different series +MARKER_SHAPES = ['o', 's', '^', 'v', 'D', 'p', '*', 'h', 'X', '<', '>', 'd'] + + +def format_duration(expected_ns): + if expected_ns <= 0: + return "0 ns" + if expected_ns >= 1_000_000_000: + return f"{expected_ns / 1_000_000_000:g} s" + if expected_ns >= 1_000_000: + return f"{expected_ns / 1_000_000:g} ms" + if expected_ns >= 1_000: + return f"{expected_ns / 1_000:g} us" + return f"{expected_ns:g} ns" + + +def collect_csv_paths(inputs): + paths = [] + for raw in inputs: + for entry in raw.split(","): + entry = entry.strip() + if not entry: + continue + if os.path.isdir(entry): + for root, _, files in os.walk(entry): + for name in files: + # Only collect strong scaling CSV files + if name.endswith(".csv") and "strong_scaling" in name.lower(): + paths.append(os.path.join(root, name)) + else: + # Only add if it's a strong scaling CSV file + if "strong_scaling" in os.path.basename(entry).lower(): + paths.append(entry) + return paths + + +def parse_rows(paths): + rows = [] + for path in paths: + file_mtime = os.path.getmtime(path) + with open(path, "r", encoding="utf-8") as handle: + reader = csv.DictReader(handle) + for row in reader: + expected_ns_raw = row.get("expected_ns", "").strip() + expected_us_raw = row.get("expected_us", "").strip() + if expected_ns_raw: + expected_ns = int(float(expected_ns_raw)) + elif expected_us_raw: + expected_ns = int(float(expected_us_raw) * 1000) + else: + expected_ns = 0 + nodes = int(float(row.get("nodes", 0))) + world_size = 
int(float(row.get("world_size", 0))) + ranks_per_node = int(round(world_size / nodes)) if nodes else 0 + rows.append( + { + "system": row.get("system", "").strip() or "unknown", + "distributor": row.get("distributor", "").strip(), + "mode": row.get("mode", "").strip(), + "expected_ns": expected_ns, + "nodes": nodes, + "ranks_per_node": ranks_per_node, + "throughput": float(row.get("throughput_tasks_per_s", 0.0)), + "file_mtime": file_mtime, + } + ) + return rows + + +def group_rows(rows): + # First, filter to keep only newest results for each unique configuration + # Key: (system, distributor, mode, expected_ns, ranks_per_node, nodes) + # Value: (throughput, file_mtime) + # Note: Normalize mode for backward compatibility (poisson -> random) + newest_by_config = {} + for row in rows: + # Handle backward compatibility: treat "poisson" as "random" + normalized_mode = "random" if row["mode"] == "poisson" else row["mode"] + config_key = ( + row["system"], + row["distributor"], + normalized_mode, + row["expected_ns"], + row["ranks_per_node"], + row["nodes"], + ) + if config_key not in newest_by_config: + newest_by_config[config_key] = (row["throughput"], row["file_mtime"]) + else: + # Keep the one from the newest file + _, existing_mtime = newest_by_config[config_key] + if row["file_mtime"] > existing_mtime: + newest_by_config[config_key] = (row["throughput"], row["file_mtime"]) + + # Now group by (system, distributor, mode, expected_ns, ranks_per_node) for plotting + grouped = defaultdict(list) + for (system, distributor, mode, expected_ns, ranks_per_node, nodes), (throughput, _) in newest_by_config.items(): + key = (system, distributor, mode, expected_ns, ranks_per_node) + grouped[key].append((nodes, throughput)) + return grouped + + +def plot_distributor(system, distributor, grouped, output_dir, image_format): + modes = ["fixed", "random"] + + # Create separate plots for each mode + for mode in modes: + # Use scienceplots IEEE style + with 
plt.style.context(['science', 'ieee']): + fig, ax = plt.subplots(figsize=(IEEE_FIG_WIDTH, IEEE_FIG_HEIGHT)) + + series = [] + all_nodes = set() + ranks_per_node_value = None + for ( + sys_name, + dist, + mode_name, + expected_ns, + ranks_per_node, + ), points in grouped.items(): + # Handle backward compatibility: treat "poisson" as "random" + normalized_mode = "random" if mode_name == "poisson" else mode_name + if sys_name != system or dist != distributor or normalized_mode != mode: + continue + points_sorted = sorted(points, key=lambda x: x[0]) + nodes = [p[0] for p in points_sorted] + throughput = [p[1] for p in points_sorted] + all_nodes.update(nodes) + if ranks_per_node_value is None: + ranks_per_node_value = ranks_per_node + series.append((expected_ns, ranks_per_node, nodes, throughput)) + + # Skip creating plot if there's no data + if not series: + plt.close(fig) + continue + + # Sort series by expected_ns (duration) to ensure proper ordering + series_sorted = sorted(series, key=lambda x: x[0]) # Sort by expected_ns only + handles = [] + labels = [] + + # Plot actual data first to establish axis limits + for idx, (expected_ns, ranks_per_node, nodes, throughput) in enumerate(series_sorted): + # Remove rpn from legend label, only show duration + label = format_duration(expected_ns) + marker = MARKER_SHAPES[idx % len(MARKER_SHAPES)] + color = plt.cm.tab10(idx % 10) + # Use matplotlib's default color cycle for different colors + line, = ax.plot(nodes, throughput, marker=marker, label=label, + fillstyle='none', markeredgewidth=1.0, + color=color) + handles.append(line) + labels.append(label) + + ax.set_xlabel("Nodes") + ax.set_ylabel("Tasks per second") + ax.set_xscale("log", base=2) + ax.set_yscale("log") + # Show actual node counts (2, 4, 8, 16, ...) rather than 2^n formatting. + # Keep the log2 spacing but format ticks as plain integers. 
+ if all_nodes: + node_ticks = sorted(all_nodes) + ax.xaxis.set_major_locator(FixedLocator(node_ticks)) + ax.xaxis.set_major_formatter(FuncFormatter(lambda x, pos: f"{int(x)}")) + + # Add very light grey underlying grid + ax.grid(True, which="both", linestyle="-", linewidth=0.5, color='lightgrey', alpha=0.5, zorder=0) + + # Store axis limits before plotting ideal lines + xlim = ax.get_xlim() + ylim = ax.get_ylim() + + # Plot ideal scaling lines without affecting axis limits + for idx, (expected_ns, ranks_per_node, nodes, throughput) in enumerate(series_sorted): + color = plt.cm.tab10(idx % 10) + # Add ideal scaling line: throughput = nodes * ranks_per_node * 1e9 / expected_ns + if all_nodes: + ideal_nodes = sorted(all_nodes) + ideal_throughput = [n * ranks_per_node * 1e9 / expected_ns for n in ideal_nodes] + ax.plot(ideal_nodes, ideal_throughput, linestyle='--', color=color, + linewidth=1.0, alpha=0.5, zorder=0) + + # Restore axis limits to those determined by actual data + ax.set_xlim(xlim) + ax.set_ylim(ylim) + + # Reorder handles and labels to go across columns first (row-major) + # Matplotlib's legend with ncol fills column-major (down columns first), + # so we need to transpose the order to get row-major display + ncol = 4 + n_items = len(handles) + n_rows = (n_items + ncol - 1) // ncol # Ceiling division + + # Create reordered lists: transpose so matplotlib's column-major fill gives row-major display + reordered_handles = [] + reordered_labels = [] + for col in range(ncol): + for row in range(n_rows): + idx = row * ncol + col + if idx < n_items: + reordered_handles.append(handles[idx]) + reordered_labels.append(labels[idx]) + + # Compact legend with increased column spacing - 4 columns at bottom, no border + # Items ordered by duration (1us, 10us, 100us, ...) 
going across columns first + ax.legend(reordered_handles, reordered_labels, + frameon=False, + ncol=ncol, columnspacing=0.8, + loc='upper center', bbox_to_anchor=(0.5, -0.15)) + + # Add rpn to filename + rpn_str = f"_{ranks_per_node_value}rpn" if ranks_per_node_value else "" + filename = f"strong_scaling_{system}_{distributor}_{mode}{rpn_str}.{image_format}" + fig.tight_layout(rect=[0, 0.12, 1, 1]) # Leave space at bottom for legend + fig.savefig(os.path.join(output_dir, filename), dpi=300, bbox_inches='tight') + plt.close(fig) + + +def main(): + parser = argparse.ArgumentParser(description="Plot strong scaling distribution throughput.") + parser.add_argument( + "--input", + required=True, + action="append", + help="CSV file or directory (can be passed multiple times)", + ) + parser.add_argument( + "--output-dir", required=True, help="Directory to write output plots" + ) + parser.add_argument( + "--format", default="png", choices=["png", "pdf", "svg"], help="Output image format" + ) + args = parser.parse_args() + + os.makedirs(args.output_dir, exist_ok=True) + input_paths = collect_csv_paths(args.input) + rows = parse_rows(input_paths) + grouped = group_rows(rows) + + systems = sorted({row["system"] for row in rows}) + # Filter out empty distributors and only include those with actual data + distributors = sorted({row["distributor"] for row in rows if row["distributor"].strip()}) + for system in systems: + for distributor in distributors: + plot_distributor(system, distributor, grouped, args.output_dir, args.format) + + +if __name__ == "__main__": + main() diff --git a/benchmark/scripts/submit_aurora_naive_shutdown.sh b/benchmark/scripts/submit_aurora_naive_shutdown.sh new file mode 100755 index 0000000..50dfe62 --- /dev/null +++ b/benchmark/scripts/submit_aurora_naive_shutdown.sh @@ -0,0 +1,56 @@ +#!/usr/bin/env bash +# SPDX-FileCopyrightText: 2025 QDX Technologies. 
Authored by Ryan Stocks +# SPDX-License-Identifier: Apache-2.0 +set -euo pipefail + +# Submit one PBS job per node count to avoid long serial waits. +# Example: +# ./benchmark/scripts/submit_aurora_naive_shutdown.sh + +ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)" +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +# shellcheck source=benchmark/scripts/aurora_queue_utils.sh +source "${SCRIPT_DIR}/aurora_queue_utils.sh" +SYSTEM="aurora" +SCRIPT="${ROOT_DIR}/benchmark/scripts/launch_aurora_naive_shutdown.sh" + +IFS=' ' read -r -a NODE_LIST <<< "${NODE_LIST:-1 2 4 8 16 32 64 128 256 512}" +IFS=' ' read -r -a QSUB_ARGS <<< "${QSUB_ARGS:-}" +ACCOUNT="${ACCOUNT:-DynaMPI}" +FILESYSTEMS="${FILESYSTEMS:-flare}" +NCPUS_PER_NODE="${NCPUS_PER_NODE:-102}" + +WALLTIME="${WALLTIME:-00:15:00}" +LAUNCHER="${LAUNCHER:-}" +LAUNCHER_ARGS="${LAUNCHER_ARGS:-}" +OUTPUT_BASE="${OUTPUT_DIR:-${ROOT_DIR}/benchmark/results}" + +for nodes in "${NODE_LIST[@]}"; do + job_name="dynampi_shutdown_${SYSTEM}_${nodes}" + submit_args=("${QSUB_ARGS[@]}") + if [[ -n "${ACCOUNT}" ]]; then + submit_args+=(-A "${ACCOUNT}") + fi + if [[ "${nodes}" -lt 256 ]]; then + submit_args+=(-q "debug-scaling") + else + submit_args+=(-q "prod") + fi + wait_for_aurora_queue_space "${nodes}" + job_script="#!/usr/bin/env bash +#PBS -j oe +set -euo pipefail +cd \"${ROOT_DIR}\" +export NODE_LIST=\"${nodes}\" +export LAUNCHER=\"${LAUNCHER}\" +export LAUNCHER_ARGS=\"${LAUNCHER_ARGS}\" +export CORES_PER_NODE=\"${NCPUS_PER_NODE}\" +export OUTPUT_DIR=\"${OUTPUT_BASE}/${SYSTEM}/${nodes}-${job_name}-\${PBS_JOBID_SHORT:-manual}\" +${SCRIPT} +" + echo "qsub ${submit_args[*]} -N \"${job_name}\" -l \"select=${nodes}:ncpus=${NCPUS_PER_NODE}:mpiprocs=${NCPUS_PER_NODE}\" -l \"walltime=${WALLTIME}\" -l \"filesystems=${FILESYSTEMS}\" <<'QSUBEOF'" + echo "${job_script}" + echo "QSUBEOF" + qsub "${submit_args[@]}" -N "${job_name}" -l "select=${nodes}:ncpus=${NCPUS_PER_NODE}:mpiprocs=${NCPUS_PER_NODE}" -l 
"walltime=${WALLTIME}" \ + -l "filesystems=${FILESYSTEMS}" <<< "${job_script}" +done diff --git a/benchmark/scripts/submit_aurora_strong_scaling.sh b/benchmark/scripts/submit_aurora_strong_scaling.sh new file mode 100755 index 0000000..ad5c7de --- /dev/null +++ b/benchmark/scripts/submit_aurora_strong_scaling.sh @@ -0,0 +1,56 @@ +#!/usr/bin/env bash +# SPDX-FileCopyrightText: 2025 QDX Technologies. Authored by Ryan Stocks +# SPDX-License-Identifier: Apache-2.0 +set -euo pipefail + +# Submit one PBS job per node count to avoid long serial waits. +# Example: +# ./benchmark/scripts/submit_aurora_strong_scaling.sh + +ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)" +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +# shellcheck source=benchmark/scripts/aurora_queue_utils.sh +source "${SCRIPT_DIR}/aurora_queue_utils.sh" +SYSTEM="aurora" +SCRIPT="${ROOT_DIR}/benchmark/scripts/launch_aurora_strong_scaling.sh" + +IFS=' ' read -r -a NODE_LIST <<< "${NODE_LIST:-1 2 4 8 16 32 64 128 256 512}" +IFS=' ' read -r -a QSUB_ARGS <<< "${QSUB_ARGS:-}" +ACCOUNT="${ACCOUNT:-DynaMPI}" +FILESYSTEMS="${FILESYSTEMS:-flare}" +NCPUS_PER_NODE="${NCPUS_PER_NODE:-102}" + +WALLTIME="${WALLTIME:-00:15:00}" +LAUNCHER="${LAUNCHER:-}" +LAUNCHER_ARGS="${LAUNCHER_ARGS:-}" +OUTPUT_BASE="${OUTPUT_DIR:-${ROOT_DIR}/benchmark/results}" + +for nodes in "${NODE_LIST[@]}"; do + job_name="dynampi_ss_${SYSTEM}_${nodes}" + submit_args=("${QSUB_ARGS[@]}") + if [[ -n "${ACCOUNT}" ]]; then + submit_args+=(-A "${ACCOUNT}") + fi + if [[ "${nodes}" -lt 256 ]]; then + submit_args+=(-q "debug-scaling") + else + submit_args+=(-q "prod") + fi + wait_for_aurora_queue_space "${nodes}" + job_script="#!/usr/bin/env bash +#PBS -j oe +set -euo pipefail +cd \"${ROOT_DIR}\" +export NODE_LIST=\"${nodes}\" +export LAUNCHER=\"${LAUNCHER}\" +export LAUNCHER_ARGS=\"${LAUNCHER_ARGS}\" +export CORES_PER_NODE=\"${NCPUS_PER_NODE}\" +export 
OUTPUT_DIR=\"${OUTPUT_BASE}/${SYSTEM}/${nodes}-${job_name}-\${PBS_JOBID_SHORT:-manual}\" +${SCRIPT} +" + echo "qsub ${submit_args[*]} -N \"${job_name}\" -l \"select=${nodes}:ncpus=${NCPUS_PER_NODE}:mpiprocs=${NCPUS_PER_NODE}\" -l \"walltime=${WALLTIME}\" -l \"filesystems=${FILESYSTEMS}\" <<'QSUBEOF'" + echo "${job_script}" + echo "QSUBEOF" + qsub "${submit_args[@]}" -N "${job_name}" -l "select=${nodes}:ncpus=${NCPUS_PER_NODE}:mpiprocs=${NCPUS_PER_NODE}" -l "walltime=${WALLTIME}" \ + -l "filesystems=${FILESYSTEMS}" <<< "${job_script}" +done diff --git a/benchmark/scripts/submit_frontier_naive_shutdown.sh b/benchmark/scripts/submit_frontier_naive_shutdown.sh new file mode 100755 index 0000000..cc588aa --- /dev/null +++ b/benchmark/scripts/submit_frontier_naive_shutdown.sh @@ -0,0 +1,41 @@ +#!/usr/bin/env bash +# SPDX-FileCopyrightText: 2025 QDX Technologies. Authored by Ryan Stocks +# SPDX-License-Identifier: Apache-2.0 +set -euo pipefail + +# Submit one Slurm job per node count to avoid long serial waits. +# Example: +# ./benchmark/scripts/submit_frontier_naive_shutdown.sh + +ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." 
&& pwd)" +SYSTEM="frontier" +SCRIPT="${ROOT_DIR}/benchmark/scripts/launch_frontier_naive_shutdown.sh" + +# SBATCH_ARGS is taken from the environment as a space-separated string; do not pre-clear it here or the caller's value is lost before it is split below. +if [[ -z "${NODE_LIST:-}" ]]; then + NODE_LIST="1 2 4 8 16 32 64 128 256 512" +fi +IFS=' ' read -r -a NODE_LIST <<< "${NODE_LIST}" +IFS=' ' read -r -a SBATCH_ARGS <<< "${SBATCH_ARGS:-}" +ACCOUNT="${ACCOUNT:-chm213}" + +WALLTIME="${WALLTIME:-00:15:00}" +LAUNCHER="${LAUNCHER:-}" +LAUNCHER_ARGS="${LAUNCHER_ARGS:-}" +OUTPUT_BASE="${OUTPUT_DIR:-${ROOT_DIR}/benchmark/results}" + +for nodes in "${NODE_LIST[@]}"; do + job_name="dynampi_shutdown_${SYSTEM}_${nodes}" + submit_args=(${SBATCH_ARGS[@]+"${SBATCH_ARGS[@]}"}) + if [[ -n "${ACCOUNT}" ]]; then + submit_args+=(--account="${ACCOUNT}") + fi + wrap="cd ${ROOT_DIR} && OUTPUT_DIR=\"${OUTPUT_BASE}/${SYSTEM}/${nodes}-${job_name}-\${SLURM_JOB_ID:-manual}\" ${SCRIPT}" + echo "sbatch ${submit_args[*]} --job-name=\"${job_name}\" --nodes=${nodes} --time=${WALLTIME} --export=ALL,NODE_LIST=${nodes},LAUNCHER=${LAUNCHER},LAUNCHER_ARGS=${LAUNCHER_ARGS} --wrap=\"${wrap}\"" + sbatch "${submit_args[@]}" \ + --job-name="${job_name}" \ + --nodes="${nodes}" \ + --time="${WALLTIME}" \ + --export=ALL,NODE_LIST="${nodes}",LAUNCHER="${LAUNCHER}",LAUNCHER_ARGS="${LAUNCHER_ARGS}" \ + --wrap="${wrap}" +done diff --git a/benchmark/scripts/submit_frontier_strong_scaling.sh b/benchmark/scripts/submit_frontier_strong_scaling.sh new file mode 100755 index 0000000..9b69bea --- /dev/null +++ b/benchmark/scripts/submit_frontier_strong_scaling.sh @@ -0,0 +1,41 @@ +#!/usr/bin/env bash +# SPDX-FileCopyrightText: 2025 QDX Technologies. Authored by Ryan Stocks +# SPDX-License-Identifier: Apache-2.0 +set -euo pipefail + +# Submit one Slurm job per node count to avoid long serial waits. +# Example: +# ./benchmark/scripts/submit_frontier_strong_scaling.sh + +ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.."
&& pwd)" +SYSTEM="frontier" +SCRIPT="${ROOT_DIR}/benchmark/scripts/launch_frontier_strong_scaling.sh" + +# SBATCH_ARGS is taken from the environment as a space-separated string; do not pre-clear it here or the caller's value is lost before it is split below. +if [[ -z "${NODE_LIST:-}" ]]; then + NODE_LIST="1 2 4 8 16 32 64 128 256 512" +fi +IFS=' ' read -r -a NODE_LIST <<< "${NODE_LIST}" +IFS=' ' read -r -a SBATCH_ARGS <<< "${SBATCH_ARGS:-}" +ACCOUNT="${ACCOUNT:-chm213}" + +WALLTIME="${WALLTIME:-00:15:00}" +LAUNCHER="${LAUNCHER:-}" +LAUNCHER_ARGS="${LAUNCHER_ARGS:-}" +OUTPUT_BASE="${OUTPUT_DIR:-${ROOT_DIR}/benchmark/results}" + +for nodes in "${NODE_LIST[@]}"; do + job_name="dynampi_ss_${SYSTEM}_${nodes}" + submit_args=(${SBATCH_ARGS[@]+"${SBATCH_ARGS[@]}"}) + if [[ -n "${ACCOUNT}" ]]; then + submit_args+=(--account="${ACCOUNT}") + fi + wrap="cd ${ROOT_DIR} && OUTPUT_DIR=\"${OUTPUT_BASE}/${SYSTEM}/${nodes}-${job_name}-\${SLURM_JOB_ID:-manual}\" ${SCRIPT}" + echo "sbatch ${submit_args[*]} --job-name=\"${job_name}\" --nodes=${nodes} --time=${WALLTIME} --export=ALL,NODE_LIST=${nodes},LAUNCHER=${LAUNCHER},LAUNCHER_ARGS=${LAUNCHER_ARGS} --wrap=\"${wrap}\"" + sbatch "${submit_args[@]}" \ + --job-name="${job_name}" \ + --nodes="${nodes}" \ + --time="${WALLTIME}" \ + --export=ALL,NODE_LIST="${nodes}",LAUNCHER="${LAUNCHER}",LAUNCHER_ARGS="${LAUNCHER_ARGS}" \ + --wrap="${wrap}" +done diff --git a/benchmark/strong_scaling_distribution_rate.cpp b/benchmark/strong_scaling_distribution_rate.cpp new file mode 100644 index 0000000..14d6e0a --- /dev/null +++ b/benchmark/strong_scaling_distribution_rate.cpp @@ -0,0 +1,283 @@ +/* + * SPDX-FileCopyrightText: 2025 QDX Technologies.
Authored by Ryan Stocks + * SPDX-License-Identifier: Apache-2.0 + */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using Task = uint32_t; + +enum class DistributorKind { Naive, Hierarchical }; +enum class DurationMode { Fixed, Poisson }; + +struct BenchmarkOptions { + uint64_t expected_us = 1; + double duration_s = 10.0; + DistributorKind distributor = DistributorKind::Hierarchical; + DurationMode duration_mode = DurationMode::Fixed; + uint64_t nodes = 0; + std::string system; + std::string output_path; +}; + +struct BenchmarkResult { + uint64_t total_tasks = 0; + uint64_t workers = 0; + uint64_t world_size = 0; + double elapsed_s = 0.0; +}; + +static DistributorKind parse_distributor(const std::string& value) { + if (value == "naive") return DistributorKind::Naive; + if (value == "hierarchical") return DistributorKind::Hierarchical; + throw std::runtime_error("Unknown distributor: " + value); +} + +static DurationMode parse_duration_mode(const std::string& value) { + if (value == "fixed") return DurationMode::Fixed; + if (value == "poisson" || value == "random") return DurationMode::Poisson; + throw std::runtime_error("Unknown duration mode: " + value); +} + +static std::string to_string(DistributorKind kind) { + return kind == DistributorKind::Naive ? "naive" : "hierarchical"; +} + +static std::string to_string(DurationMode mode) { + return mode == DurationMode::Fixed ? 
"fixed" : "random"; +} + +static void spin_wait(std::chrono::microseconds duration) { + auto start = std::chrono::high_resolution_clock::now(); + while (std::chrono::high_resolution_clock::now() - start < duration) { + } +} + +static void write_csv_header(std::ostream& os) { + os << "system,distributor,mode,expected_us," + "duration_s,nodes,world_size,workers,total_tasks,elapsed_s," + "throughput_tasks_per_s\n"; +} + +static void write_csv_row(std::ostream& os, const BenchmarkOptions& opts, + const BenchmarkResult& result) { + const double throughput = + result.elapsed_s > 0.0 ? static_cast(result.total_tasks) / result.elapsed_s : 0.0; + os << opts.system << "," << to_string(opts.distributor) << "," << to_string(opts.duration_mode) + << "," << opts.expected_us << "," << opts.duration_s << "," << opts.nodes << "," + << result.world_size << "," << result.workers << "," << result.total_tasks << "," + << result.elapsed_s << "," << throughput << "\n"; +} + +template +static BenchmarkResult run_benchmark(const BenchmarkOptions& opts, MPI_Comm comm) { + dynampi::MPICommunicator<> comm_wrapper(comm, dynampi::MPICommunicator<>::Ownership::Reference); + int rank = 0; + int size = 0; + MPI_Comm_rank(comm, &rank); + MPI_Comm_size(comm, &size); + + const uint64_t num_workers = (size == 1) ? 
1 : static_cast(size - 1); + + struct WorkerFunctor { + std::mt19937_64 rng; + std::uniform_int_distribution uniform; + uint64_t expected_us; + DurationMode duration_mode; + + WorkerFunctor(int rank, uint64_t expected_us, DurationMode mode) + : rng([rank]() { + std::random_device rd; + std::mt19937_64 seed_gen(rd()); + return seed_gen() + static_cast(rank); + }()), + uniform(0, 2 * expected_us), + expected_us(expected_us), + duration_mode(mode) {} + + uint32_t operator()(Task task) { + uint32_t value = task; + uint64_t duration_us = expected_us; + if (duration_mode == DurationMode::Poisson) { + duration_us = uniform(rng); + } + spin_wait(std::chrono::microseconds(duration_us)); + const uint64_t squared = static_cast(value) * static_cast(value); + return static_cast(squared); + } + }; + + WorkerFunctor worker_function(rank, opts.expected_us, opts.duration_mode); + + MPI_Barrier(comm_wrapper); + dynampi::Timer timer(dynampi::Timer::AutoStart::No); + uint64_t total_tasks = 0; + + Distributor distributor(worker_function, {.comm = comm, .manager_rank = 0}); + + if (distributor.is_root_manager()) { + timer.start(); + + const uint64_t target_queue_size = num_workers * 4; + while (timer.elapsed().count() < opts.duration_s) { + const uint64_t remaining = distributor.remaining_tasks_count(); + uint64_t to_insert = 0; + if (remaining < target_queue_size) { + to_insert = target_queue_size - remaining; + } + if (timer.elapsed().count() > opts.duration_s / 2.0 && total_tasks > 0) { + double current_rate = static_cast(total_tasks) / timer.elapsed().count(); + double estimated_total_tasks = current_rate * opts.duration_s; + if (estimated_total_tasks > static_cast(total_tasks) && current_rate > 0.0) { + double remaining_time = opts.duration_s - timer.elapsed().count(); + uint64_t can_complete_tasks_remaining = + static_cast(current_rate * remaining_time); + if (can_complete_tasks_remaining > remaining) { + uint64_t max_to_insert = can_complete_tasks_remaining - remaining; + 
to_insert = std::min(to_insert, max_to_insert); + } else { + // Already have more tasks queued than can be completed, don't insert more + to_insert = 0; + } + } + } + // Clamp to_insert to be non-negative and <= target_queue_size + to_insert = std::min(to_insert, target_queue_size); + + if (to_insert > 0) { + std::vector tasks; + tasks.reserve(to_insert); + for (uint64_t i = 0; i < to_insert; ++i) { + tasks.push_back(static_cast(total_tasks + i)); + } + distributor.insert_tasks(tasks); + } + auto results = + distributor.run_tasks({.target_num_tasks = num_workers * 2, .max_seconds = 0.1}); + total_tasks += results.size(); + } + { + auto results = distributor.finish_remaining_tasks(); + total_tasks += results.size(); + } + timer.stop(); + distributor.finalize(); + } + + return BenchmarkResult{total_tasks, num_workers, static_cast(size), + timer.elapsed().count()}; +} + +int main(int argc, char** argv) { + MPI_Init(&argc, &argv); + int world_rank = 0; + MPI_Comm_rank(MPI_COMM_WORLD, &world_rank); + + cxxopts::Options options("strong_scaling_distribution_rate", + "Benchmark strong scaling task distribution throughput"); + options.add_options()("t,expected_us", "Expected task duration in microseconds", + cxxopts::value()->default_value("1"))( + "d,duration_s", "Target duration in seconds", cxxopts::value()->default_value("10"))( + "D,distribution", "Distribution strategy: naive or hierarchical", + cxxopts::value()->default_value("hierarchical"))( + "m,mode", "Duration mode: fixed or random (uniform 0-2x expected)", + cxxopts::value()->default_value("fixed"))( + "n,nodes", "Number of nodes for labeling output (defaults to world size)", + cxxopts::value()->default_value("0"))( + "S,system", "System label for plotting (frontier, aurora, ...)", + cxxopts::value()->default_value(""))( + "o,output", "Append results to CSV file", cxxopts::value()->default_value(""))( + "h,help", "Print usage"); + + cxxopts::ParseResult args; + try { + args = options.parse(argc, argv); + } 
catch (const std::exception& e) { + if (world_rank == 0) { + std::cerr << "Error parsing options: " << e.what() << "\n" << options.help() << std::endl; + } + MPI_Finalize(); + return 1; + } + + if (args.count("help")) { + if (world_rank == 0) { + std::cout << options.help() << std::endl; + } + MPI_Finalize(); + return 0; + } + + BenchmarkOptions opts; + opts.expected_us = args["expected_us"].as(); + opts.duration_s = args["duration_s"].as(); + opts.distributor = parse_distributor(args["distribution"].as()); + opts.duration_mode = parse_duration_mode(args["mode"].as()); + opts.nodes = args["nodes"].as(); + opts.system = args["system"].as(); + opts.output_path = args["output"].as(); + + if (opts.expected_us == 0) { + if (world_rank == 0) { + std::cerr << "Expected task duration must be >= 1 microsecond." << std::endl; + } + MPI_Finalize(); + return 1; + } + + { + MPI_Comm comm = MPI_COMM_WORLD; + int rank = 0; + int size = 0; + MPI_Comm_rank(comm, &rank); + MPI_Comm_size(comm, &size); + if (opts.nodes == 0) { + opts.nodes = static_cast(size); + } + + BenchmarkResult result; + if (opts.distributor == DistributorKind::Naive) { + result = run_benchmark>(opts, comm); + } else { + result = run_benchmark>(opts, comm); + } + + if (rank == 0) { + const double throughput = + result.elapsed_s > 0.0 ? 
static_cast(result.total_tasks) / result.elapsed_s : 0.0; + std::cout << "RESULT" + << " distributor=" << to_string(opts.distributor) + << " mode=" << to_string(opts.duration_mode) << " expected_us=" << opts.expected_us + << " nodes=" << opts.nodes << " world_size=" << result.world_size + << " total_tasks=" << result.total_tasks << " elapsed_s=" << result.elapsed_s + << " throughput_tasks_per_s=" << throughput << std::endl; + if (!opts.output_path.empty()) { + std::ifstream check(opts.output_path); + const bool needs_header = + !check.good() || check.peek() == std::ifstream::traits_type::eof(); + check.close(); + std::ofstream out(opts.output_path, std::ios::app); + if (needs_header) { + write_csv_header(out); + } + write_csv_row(out, opts, result); + } + } + } + MPI_Finalize(); + return 0; +} diff --git a/benchmark/timer_resolution.cpp b/benchmark/timer_resolution.cpp new file mode 100644 index 0000000..1b7b586 --- /dev/null +++ b/benchmark/timer_resolution.cpp @@ -0,0 +1,94 @@ +/* + * SPDX-FileCopyrightText: 2025 QDX Technologies. Authored by Ryan Stocks + * SPDX-License-Identifier: Apache-2.0 + */ + +#include +#include +#include +#include +#include +#include + +void print_resolution_stats(std::vector& deltas, int iterations) { + if (deltas.empty()) { + std::cout << " Measured resolution: < 1 ns (no measurable difference in " << iterations + << " iterations)\n"; + } else { + std::sort(deltas.begin(), deltas.end()); + double min_delta = deltas[0]; + double median_delta = deltas.size() % 2 == 0 + ? 
(deltas[deltas.size() / 2 - 1] + deltas[deltas.size() / 2]) / 2.0 + : deltas[deltas.size() / 2]; + double mean_delta = std::accumulate(deltas.begin(), deltas.end(), 0.0) / deltas.size(); + + std::cout << " Measured resolution (min): " << min_delta << " ns\n"; + std::cout << " Measured resolution (median): " << median_delta << " ns\n"; + std::cout << " Measured resolution (mean): " << mean_delta << " ns\n"; + std::cout << " Non-zero measurements: " << deltas.size() << "/" << iterations << "\n"; + } +} + +template +std::vector measure_resolution(GetTimePoint&& get_time_point, int iterations) { + std::vector deltas; + + for (int i = 0; i < iterations; ++i) { + auto t1 = get_time_point(); + auto t2 = get_time_point(); + // Wait for time to advance + while (t2 <= t1) { + t2 = get_time_point(); + } + auto delta = std::chrono::duration_cast(t2 - t1).count(); + if (delta > 0) { + deltas.push_back(static_cast(delta)); + } + } + + return deltas; +} + +template +void test_clock_resolution(const char* name) { + using Duration = typename Clock::duration; + using Period = typename Duration::period; + + std::cout << "\n" << name << ":\n"; + std::cout << " Period: " << Period::num; + if constexpr (Period::den != 1) { + std::cout << "/" << Period::den; + } + std::cout << " seconds\n"; + + const int iterations = 10000; + auto deltas = measure_resolution([]() { return Clock::now(); }, iterations); + print_resolution_stats(deltas, iterations); + + // Test if clock is steady + bool is_steady = Clock::is_steady; + std::cout << " Is steady: " << (is_steady ? 
"yes" : "no") << "\n"; +} + +void test_timer_resolution() { + std::cout << "\nDynaMPI Timer:\n"; + + const int iterations = 10000; + dynampi::Timer timer(dynampi::Timer::AutoStart::No); + timer.start(); + auto deltas = measure_resolution([&timer]() { return timer.elapsed(); }, iterations); + print_resolution_stats(deltas, iterations); +} + +int main() { + std::cout << "Timer Resolution Test\n"; + std::cout << "====================\n"; + + test_clock_resolution("high_resolution_clock"); + test_clock_resolution("steady_clock"); + test_clock_resolution("system_clock"); + test_timer_resolution(); + + std::cout << "\n"; + return 0; +} diff --git a/include/dynampi/impl/hierarchical_distributor.hpp b/include/dynampi/impl/hierarchical_distributor.hpp index 6725f9c..5a20f1d 100644 --- a/include/dynampi/impl/hierarchical_distributor.hpp +++ b/include/dynampi/impl/hierarchical_distributor.hpp @@ -7,18 +7,23 @@ #include #include -#include +#include #include +#include #include +#include #include #include #include +#include #include #include #include "../mpi/mpi_communicator.hpp" #include "../mpi/mpi_types.hpp" #include "dynampi/impl/base_distributor.hpp" +#include "dynampi/utilities/assert.hpp" +#include "dynampi/utilities/timer.hpp" namespace dynampi { @@ -32,34 +37,202 @@ class HierarchicalMPIWorkDistributor : public BaseMPIWorkDistributor message_batch_size = std::nullopt; - size_t max_workers_per_coordinator = 2; + std::optional max_workers_per_coordinator = std::nullopt; + int batch_size_multiplier = 2; + + // If true, topology is strictly mapped to physical nodes: + // Manager <-> Node Coordinators <-> Local Workers + // Note: Manager is excluded from its node's Local Comm to separate duties. + bool coordinator_per_node = true; + }; + + struct RunConfig { + // Stop once we have at least this many results ready to return. + size_t target_num_tasks = std::numeric_limits::max(); + + // If false, strictly clips the return vector to `target_num_tasks`. 
+ // Excess results are buffered for the next call. + bool allow_more_than_target_tasks = true; + + // Stop if this much time has passed. + std::optional max_seconds = std::nullopt; }; static constexpr bool prioritize_tasks = Base::prioritize_tasks; + static const bool ordered = false; private: - typename Base::QueueT _unallocated_task_queue; - std::vector _worker_current_task_indices; - std::vector _results; - std::stack> _free_worker_indices; + typename Base::QueueT m_unallocated_task_queue; + std::vector m_results; + + enum class CommLayer { Global, Local, Leader }; + + struct TaskRequest { + int worker_rank; + CommLayer source_layer = CommLayer::Global; // Which comm did this come from? + std::optional num_tasks_requested = std::nullopt; + }; + static constexpr int kMaxTasksRequested = 1'000'000; // guard against pathological reserve() + std::stack> m_free_worker_indices; + + size_t m_tasks_sent_to_child = 0; + size_t m_results_received_from_child = 0; + size_t m_results_sent_to_parent = 0; + size_t m_tasks_received_from_parent = 0; + size_t m_tasks_executed = 0; + size_t m_results_returned = 0; - size_t _tasks_sent = 0; - size_t _results_received = 0; - bool _finalized = false; + bool m_finalized = false; + bool m_done = false; static constexpr StatisticsMode statistics_mode = get_option_value(); using MPICommunicator = dynampi::MPICommunicator>; - MPICommunicator _communicator; - std::function _worker_function; - Config _config; - enum Tag : int { TASK = 0, DONE = 1, RESULT = 2, REQUEST = 3, ERROR = 4 }; + MPICommunicator m_communicator; // Global communicator + MPIGroup m_world_group; // Group for the global communicator (for rank translation) + std::optional m_local_group; // Intra-node group (Shared Memory, excludes manager) + std::optional + m_leader_group; // Inter-node group (Leaders only: manager + node coordinators) + + std::function m_worker_function; + Config m_config; + + // Cached parent target to avoid repeated MPI_Group_translate_ranks calls + 
mutable std::optional> m_cached_parent_target; + + // --- Topology Helper Methods --- + + inline int max_workers_per_coordinator() const { + const int default_value = std::max(2, static_cast(std::sqrt(m_communicator.size()))); + const int configured = m_config.max_workers_per_coordinator.value_or(default_value); + return std::max(1, configured); + } + + // Returns {parent_rank, communicator_layer} + inline std::pair get_parent_target() const { + // Return cached value if available + if (m_cached_parent_target.has_value()) { + return m_cached_parent_target.value(); + } + + std::pair result; + DYNAMPI_ASSERT(!is_root_manager(), "Root manager should not have a parent"); + if (m_config.coordinator_per_node) { + DYNAMPI_ASSERT(m_local_group.has_value() || m_leader_group.has_value(), + "Local or leader group should be present"); + if (m_local_group && m_local_group->rank() > 0) { + // Case 1: I am a Local Worker (Rank > 0 in Local Group) + // Parent is the Node Coordinator (Local Rank 0). + // Translate local rank 0 to world rank + int node_coord_world_rank = m_local_group->translate_rank(0, m_world_group); + result = {node_coord_world_rank, CommLayer::Local}; + } else { + // Case 2: I am a Node Coordinator (Local Rank 0). + // Parent is the Global Manager. + // With the new topology, Manager is ALWAYS in the leader group. + // We need the manager's world rank, which we already have + int global_manager = m_config.manager_rank; + result = std::make_pair(global_manager, CommLayer::Leader); + } + } else { + // Original Logic + int rank = m_communicator.rank(); + int virtual_rank = rank == m_config.manager_rank ? 0 : idx_for_worker(rank) + 1; + int virtual_parent = (virtual_rank - 1) / max_workers_per_coordinator(); + int parent_rank = + virtual_parent == 0 ? 
m_config.manager_rank : worker_for_idx(virtual_parent - 1); + result = {parent_rank, CommLayer::Global}; + } + + // Cache the result + m_cached_parent_target = result; + return result; + } + + inline int total_num_children(int rank) const { + if (m_config.coordinator_per_node) { + DYNAMPI_UNIMPLEMENTED("Recursive child counting not supported/needed in Node topology mode"); + return 0; + } + int virtual_rank = rank == m_config.manager_rank ? 0 : idx_for_worker(rank) + 1; + int num_children = 0; + int max_children = max_workers_per_coordinator(); + for (int i = 0; i < max_children; ++i) { + int child = virtual_rank * max_children + i + 1; + if (child >= m_communicator.size()) break; // No more children + num_children += 1 + total_num_children(worker_for_idx(child - 1)); + } + return num_children; + } + + // Calculate number of direct children based on active topology + inline int num_direct_children() const { + if (m_config.coordinator_per_node) { + int count = 0; + // 1. Local Children: Everyone in local group except me (Rank 0) + if (m_local_group && m_local_group->rank() == 0) { + count += (m_local_group->size() - 1); + } + // 2. Remote Children: If I am Manager, other Leaders are my children. + // Note: In this topology, Manager is IN leader group, but NOT in local group. + if (is_root_manager() && m_leader_group) { + count += (m_leader_group->size() - 1); + } + return count; + } else { + // Original Logic + int rank = m_communicator.rank(); + int num_children = 0; + int max_children = max_workers_per_coordinator(); + for (int i = 0; i < max_children; ++i) { + int virtual_rank = rank == m_config.manager_rank ? 
0 : idx_for_worker(rank) + 1; + int virtual_child = virtual_rank * max_children + i + 1; + if (virtual_child < m_communicator.size()) { + num_children++; + } + } + return num_children; + } + } + + bool is_leaf_worker() const { + if (m_config.coordinator_per_node) { + if (is_root_manager()) return false; + + // If I am NOT in local group (should only be Manager, handled above), panic? + // Actually, with this topology, everyone except Manager is in local group. + if (!m_local_group) return true; // Safety fallback + + // Standard Worker: Rank > 0 in Local Group + if (m_local_group->rank() > 0) return true; + + // Node Coordinator: Rank 0 in Local Comm. + // Leaf only if single-core node (no children). + return num_direct_children() == 0; + } else { + int rank = m_communicator.rank(); + int max_children = max_workers_per_coordinator(); + int virtual_rank = rank == m_config.manager_rank ? 0 : idx_for_worker(rank) + 1; + int first_child_virtual = virtual_rank * max_children + 1; + return first_child_virtual >= m_communicator.size(); + } + } + + enum Tag : int { + TASK = 0, + DONE = 1, + RESULT = 2, + REQUEST = 3, + TASK_BATCH = 4, + RESULT_BATCH = 5, + REQUEST_BATCH = 6 + }; struct Statistics { const CommStatistics& comm_statistics; - std::vector worker_task_counts; + std::optional> worker_task_counts = {}; }; using StatisticsT = @@ -69,7 +242,7 @@ class HierarchicalMPIWorkDistributor : public BaseMPIWorkDistributor worker_function, Config runtime_config = Config{}) - : _communicator(runtime_config.comm, MPICommunicator::Duplicate), - _worker_function(worker_function), - _config(runtime_config), - _statistics{create_statistics(_communicator)} { - if (is_root_manager()) _worker_current_task_indices.resize(_communicator.size() - 1, -1); - if (_config.auto_run_workers && _communicator.rank() != _config.manager_rank) { - run_worker(); + : m_communicator(runtime_config.comm, MPICommunicator::Duplicate), + m_world_group(m_communicator), + m_worker_function(worker_function), 
+ m_config(runtime_config), + _statistics{create_statistics(m_communicator)} { + // --- Initialize Topology Groups --- + if (m_config.coordinator_per_node) { + // 1. Identify physical nodes via split_by_node + MPICommunicator node_comm = m_communicator.split_by_node(); + + // 2. Create Local Group: Exclude Manager! + // If I am Manager, color is Undefined (I don't participate in local worker pool). + // Everyone else participates. + int local_color = (m_communicator.rank() == m_config.manager_rank) ? MPI_UNDEFINED : 0; + + auto local_comm_opt = node_comm.split(local_color, m_communicator.rank()); + if (local_comm_opt.has_value()) { + // Extract group from the temporary communicator, then let it be freed + m_local_group.emplace(*local_comm_opt); + } + + // 3. Create Leader Group + // Who joins? + // A: The Manager (Always) + // B: The Node Coordinators (Rank 0 of the *Local* Comm) + bool is_manager = (m_communicator.rank() == m_config.manager_rank); + // Check if we're rank 0 in the local group (node coordinator) + bool is_node_coordinator = false; + if (m_local_group.has_value()) { + int my_local_rank = m_local_group->rank(); + is_node_coordinator = (my_local_rank == 0); + } + + int leader_color = (is_manager || is_node_coordinator) ? 
0 : MPI_UNDEFINED; + + // Key is global rank to maintain global ordering among leaders + auto leader_comm_opt = m_communicator.split(leader_color, m_communicator.rank()); + if (leader_comm_opt.has_value()) { + // Extract group from the temporary communicator, then let it be freed + m_leader_group.emplace(*leader_comm_opt); + } } - if constexpr (statistics_mode >= StatisticsMode::Aggregated) { - if (is_root_manager()) _statistics.worker_task_counts.resize(_communicator.size(), 0); + + if (m_config.auto_run_workers && m_communicator.rank() != m_config.manager_rank) { + run_worker(); } } const StatisticsT& get_statistics() const requires(statistics_mode != StatisticsMode::None) { - assert(is_root_manager() && "Only the manager can access statistics"); + DYNAMPI_ASSERT(is_root_manager(), "Only the manager can access statistics"); return _statistics; } void run_worker() { - assert(_communicator.rank() != _config.manager_rank && "Worker cannot run on the manager rank"); - using task_type = MPI_Type; - _communicator.send(nullptr, _config.manager_rank, Tag::REQUEST); - while (true) { - MPI_Status status; - DYNAMPI_MPI_CHECK(MPI_Probe, (MPI_ANY_SOURCE, MPI_ANY_TAG, _communicator.get(), &status)); - if (status.MPI_TAG == Tag::DONE) { - _communicator.recv_empty_message(_config.manager_rank, Tag::DONE); - break; + DYNAMPI_ASSERT(m_communicator.rank() != m_config.manager_rank, + "Worker cannot run on the manager rank"); + if (is_leaf_worker()) { + // Leaf workers (usually local ranks > 0) just request from parent + send_to_parent(nullptr, Tag::REQUEST); + while (!m_done) { + receive_from_anyone(); } - int count; - DYNAMPI_MPI_CHECK(MPI_Get_count, (&status, task_type::value, &count)); - TaskT message; - task_type::resize(message, count); - _communicator.recv(message, _config.manager_rank, Tag::TASK); - _tasks_sent++; - ResultT result = _worker_function(message); - _communicator.send(result, _config.manager_rank, Tag::RESULT); - _results_received++; + } else { + // Intermediate 
nodes (Node Coordinators) + int num_children = num_direct_children(); + int prefetch = num_children * m_config.batch_size_multiplier; + + // Initial request to parent (Manager) + send_to_parent(prefetch, Tag::REQUEST_BATCH); + + while (!m_done) { + // If we have no tasks to give, wait for tasks from parent + while (!m_done && m_unallocated_task_queue.empty()) { + receive_from_anyone(); + } + + size_t num_tasks_should_be_received = m_unallocated_task_queue.size(); + + // Process tasks: Give to workers or execute ourselves if needed + while (!m_unallocated_task_queue.empty()) { + if (m_done) break; + + if (m_free_worker_indices.empty()) { + // Must wait for a worker to become free + receive_from_anyone(); + } else { + allocate_task_to_child(); + } + } + + // Wait for results from children + while (m_tasks_sent_to_child > m_results_received_from_child) { + receive_from_anyone(); + } + + if (m_done) break; + + (void)num_tasks_should_be_received; + DYNAMPI_ASSERT_EQ(m_results.size(), num_tasks_should_be_received); + + return_results_and_request_next_batch_from_manager(); + } + send_done_to_children_when_free(); } } - bool is_root_manager() const { return _communicator.rank() == _config.manager_rank; } + void return_results_and_request_next_batch_from_manager() { + DYNAMPI_ASSERT(!is_leaf_worker(), "Leaf workers should not return results directly"); + DYNAMPI_ASSERT_NE(m_communicator.rank(), m_config.manager_rank, + "Manager should not request tasks from itself"); + std::vector results = m_results; + m_results.clear(); + + send_to_parent(results, Tag::RESULT_BATCH); + m_results_sent_to_parent += results.size(); + } + + bool is_root_manager() const { return m_communicator.rank() == m_config.manager_rank; } size_t remaining_tasks_count() const { - assert(_communicator.rank() == _config.manager_rank && - "Only the manager can check remaining tasks"); - return _unallocated_task_queue.size(); + DYNAMPI_ASSERT_EQ(m_communicator.rank(), m_config.manager_rank, + "Only the 
manager can check remaining tasks"); + return m_unallocated_task_queue.size(); } void insert_task(TaskT task) requires(!prioritize_tasks) { - assert(_communicator.rank() == _config.manager_rank && "Only the manager can distribute tasks"); - _unallocated_task_queue.push_back(task); + DYNAMPI_ASSERT_EQ(m_communicator.rank(), m_config.manager_rank, + "Only the manager can distribute tasks"); + m_unallocated_task_queue.push_back(task); + m_tasks_received_from_parent++; } void insert_task(const TaskT& task, double priority) requires(prioritize_tasks) { - assert(_communicator.rank() == _config.manager_rank && "Only the manager can distribute tasks"); - _unallocated_task_queue.emplace(priority, task); + DYNAMPI_ASSERT_EQ(m_communicator.rank(), m_config.manager_rank, + "Only the manager can distribute tasks"); + m_unallocated_task_queue.emplace(priority, task); + m_tasks_received_from_parent++; } template requires std::ranges::input_range && (!prioritize_tasks) void insert_tasks(const Range& tasks) { - assert(_communicator.rank() == _config.manager_rank && "Only the manager can distribute tasks"); + DYNAMPI_ASSERT_EQ(m_communicator.rank(), m_config.manager_rank, + "Only the manager can distribute tasks"); std::copy(std::ranges::begin(tasks), std::ranges::end(tasks), - std::back_inserter(_unallocated_task_queue)); + std::back_inserter(m_unallocated_task_queue)); + m_tasks_received_from_parent += + std::distance(std::ranges::begin(tasks), std::ranges::end(tasks)); } void insert_tasks(const std::vector& tasks) requires(!prioritize_tasks) @@ -155,120 +411,377 @@ class HierarchicalMPIWorkDistributor : public BaseMPIWorkDistributor(tasks)); } - void get_task_and_allocate() { - const TaskT task = get_next_task_to_send(); - if (_communicator.size() > 1) { - if (_free_worker_indices.empty()) { - // If no free workers, wait for a result to be received - receive_from_any_worker(); - } - int worker = _free_worker_indices.top(); - _free_worker_indices.pop(); - 
_worker_current_task_indices[idx_for_worker(worker)] = _tasks_sent; - if constexpr (statistics_mode >= StatisticsMode::Aggregated) { - _statistics.worker_task_counts[worker]++; + void allocate_task_to_child() { + if (m_communicator.size() > 1) { + DYNAMPI_ASSERT(!m_free_worker_indices.empty(), "Cannot allocate task with no free workers"); + + TaskRequest request = m_free_worker_indices.top(); + m_free_worker_indices.pop(); + + // Determine target and communicator based on request source + int worker_rank = request.worker_rank; + CommLayer layer = request.source_layer; + + if (request.num_tasks_requested.has_value()) { + std::vector tasks; + int num_tasks = request.num_tasks_requested.value(); + + const int actual_num_tasks = + std::min(num_tasks, static_cast(m_unallocated_task_queue.size())); + tasks.reserve(actual_num_tasks); + if constexpr (std::is_same_v>) { + tasks.assign(m_unallocated_task_queue.begin(), + m_unallocated_task_queue.begin() + actual_num_tasks); + m_unallocated_task_queue.erase(m_unallocated_task_queue.begin(), + m_unallocated_task_queue.begin() + actual_num_tasks); + } else { + for (int i = 0; i < actual_num_tasks; ++i) { + tasks.push_back(std::move(m_unallocated_task_queue.top().second)); + m_unallocated_task_queue.pop(); + } + } + + send_to_worker(tasks, worker_rank, Tag::TASK_BATCH, layer); + m_tasks_sent_to_child += tasks.size(); + } else { + const TaskT task = get_next_task_to_send(); + send_to_worker(task, worker_rank, Tag::TASK, layer); + m_tasks_sent_to_child++; } - _communicator.send(task, worker, Tag::TASK); } else { - // If there's only one process, we just run the worker function directly - _results.emplace_back(_worker_function(task)); - _results_received++; + const TaskT task = get_next_task_to_send(); + m_results.emplace_back(m_worker_function(task)); + m_tasks_executed++; } - _tasks_sent++; } - [[nodiscard]] std::vector finish_remaining_tasks() { - assert(_communicator.rank() == _config.manager_rank && "Only the manager can 
distribute tasks"); - while (!_unallocated_task_queue.empty()) { - get_task_and_allocate(); + [[nodiscard]] std::vector run_tasks(const RunConfig& config = RunConfig{}) { + DYNAMPI_ASSERT_EQ(m_communicator.rank(), m_config.manager_rank, + "Only the manager can finish remaining tasks"); + Timer timer; + + while (true) { + // A. Target reached + if (m_results.size() >= config.target_num_tasks) { + break; + } + + // B. Time limit + if (config.max_seconds && timer.elapsed().count() >= *config.max_seconds) { + break; + } + + // C. Exhaustion + size_t active_tasks = m_tasks_sent_to_child - m_results_received_from_child; + if (m_unallocated_task_queue.empty() && active_tasks == 0) { + break; + } + + bool tasks_available = !m_unallocated_task_queue.empty(); + bool workers_available = !m_free_worker_indices.empty(); + bool is_single_proc = (m_communicator.size() == 1); + + if (tasks_available && (is_single_proc || workers_available)) { + allocate_task_to_child(); + } else if (active_tasks > 0 || (tasks_available && !workers_available)) { + receive_from_anyone(); + } } - while (_free_worker_indices.size() + 1 < static_cast(_communicator.size())) { - receive_from_any_worker(); + + // --- Return Logic --- + std::vector batch; + + size_t available = m_results.size(); + size_t count_to_return = available; + + if (!config.allow_more_than_target_tasks) { + count_to_return = std::min(available, config.target_num_tasks); } - assert(_results_received == _tasks_sent && "Not all tasks were processed by workers"); - assert(_results.size() == _tasks_sent && "Results size should match tasks sent"); - return _results; + + batch.reserve(count_to_return); + auto end_it = m_results.begin() + count_to_return; + std::move(m_results.begin(), end_it, std::back_inserter(batch)); + m_results.erase(m_results.begin(), end_it); + + m_results_sent_to_parent += batch.size(); + return batch; + } + + [[nodiscard]] std::vector finish_remaining_tasks() { + RunConfig cfg; + cfg.target_num_tasks = 
std::numeric_limits::max(); + return run_tasks(cfg); } void finalize() { - assert(!_finalized && "Work distribution already finalized"); + DYNAMPI_ASSERT(!m_finalized, "Work distribution already finalized"); if (is_root_manager()) { - send_done_to_workers(); - _finalized = true; + send_done_to_children_when_free(); + } + m_finalized = true; + if constexpr (statistics_mode != StatisticsMode::None) { + if (is_root_manager()) { + _statistics.worker_task_counts = std::vector(m_communicator.size(), 0); + } + m_communicator.gather(m_tasks_executed, + _statistics.worker_task_counts.has_value() + ? &_statistics.worker_task_counts.value() + : nullptr, + m_config.manager_rank); } } ~HierarchicalMPIWorkDistributor() { - if (!_finalized) { + if (!m_finalized) { finalize(); } - assert(_tasks_sent == _results_received && "Not all tasks were processed by workers"); + DYNAMPI_ASSERT_EQ(m_results_received_from_child, m_tasks_sent_to_child, + "All tasks should have been processed by workers before finalizing"); + DYNAMPI_ASSERT_EQ(m_results_sent_to_parent, m_tasks_received_from_parent, + "All results should have been sent to the parent before finalizing"); + if (is_leaf_worker()) + DYNAMPI_ASSERT_EQ(m_results_received_from_child, 0, + "Leaf workers should not receive results from children"); + else if (m_communicator.size() > 1) + DYNAMPI_ASSERT_EQ(m_results_received_from_child + m_tasks_executed, m_results_sent_to_parent, + "Results received from children should match results sent to parent"); } private: TaskT get_next_task_to_send() { - assert(_communicator.rank() == _config.manager_rank && "Only the manager can get next task"); - assert(!_unallocated_task_queue.empty() && "There should be tasks available to send"); + DYNAMPI_ASSERT(is_root_manager() || !is_leaf_worker(), + "Leaf workers should not send tasks directly"); + DYNAMPI_ASSERT(!m_unallocated_task_queue.empty(), "There should be tasks available to send"); TaskT task; - if constexpr (std::is_same_v>) { - task = 
_unallocated_task_queue.front(); - _unallocated_task_queue.pop_front(); + if constexpr (std::is_same_v>) { + task = m_unallocated_task_queue.front(); + m_unallocated_task_queue.pop_front(); } else { - task = _unallocated_task_queue.top().second; - _unallocated_task_queue.pop(); + task = m_unallocated_task_queue.top().second; + m_unallocated_task_queue.pop(); } return task; } - void send_done_to_workers() { - assert(_communicator.rank() == _config.manager_rank && - "Only the manager can finalize the work distribution"); - assert(_free_worker_indices.size() + 1 == static_cast(_communicator.size()) && - "All workers should be free before finalizing"); - for (int i = 0; i < _communicator.size() - 1; i++) { - _communicator.send(nullptr, worker_for_idx(i), Tag::DONE); - } - } - int idx_for_worker(int worker_rank) const { - assert(worker_rank != _config.manager_rank && - "Manager rank should not be used as a worker rank"); - if (worker_rank < _config.manager_rank) { + DYNAMPI_ASSERT_NE(worker_rank, m_config.manager_rank, + "Manager rank should not be used as a worker rank"); + if (worker_rank < m_config.manager_rank) { return worker_rank; } else { return worker_rank - 1; } } - int worker_for_idx(int idx) const { return (idx < _config.manager_rank) ? 
idx : (idx + 1); } - - void receive_from_any_worker() { - assert(_communicator.rank() == _config.manager_rank && - "Only the manager can receive results and send tasks"); - assert(_communicator.size() > 1 && - "There should be at least one worker to receive results from"); - using result_type = MPI_Type; - MPI_Status status; - DYNAMPI_MPI_CHECK(MPI_Probe, (MPI_ANY_SOURCE, MPI_ANY_TAG, _communicator.get(), &status)); - if (status.MPI_TAG == Tag::RESULT) { - int64_t task_idx = _worker_current_task_indices[status.MPI_SOURCE - - (status.MPI_SOURCE > _config.manager_rank)]; - _worker_current_task_indices[status.MPI_SOURCE - (status.MPI_SOURCE > _config.manager_rank)] = - -1; - assert(task_idx >= 0 && "Task index should be valid"); - if (static_cast(task_idx) >= _results.size()) { - _results.resize(task_idx + 1); + int worker_for_idx(int idx) const { return (idx < m_config.manager_rank) ? idx : (idx + 1); } + + // --- Helper: Determine which layer a world rank belongs to --- + CommLayer determine_layer_from_world_rank(int world_rank) const { + DYNAMPI_ASSERT(m_config.coordinator_per_node); + // Check if rank is in local group (and not manager) + if (m_local_group) { + int local_rank = m_world_group.translate_rank(world_rank, *m_local_group); + if (local_rank != MPI_UNDEFINED) { + return CommLayer::Local; } + } + DYNAMPI_ASSERT(m_leader_group.has_value(), "Leader group should be present"); + [[maybe_unused]] int leader_rank = m_world_group.translate_rank(world_rank, *m_leader_group); + DYNAMPI_ASSERT_NE(leader_rank, MPI_UNDEFINED, "Rank should be in leader group"); + return CommLayer::Leader; + } + + // --- Abstract Send Wrappers --- + + template + void send_to_parent(const T& data, Tag tag) { + auto [target, layer] = get_parent_target(); + DYNAMPI_ASSERT_NE(target, -1, "Root cannot send to parent"); + + // With groups, target is always a world rank, so use global communicator + m_communicator.send(data, target, tag); + } + + template + void send_to_worker(const T& data, 
int rank, Tag tag, [[maybe_unused]] CommLayer layer) { + // With groups, rank is stored as world rank in TaskRequest, so use global communicator + m_communicator.send(data, rank, tag); + } + + void send_done_to_children_when_free() { + const int direct_children = num_direct_children(); + int done_sent_count = 0; + while (done_sent_count < direct_children) { + if (m_free_worker_indices.empty()) { + receive_from_anyone(); + continue; + } + TaskRequest request = m_free_worker_indices.top(); + m_free_worker_indices.pop(); + + send_to_worker(nullptr, request.worker_rank, Tag::DONE, request.source_layer); + done_sent_count++; + } + } + + using result_mpi_type = MPI_Type; + using task_mpi_type = MPI_Type; + + void receive_result_from(MPI_Status status, [[maybe_unused]] MPICommunicator& source_comm, + CommLayer layer) { + m_results.push_back(ResultT{}); + if (result_mpi_type::resize_required) { + DYNAMPI_UNIMPLEMENTED( // LCOV_EXCL_LINE + "Dynamic resizing of results is not supported in hierarchical distribution"); + } + // With groups, always use global communicator and determine layer from source rank + int world_source = status.MPI_SOURCE; + if (m_config.coordinator_per_node) { + layer = determine_layer_from_world_rank(world_source); + } + m_communicator.recv(m_results.back(), world_source, Tag::RESULT); + m_results_received_from_child++; + m_free_worker_indices.push(TaskRequest{.worker_rank = world_source, .source_layer = layer}); + } + + void receive_result_batch_from(MPI_Status status, [[maybe_unused]] MPICommunicator& source_comm, + CommLayer layer) { + using message_type = MPI_Type>; + int count; + DYNAMPI_MPI_CHECK(MPI_Get_count, (&status, message_type::value, &count)); + std::vector results; + message_type::resize(results, count); + // With groups, always use global communicator and determine layer from source rank + int world_source = status.MPI_SOURCE; + if (m_config.coordinator_per_node) { + layer = determine_layer_from_world_rank(world_source); + } + 
m_communicator.recv(results, world_source, Tag::RESULT_BATCH); + m_free_worker_indices.push({.worker_rank = world_source, + .source_layer = layer, + .num_tasks_requested = static_cast(results.size())}); + std::copy(results.begin(), results.end(), std::back_inserter(m_results)); + m_results_received_from_child += results.size(); + } + + void receive_execute_return_task_from(MPI_Status status, + [[maybe_unused]] MPICommunicator& source_comm, + [[maybe_unused]] CommLayer layer) { + int count; + DYNAMPI_MPI_CHECK(MPI_Get_count, (&status, task_mpi_type::value, &count)); + TaskT message; + task_mpi_type::resize(message, count); + // With groups, always use global communicator + int world_source = status.MPI_SOURCE; + m_communicator.recv(message, world_source, Tag::TASK); + m_tasks_received_from_parent++; + ResultT result = m_worker_function(message); + m_tasks_executed++; + // Reply on the global communicator + m_communicator.send(result, world_source, Tag::RESULT); + m_results_sent_to_parent++; + } + + void receive_task_batch_from(MPI_Status status, [[maybe_unused]] MPICommunicator& source_comm, + [[maybe_unused]] CommLayer layer) { + if constexpr (prioritize_tasks) { + DYNAMPI_UNIMPLEMENTED("Prioritized hierarchical distribution"); + } else { + using message_type = MPI_Type>; int count; - DYNAMPI_MPI_CHECK(MPI_Get_count, (&status, result_type::value, &count)); - result_type::resize(_results[task_idx], count); - _communicator.recv(_results[task_idx], status.MPI_SOURCE, Tag::RESULT); - _results_received++; + DYNAMPI_MPI_CHECK(MPI_Get_count, (&status, message_type::value, &count)); + std::vector tasks; + message_type::resize(tasks, count); + // With groups, always use global communicator + int world_source = status.MPI_SOURCE; + m_communicator.recv(tasks, world_source, Tag::TASK_BATCH); + m_tasks_received_from_parent += tasks.size(); + for (const auto& task : tasks) { + m_unallocated_task_queue.push_back(task); + } + } + } + + void receive_request_from(MPI_Status status, 
[[maybe_unused]] MPICommunicator& source_comm, + CommLayer layer) { + // With groups, always use global communicator and determine layer from source rank + int world_source = status.MPI_SOURCE; + if (m_config.coordinator_per_node) { + layer = determine_layer_from_world_rank(world_source); + } + m_communicator.recv_empty_message(world_source, Tag::REQUEST); + m_free_worker_indices.push(TaskRequest{.worker_rank = world_source, .source_layer = layer}); + } + + void receive_request_batch_from(MPI_Status status, [[maybe_unused]] MPICommunicator& source_comm, + CommLayer layer) { + // With groups, always use global communicator and determine layer from source rank + int world_source = status.MPI_SOURCE; + if (m_config.coordinator_per_node) { + layer = determine_layer_from_world_rank(world_source); + } + int request_count; + m_communicator.recv(request_count, world_source, Tag::REQUEST_BATCH); + DYNAMPI_ASSERT_GT(request_count, 0, "Invalid request count"); + DYNAMPI_ASSERT_LE(request_count, kMaxTasksRequested, "Request count exceeds maximum allowed"); + m_free_worker_indices.push(TaskRequest{ + .worker_rank = world_source, .source_layer = layer, .num_tasks_requested = request_count}); + } + + void receive_done_from(MPI_Status status, [[maybe_unused]] MPICommunicator& source_comm, + [[maybe_unused]] CommLayer layer) { + // With groups, always use global communicator + int world_source = status.MPI_SOURCE; + m_communicator.recv_empty_message(world_source, Tag::DONE); + m_done = true; + } + + void receive_from_anyone() { + DYNAMPI_ASSERT_GT(m_communicator.size(), 1, + "There should be at least one worker to receive results from"); + + MPI_Status status{}; + CommLayer layer = CommLayer::Global; + + if (m_config.coordinator_per_node) { + // Poll global communicator non-blocking until a message is available + // The layer will be determined from the source rank in the receive methods + bool found = false; + while (!found) { + auto opt_status = m_communicator.iprobe(); + if 
(opt_status.has_value()) { + status = opt_status.value(); + found = true; + break; + } + std::this_thread::yield(); + } } else { - assert(status.MPI_TAG == Tag::REQUEST && "Unexpected tag received in worker"); - _communicator.recv_empty_message(status.MPI_SOURCE, Tag::REQUEST); + status = m_communicator.probe(); + } + + // Assert that the tag is a valid Tag enum value before casting + DYNAMPI_ASSERT(status.MPI_TAG >= static_cast(Tag::TASK) && + status.MPI_TAG <= static_cast(Tag::REQUEST_BATCH), + "Received invalid MPI tag: " + std::to_string(status.MPI_TAG)); + Tag tag = static_cast(status.MPI_TAG); + // Note: receive methods now use global communicator and determine layer from source rank + switch (tag) { + case Tag::TASK: + return receive_execute_return_task_from(status, m_communicator, layer); + case Tag::TASK_BATCH: + return receive_task_batch_from(status, m_communicator, layer); + case Tag::RESULT: + return receive_result_from(status, m_communicator, layer); + case Tag::RESULT_BATCH: + return receive_result_batch_from(status, m_communicator, layer); + case Tag::REQUEST: + return receive_request_from(status, m_communicator, layer); + case Tag::REQUEST_BATCH: + return receive_request_batch_from(status, m_communicator, layer); + case Tag::DONE: + return receive_done_from(status, m_communicator, layer); } - _free_worker_indices.push(status.MPI_SOURCE); } }; diff --git a/include/dynampi/impl/naive_distributor.hpp b/include/dynampi/impl/naive_distributor.hpp index a74a1c3..70523c4 100644 --- a/include/dynampi/impl/naive_distributor.hpp +++ b/include/dynampi/impl/naive_distributor.hpp @@ -10,16 +10,18 @@ #include #include #include -#include +#include +#include #include -#include #include #include +#include #include #include "../mpi/mpi_communicator.hpp" #include "../mpi/mpi_types.hpp" #include "dynampi/impl/base_distributor.hpp" +#include "dynampi/utilities/timer.hpp" namespace dynampi { @@ -36,30 +38,56 @@ class NaiveMPIWorkDistributor { // message. 
If a message exceeds this size, behavior is undefined. }; + struct RunConfig { + // Stop once we have at least this many contiguous results ready to return. + size_t target_num_tasks = std::numeric_limits::max(); + + // If false, strictly clips the return vector to `target_num_tasks`. + // Excess results remain in the internal buffer for the next call. + bool allow_more_than_target_tasks = true; + + // Stop if this much time has passed. + std::optional max_seconds = std::nullopt; + }; + + static const bool ordered = true; + private: static constexpr bool prioritize_tasks = get_option_value(); + static constexpr StatisticsMode statistics_mode = + get_option_value(); + using QueueT = std::conditional_t>, std::deque>; + using MPICommunicator = dynampi::MPICommunicator>; - QueueT _unallocated_task_queue; - std::vector _worker_current_task_indices; - std::vector _results; - std::stack> _free_worker_indices; + // --- Member Variables --- + Config m_config; + MPICommunicator m_communicator; + std::function m_worker_function; - size_t _tasks_sent = 0; - size_t _results_received = 0; - bool _finalized = false; + QueueT m_unallocated_task_queue; - static constexpr StatisticsMode statistics_mode = - get_option_value(); + // State tracking + std::vector m_worker_current_task_indices; // Maps worker_idx -> task_id + std::stack m_free_worker_ranks; - using MPICommunicator = dynampi::MPICommunicator>; - MPICommunicator _communicator; - std::function _worker_function; - Config _config; + // Transient Storage: + // We use a vector to store results by task ID, with a bitmap to track validity. + // Items are marked invalid as soon as they become contiguous and ready to return. 
+ std::vector m_pending_results; + std::vector m_pending_results_valid; + + // Counters + size_t m_tasks_sent = 0; // Total tasks ever sent (acts as the unique ID for the next task) + size_t m_front_result_idx = 0; // The task ID of the result at the front of the vector (index 0) + size_t m_known_contiguous_results = + 0; // Number of contiguous valid results starting from m_front_result_idx + bool m_finalized = false; enum Tag : int { TASK = 0, DONE = 1, RESULT = 2, REQUEST = 3, ERROR = 4 }; + public: struct Statistics { const CommStatistics& comm_statistics; std::vector worker_task_counts; @@ -68,256 +96,321 @@ class NaiveMPIWorkDistributor { using StatisticsT = std::conditional_t; - StatisticsT _statistics; - - static StatisticsT create_statistics(const MPICommunicator& comm) { - if constexpr (statistics_mode != StatisticsMode::None) { - return Statistics{comm.get_statistics(), {}}; - } else { - return {}; - } - } + private: + StatisticsT m_statistics; public: explicit NaiveMPIWorkDistributor(std::function worker_function, Config runtime_config = Config{}) - : _communicator(runtime_config.comm, MPICommunicator::Duplicate), - _worker_function(worker_function), - _config(runtime_config), - _statistics{create_statistics(_communicator)} { - if (is_root_manager()) _worker_current_task_indices.resize(_communicator.size() - 1, -1); - if (_config.auto_run_workers && _communicator.rank() != _config.manager_rank) { + : m_config(runtime_config), + m_communicator(runtime_config.comm, MPICommunicator::Duplicate), + m_worker_function(worker_function), + m_statistics{create_statistics(m_communicator)} { + if (is_root_manager()) { + m_worker_current_task_indices.resize(num_workers(), -1); + } + + if (m_config.auto_run_workers && !is_root_manager()) { run_worker(); } + if constexpr (statistics_mode >= StatisticsMode::Aggregated) { - if (is_root_manager()) _statistics.worker_task_counts.resize(_communicator.size(), 0); + if (is_root_manager()) 
m_statistics.worker_task_counts.resize(m_communicator.size(), 0); } } - const StatisticsT& get_statistics() const - requires(statistics_mode != StatisticsMode::None) - { - assert(is_root_manager() && "Only the manager can access statistics"); - return _statistics; + ~NaiveMPIWorkDistributor() { + if (!m_finalized) finalize(); } - void run_worker() { - assert(_communicator.rank() != _config.manager_rank && "Worker cannot run on the manager rank"); - using task_type = MPI_Type; - // Send REQUEST as 0 elements of ResultT so manager can recv_any(ResultT&) for both REQUEST and - // RESULT - _communicator.template send_empty(_config.manager_rank, Tag::REQUEST); + // --- Main Interface --- + + [[nodiscard]] std::vector run_tasks(RunConfig config = RunConfig{}) { + assert(is_root_manager() && "Only the manager can distribute tasks"); + + Timer timer; + + // We loop until one of the exit conditions is met. while (true) { - MPI_Status status = _communicator.probe(); - if (status.MPI_TAG == Tag::DONE) { - _communicator.recv_empty_message(_config.manager_rank, Tag::DONE); + // --- 1. Check Exit Conditions --- + + // A. Have we collected enough contiguous results? + if (m_known_contiguous_results >= config.target_num_tasks) { break; } - int count; - DYNAMPI_MPI_CHECK(MPI_Get_count, (&status, task_type::value, &count)); - TaskT message; - task_type::resize(message, count); - _communicator.recv(message, _config.manager_rank, Tag::TASK); - _tasks_sent++; - ResultT result = _worker_function(message); - _communicator.send(result, _config.manager_rank, Tag::RESULT); - _results_received++; + + // B. Time limit check + if (config.max_seconds && timer.elapsed().count() >= *config.max_seconds) { + break; + } + + // C. Total exhaustion check + if (m_unallocated_task_queue.empty() && active_worker_count() == 0) { + break; + } + + // --- 2. 
Action Logic (Send vs Receive) --- + + // Priority: Keep workers busy + if (!m_unallocated_task_queue.empty() && !m_free_worker_ranks.empty()) { + send_next_task_to_worker(m_free_worker_ranks.top()); + m_free_worker_ranks.pop(); + } else { + // Single process mode fallback + if (num_workers() == 0 && !m_unallocated_task_queue.empty()) { + run_task_locally(); + } + // Standard MPI wait + else if (active_worker_count() > 0) { + process_incoming_message(); + } + } + } + + // --- 3. Return Logic --- + size_t limit = std::numeric_limits::max(); + if (!config.allow_more_than_target_tasks) { + limit = config.target_num_tasks; + } + + return collect_available_results(limit); + } + + [[nodiscard]] std::vector finish_remaining_tasks() { + RunConfig cfg; + cfg.target_num_tasks = std::numeric_limits::max(); + return run_tasks(cfg); + } + + void finalize() { + assert(!m_finalized && "Work distribution already finalized"); + if (is_root_manager()) { + broadcast_done(); } + m_finalized = true; } - bool is_root_manager() const { return _communicator.rank() == _config.manager_rank; } + // --- Public Accessors --- + + bool is_root_manager() const { return m_communicator.rank() == m_config.manager_rank; } size_t remaining_tasks_count() const { - assert(_communicator.rank() == _config.manager_rank && - "Only the manager can check remaining tasks"); - return _unallocated_task_queue.size(); + assert(is_root_manager() && "Only the manager can check remaining tasks"); + return m_unallocated_task_queue.size(); } + const StatisticsT& get_statistics() const + requires(statistics_mode != StatisticsMode::None) + { + assert(is_root_manager() && "Only the manager can access statistics"); + return m_statistics; + } + + // --- Task Insertion --- + void insert_task(TaskT task) requires(!prioritize_tasks) { - assert(_communicator.rank() == _config.manager_rank && "Only the manager can distribute tasks"); - _unallocated_task_queue.push_back(task); + assert(is_root_manager()); + 
m_unallocated_task_queue.push_back(std::move(task)); } + void insert_task(const TaskT& task, double priority) requires(prioritize_tasks) { - assert(_communicator.rank() == _config.manager_rank && "Only the manager can distribute tasks"); - _unallocated_task_queue.emplace(priority, task); + assert(is_root_manager()); + m_unallocated_task_queue.emplace(priority, task); } - template - requires std::ranges::input_range && (!prioritize_tasks) - void insert_tasks(const Range& tasks) { - assert(_communicator.rank() == _config.manager_rank && "Only the manager can distribute tasks"); - std::copy(std::ranges::begin(tasks), std::ranges::end(tasks), - std::back_inserter(_unallocated_task_queue)); - } void insert_tasks(const std::vector& tasks) requires(!prioritize_tasks) { - insert_tasks(std::span(tasks)); + assert(is_root_manager()); + for (const auto& t : tasks) m_unallocated_task_queue.push_back(t); } - void get_task_and_allocate() { - const TaskT task = get_next_task_to_send(); - if (_communicator.size() > 1) { - if (_free_worker_indices.empty()) { - // If no free workers, wait for a result to be received - receive_from_any_worker(); - } - int worker = _free_worker_indices.top(); - _free_worker_indices.pop(); - _worker_current_task_indices[idx_for_worker(worker)] = _tasks_sent; - if constexpr (statistics_mode >= StatisticsMode::Aggregated) { - _statistics.worker_task_counts[worker]++; + // --- Worker Logic --- + + void run_worker() { + assert(!is_root_manager()); + using task_type = MPI_Type; + + // Handshake: send REQUEST as 0 elements of ResultT so manager can recv_any(ResultT&) for both + // REQUEST and RESULT + m_communicator.template send_empty(m_config.manager_rank, Tag::REQUEST); + + while (true) { + MPI_Status status = m_communicator.probe(); + + if (status.MPI_TAG == Tag::DONE) { + m_communicator.recv_empty_message(m_config.manager_rank, Tag::DONE); + break; } - _communicator.send(task, worker, Tag::TASK); - } else { - // If there's only one process, we just run 
the worker function directly - _results.emplace_back(_worker_function(task)); - _results_received++; - } - _tasks_sent++; - } - [[nodiscard]] std::vector finish_remaining_tasks() { - assert(_communicator.rank() == _config.manager_rank && "Only the manager can distribute tasks"); - while (!_unallocated_task_queue.empty()) { - get_task_and_allocate(); - } - while (_free_worker_indices.size() + 1 < static_cast(_communicator.size())) { - receive_from_any_worker(); + int count; + DYNAMPI_MPI_CHECK(MPI_Get_count, (&status, task_type::value, &count)); + TaskT message; + task_type::resize(message, count); + m_communicator.recv(message, m_config.manager_rank, Tag::TASK); + + ResultT result = m_worker_function(std::move(message)); + + m_communicator.send(result, m_config.manager_rank, Tag::RESULT); } - assert(_results_received == _tasks_sent && "Not all tasks were processed by workers"); - assert(_results.size() == _tasks_sent && "Results size should match tasks sent"); - return _results; } - void finalize() { - assert(!_finalized && "Work distribution already finalized"); - if (is_root_manager()) { - send_done_to_workers(); - _finalized = true; - } + private: + // --- Helpers --- + + int num_workers() const { return m_communicator.size() - 1; } + + size_t active_worker_count() const { + return static_cast(num_workers()) - m_free_worker_ranks.size(); } - ~NaiveMPIWorkDistributor() { - if (!_finalized) { - finalize(); - } - assert(_tasks_sent == _results_received && "Not all tasks were processed by workers"); + int rank_to_worker_idx(int rank) const { + return (rank < m_config.manager_rank) ? rank : (rank - 1); } - private: - TaskT get_next_task_to_send() { - assert(_communicator.rank() == _config.manager_rank && "Only the manager can get next task"); - assert(!_unallocated_task_queue.empty() && "There should be tasks available to send"); + int worker_idx_to_rank(int idx) const { return (idx < m_config.manager_rank) ? 
idx : (idx + 1); } + + TaskT pop_next_task() { TaskT task; - if constexpr (std::is_same_v>) { - task = _unallocated_task_queue.front(); - _unallocated_task_queue.pop_front(); + if constexpr (prioritize_tasks) { + task = m_unallocated_task_queue.top().second; + m_unallocated_task_queue.pop(); } else { - task = _unallocated_task_queue.top().second; - _unallocated_task_queue.pop(); + task = std::move(m_unallocated_task_queue.front()); + m_unallocated_task_queue.pop_front(); } return task; } - void send_done_to_workers() { - assert(_communicator.rank() == _config.manager_rank && - "Only the manager can finalize the work distribution"); - assert(_free_worker_indices.size() + 1 == static_cast(_communicator.size()) && - "All workers should be free before finalizing"); - for (int i = 0; i < _communicator.size() - 1; i++) { - _communicator.send(nullptr, worker_for_idx(i), Tag::DONE); + void run_task_locally() { + TaskT task = pop_next_task(); + // Store result directly in vector (using relative indexing) + int64_t task_id = static_cast(m_tasks_sent); + ensure_result_capacity(task_id - m_front_result_idx + 1); + size_t vector_idx = task_id - m_front_result_idx; + m_pending_results[vector_idx] = m_worker_function(std::move(task)); + m_pending_results_valid[vector_idx] = true; + m_tasks_sent++; + update_contiguous_results_count(task_id); + } + + void send_next_task_to_worker(int worker_rank) { + TaskT task = pop_next_task(); + int64_t task_id = static_cast(m_tasks_sent); + + m_worker_current_task_indices[rank_to_worker_idx(worker_rank)] = task_id; + if constexpr (statistics_mode >= StatisticsMode::Aggregated) { + m_statistics.worker_task_counts[rank_to_worker_idx(worker_rank)]++; } + + m_communicator.send(task, worker_rank, Tag::TASK); + m_tasks_sent++; } - int idx_for_worker(int worker_rank) const { - assert(worker_rank != _config.manager_rank && - "Manager rank should not be used as a worker rank"); - if (worker_rank < _config.manager_rank) { - return worker_rank; + void 
process_incoming_message() { + MPI_Status status = m_communicator.probe(MPI_ANY_SOURCE, MPI_ANY_TAG); + int source = status.MPI_SOURCE; + if (status.MPI_TAG == Tag::RESULT) { + handle_result_message(source, status); } else { - return worker_rank - 1; + DYNAMPI_ASSERT_EQ(status.MPI_TAG, Tag::REQUEST, "Unexpected tag received"); + m_communicator.template recv_empty(source, Tag::REQUEST); } + m_free_worker_ranks.push(source); } - int worker_for_idx(int idx) const { return (idx < _config.manager_rank) ? idx : (idx + 1); } + void handle_result_message(int source, MPI_Status& probe_status) { + int worker_idx = rank_to_worker_idx(source); + int64_t task_id = m_worker_current_task_indices[worker_idx]; + m_worker_current_task_indices[worker_idx] = -1; - void process_result_message(const MPI_Status& status, ResultT&& result, int count) { using result_type = MPI_Type; - int worker_idx = status.MPI_SOURCE - (status.MPI_SOURCE > _config.manager_rank); - int64_t task_idx = _worker_current_task_indices[worker_idx]; - _worker_current_task_indices[worker_idx] = -1; - assert(task_idx >= 0 && "Task index should be valid"); - if (static_cast(task_idx) >= _results.size()) { - _results.resize(task_idx + 1); + int count; + DYNAMPI_MPI_CHECK(MPI_Get_count, (&probe_status, result_type::value, &count)); + + ResultT result_data; + result_type::resize(result_data, count); + m_communicator.recv(result_data, source, Tag::RESULT); + + // Store in vector (using relative indexing) + size_t vector_idx = task_id - m_front_result_idx; + ensure_result_capacity(vector_idx + 1); + m_pending_results[vector_idx] = std::move(result_data); + m_pending_results_valid[vector_idx] = true; + update_contiguous_results_count(task_id); + } + + std::vector collect_available_results(size_t limit) { + std::vector batch; + size_t num_results_to_return = std::min(limit, m_known_contiguous_results); + if (num_results_to_return == 0) { + return batch; } - if constexpr (result_type::resize_required) { - 
result_type::resize(_results[task_idx], count); + + batch.reserve(num_results_to_return); + // Extract from the beginning of the vectors (which contain contiguous results starting from + // m_front_result_idx) + batch.insert(batch.end(), std::make_move_iterator(m_pending_results.begin()), + std::make_move_iterator(m_pending_results.begin() + num_results_to_return)); + + // Erase the collected results from the beginning + m_pending_results_valid.erase(m_pending_results_valid.begin(), + m_pending_results_valid.begin() + num_results_to_return); + m_pending_results.erase(m_pending_results.begin(), + m_pending_results.begin() + num_results_to_return); + + // Update counters: increment m_front_result_idx to reflect the new starting point, + // and decrement the contiguous count. The vectors now use relative indexing + // where index 0 corresponds to task_id = m_front_result_idx. + m_front_result_idx += num_results_to_return; + m_known_contiguous_results -= num_results_to_return; + + return batch; + } + + void broadcast_done() { + for (int i = 0; i < num_workers(); i++) { + m_communicator.send(nullptr, worker_idx_to_rank(i), Tag::DONE); } - _results[task_idx] = std::move(result); - _results_received++; } - void receive_from_any_worker() { - assert(_communicator.rank() == _config.manager_rank && - "Only the manager can receive results and send tasks"); - assert(_communicator.size() > 1 && - "There should be at least one worker to receive results from"); - using result_type = MPI_Type; - MPI_Status status; - - if (_config.use_immediate_recv) { - // Immediate receive mode: REQUEST and RESULT both use type ResultT (REQUEST = 0 elements). - // recv_any(buffer) receives into the same buffer type for both. 
- if constexpr (result_type::resize_required) { - ResultT buffer; - result_type::resize(buffer, _config.max_result_size); - status = _communicator.recv_any(buffer); - - if (status.MPI_TAG == Tag::RESULT) { - int count; - DYNAMPI_MPI_CHECK(MPI_Get_count, (&status, result_type::value, &count)); - // Resize buffer to actual received count (may be less than max_result_size) - result_type::resize(buffer, count); - process_result_message(status, std::move(buffer), count); - } else { - assert(status.MPI_TAG == Tag::REQUEST && "Unexpected tag received"); - } - } else { - ResultT buffer; - status = _communicator.recv_any(buffer); - - if (status.MPI_TAG == Tag::RESULT) { - int count; - DYNAMPI_MPI_CHECK(MPI_Get_count, (&status, result_type::value, &count)); - process_result_message(status, std::move(buffer), count); - } else { - assert(status.MPI_TAG == Tag::REQUEST && "Unexpected tag received"); - } + void ensure_result_capacity(size_t required_size) { + // required_size is relative to m_front_result_idx + if (m_pending_results.size() < required_size) { + m_pending_results.resize(required_size); + m_pending_results_valid.resize(required_size, false); + } + } + + // Updates m_known_contiguous_results when a new result arrives. + // If the result extends the contiguous sequence, increment and check forward. 
+ void update_contiguous_results_count(int64_t task_id) { + int64_t expected_task_id = + static_cast(m_front_result_idx + m_known_contiguous_results); + + // Only update if this result extends the contiguous sequence + if (task_id == expected_task_id) { + // Extend the contiguous sequence forward as far as possible + // Use relative indexing for vector access + size_t vector_idx = expected_task_id - m_front_result_idx; + while (vector_idx < m_pending_results.size() && m_pending_results_valid[vector_idx]) { + m_known_contiguous_results++; + vector_idx++; } + } + } + + static StatisticsT create_statistics(const MPICommunicator& comm) { + if constexpr (statistics_mode != StatisticsMode::None) { + return Statistics{comm.get_statistics(), {}}; } else { - // Probe mode: use probe to check message size before receiving - status = _communicator.probe(); - if (status.MPI_TAG == Tag::RESULT) { - int count; - DYNAMPI_MPI_CHECK(MPI_Get_count, (&status, result_type::value, &count)); - ResultT buffer; - if constexpr (result_type::resize_required) { - result_type::resize(buffer, count); - } - _communicator.recv(buffer, status.MPI_SOURCE, Tag::RESULT); - process_result_message(status, std::move(buffer), count); - } else { - assert(status.MPI_TAG == Tag::REQUEST && "Unexpected tag received in worker"); - _communicator.recv_empty(status.MPI_SOURCE, Tag::REQUEST); - } + return {}; } - _free_worker_indices.push(status.MPI_SOURCE); } }; -}; // namespace dynampi +} // namespace dynampi diff --git a/include/dynampi/mpi/mpi_communicator.hpp b/include/dynampi/mpi/mpi_communicator.hpp index bc81f98..316b653 100644 --- a/include/dynampi/mpi/mpi_communicator.hpp +++ b/include/dynampi/mpi/mpi_communicator.hpp @@ -10,7 +10,9 @@ #include #include +#include "dynampi/mpi/mpi_group.hpp" #include "dynampi/mpi/mpi_types.hpp" +#include "dynampi/utilities/assert.hpp" #include "dynampi/utilities/template_options.hpp" #include "mpi_error.hpp" @@ -68,8 +70,8 @@ class MPICommunicator { }; private: - 
MPI_Comm _comm; - Ownership _ownership; + MPI_Comm m_comm; + Ownership m_ownership; static constexpr StatisticsMode statistics_mode = get_option_value(); @@ -80,39 +82,39 @@ class MPICommunicator { public: MPICommunicator(MPI_Comm comm, Ownership ownership = Duplicate) - : _comm(comm), _ownership(ownership) { - if (_ownership == Duplicate) { - DYNAMPI_MPI_CHECK(MPI_Comm_dup, (comm, &_comm)); + : m_comm(comm), m_ownership(ownership) { + if (m_ownership == Duplicate) { + DYNAMPI_MPI_CHECK(MPI_Comm_dup, (comm, &m_comm)); } } MPICommunicator(const MPICommunicator& other) = delete; MPICommunicator& operator=(const MPICommunicator& other) = delete; MPICommunicator(MPICommunicator&& other) noexcept - : _comm(other._comm), - _ownership(other._ownership), + : m_comm(other.m_comm), + m_ownership(other.m_ownership), _statistics(std::move(other._statistics)) { - other._comm = MPI_COMM_NULL; - other._ownership = Reference; + other.m_comm = MPI_COMM_NULL; + other.m_ownership = Reference; } MPICommunicator& operator=(MPICommunicator&& other) = delete; ~MPICommunicator() { - if (_ownership != Reference) { - MPI_Comm_free(&_comm); + if (m_ownership != Reference) { + MPI_Comm_free(&m_comm); } } MPICommunicator split_by_node() const { MPI_Comm node_comm; DYNAMPI_MPI_CHECK(MPI_Comm_split_type, - (_comm, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, &node_comm)); + (m_comm, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, &node_comm)); return MPICommunicator(node_comm, Move); } std::optional split(int color, int key = 0) const { MPI_Comm new_comm; - DYNAMPI_MPI_CHECK(MPI_Comm_split, (_comm, color, key, &new_comm)); + DYNAMPI_MPI_CHECK(MPI_Comm_split, (m_comm, color, key, &new_comm)); if (new_comm == MPI_COMM_NULL) { return std::nullopt; } @@ -120,7 +122,7 @@ class MPICommunicator { return MPICommunicator(new_comm, Move); } - operator MPI_Comm() const { return _comm; } + operator MPI_Comm() const { return m_comm; } const CommStatistics& get_statistics() const requires(statistics_mode != 
StatisticsMode::None) @@ -130,13 +132,13 @@ class MPICommunicator { int rank() const { int rank; - DYNAMPI_MPI_CHECK(MPI_Comm_rank, (_comm, &rank)); + DYNAMPI_MPI_CHECK(MPI_Comm_rank, (m_comm, &rank)); return rank; } int size() const { int size; - DYNAMPI_MPI_CHECK(MPI_Comm_size, (_comm, &size)); + DYNAMPI_MPI_CHECK(MPI_Comm_size, (m_comm, &size)); return size; } @@ -144,7 +146,7 @@ class MPICommunicator { inline void send(const T& data, int dest, int tag = 0) { using mpi_type = MPI_Type; DYNAMPI_MPI_CHECK( - MPI_Send, (mpi_type::ptr(data), mpi_type::count(data), mpi_type::value, dest, tag, _comm)); + MPI_Send, (mpi_type::ptr(data), mpi_type::count(data), mpi_type::value, dest, tag, m_comm)); if constexpr (statistics_mode != StatisticsMode::None) { _statistics.send_count++; int size; @@ -153,12 +155,28 @@ class MPICommunicator { } } + inline MPI_Status probe(int source = MPI_ANY_SOURCE, int tag = MPI_ANY_TAG) { + MPI_Status status; + DYNAMPI_MPI_CHECK(MPI_Probe, (source, tag, m_comm, &status)); + return status; + } + + inline std::optional iprobe(int source = MPI_ANY_SOURCE, int tag = MPI_ANY_TAG) { + MPI_Status status; + int flag; + DYNAMPI_MPI_CHECK(MPI_Iprobe, (source, tag, m_comm, &flag, &status)); + if (flag) { + return status; + } + return std::nullopt; + } + template inline void recv(T& data, int source, int tag = 0) { using mpi_type = MPI_Type; MPI_Status status; DYNAMPI_MPI_CHECK(MPI_Recv, (mpi_type::ptr(data), mpi_type::count(data), mpi_type::value, - source, tag, _comm, &status)); + source, tag, m_comm, &status)); if constexpr (statistics_mode != StatisticsMode::None) { _statistics.recv_count++; int actual_count; @@ -169,20 +187,13 @@ class MPICommunicator { } } - // Probe for a message, returns status - inline MPI_Status probe(int source = MPI_ANY_SOURCE, int tag = MPI_ANY_TAG) { - MPI_Status status; - DYNAMPI_MPI_CHECK(MPI_Probe, (source, tag, _comm, &status)); - return status; - } - // Receive with MPI_ANY_SOURCE/MPI_ANY_TAG and return status template 
inline MPI_Status recv_any(T& data, int source = MPI_ANY_SOURCE, int tag = MPI_ANY_TAG) { using mpi_type = MPI_Type; MPI_Status status; DYNAMPI_MPI_CHECK(MPI_Recv, (mpi_type::ptr(data), mpi_type::count(data), mpi_type::value, - source, tag, _comm, &status)); + source, tag, m_comm, &status)); if constexpr (statistics_mode != StatisticsMode::None) { _statistics.recv_count++; int actual_count; @@ -205,7 +216,7 @@ class MPICommunicator { } } DYNAMPI_MPI_CHECK(MPI_Bcast, - (mpi_type::ptr(data), mpi_type::count(data), mpi_type::value, root, _comm)); + (mpi_type::ptr(data), mpi_type::count(data), mpi_type::value, root, m_comm)); if constexpr (statistics_mode != StatisticsMode::None) { _statistics.collective_count++; } @@ -214,7 +225,7 @@ class MPICommunicator { inline void recv_empty_message(int source, int tag = 0) { using mpi_type = MPI_Type; DYNAMPI_MPI_CHECK(MPI_Recv, (nullptr, mpi_type::count(nullptr), mpi_type::value, source, tag, - _comm, MPI_STATUS_IGNORE)); + m_comm, MPI_STATUS_IGNORE)); if constexpr (statistics_mode != StatisticsMode::None) { _statistics.recv_count++; } @@ -225,7 +236,7 @@ class MPICommunicator { template inline void send_empty(int dest, int tag = 0) { using mpi_type = MPI_Type; - DYNAMPI_MPI_CHECK(MPI_Send, (nullptr, 0, mpi_type::value, dest, tag, _comm)); + DYNAMPI_MPI_CHECK(MPI_Send, (nullptr, 0, mpi_type::value, dest, tag, m_comm)); if constexpr (statistics_mode != StatisticsMode::None) { _statistics.send_count++; } @@ -236,13 +247,29 @@ class MPICommunicator { inline void recv_empty(int source, int tag = 0) { using mpi_type = MPI_Type; DYNAMPI_MPI_CHECK(MPI_Recv, - (nullptr, 0, mpi_type::value, source, tag, _comm, MPI_STATUS_IGNORE)); + (nullptr, 0, mpi_type::value, source, tag, m_comm, MPI_STATUS_IGNORE)); if constexpr (statistics_mode != StatisticsMode::None) { _statistics.recv_count++; } } - [[nodiscard]] MPI_Comm get() const { return _comm; } + template + inline void gather(const T& data, std::vector* result, int root = 0) { + 
DYNAMPI_ASSERT_EQ(result != nullptr, root == rank(), + "Gather result must be provided only on the root rank"); + using mpi_type = MPI_Type; + DYNAMPI_MPI_CHECK(MPI_Gather, (mpi_type::ptr(data), mpi_type::count(data), mpi_type::value, + result == nullptr ? nullptr : result->data(), + mpi_type::count(data), mpi_type::value, root, m_comm)); + if constexpr (statistics_mode != StatisticsMode::None) { + _statistics.collective_count++; + } + } + + [[nodiscard]] MPI_Comm get() const { return m_comm; } + + // Get the group associated with this communicator + [[nodiscard]] MPIGroup get_group() const { return MPIGroup(*this); } }; } // namespace dynampi diff --git a/include/dynampi/mpi/mpi_error.hpp b/include/dynampi/mpi/mpi_error.hpp index c4c2d43..b87f860 100644 --- a/include/dynampi/mpi/mpi_error.hpp +++ b/include/dynampi/mpi/mpi_error.hpp @@ -9,6 +9,7 @@ #include +// cppcheck-suppress preprocessorErrorDirective #if __has_include() #include #if defined(__cpp_lib_source_location) diff --git a/include/dynampi/mpi/mpi_group.hpp b/include/dynampi/mpi/mpi_group.hpp new file mode 100644 index 0000000..c8868dc --- /dev/null +++ b/include/dynampi/mpi/mpi_group.hpp @@ -0,0 +1,94 @@ +/* + * SPDX-FileCopyrightText: 2025 QDX Technologies. 
Authored by Ryan Stocks + * SPDX-License-Identifier: Apache-2.0 + */ + +#pragma once + +#include + +#include "dynampi/utilities/assert.hpp" +#include "mpi_error.hpp" + +namespace dynampi { + +// Forward declaration +template +class MPICommunicator; + +class MPIGroup { + private: + MPI_Group m_group; + + public: + // Create from a communicator (extracts the group) + template + explicit MPIGroup(const MPICommunicator& comm) { + DYNAMPI_MPI_CHECK(MPI_Comm_group, (comm.get(), &m_group)); + } + + // Non-copyable + MPIGroup(const MPIGroup& other) = delete; + MPIGroup& operator=(const MPIGroup& other) = delete; + + // Movable + MPIGroup(MPIGroup&& other) noexcept : m_group(other.m_group) { other.m_group = MPI_GROUP_NULL; } + MPIGroup& operator=(MPIGroup&& other) noexcept { + if (this != &other) { + if (m_group != MPI_GROUP_NULL) { + MPI_Group_free(&m_group); + } + m_group = other.m_group; + other.m_group = MPI_GROUP_NULL; + } + return *this; + } + + ~MPIGroup() { + if (m_group != MPI_GROUP_NULL) { + MPI_Group_free(&m_group); + } + } + + // Translate ranks from this group to another group + void translate_ranks(const MPIGroup& to_group, int n, const int ranks[], + int translated_ranks[]) const { + DYNAMPI_MPI_CHECK(MPI_Group_translate_ranks, + (m_group, n, ranks, to_group.m_group, translated_ranks)); + } + + // Convenience method for single rank translation + int translate_rank(int rank, const MPIGroup& to_group) const { + int translated_rank; + translate_ranks(to_group, 1, &rank, &translated_rank); + return translated_rank; + } + + // Get the size of this group + int size() const { + int size; + DYNAMPI_MPI_CHECK(MPI_Group_size, (m_group, &size)); + return size; + } + + // Get the rank of the calling process in this group (MPI_UNDEFINED if not in group) + int rank() const { + int rank; + DYNAMPI_MPI_CHECK(MPI_Group_rank, (m_group, &rank)); + return rank; + } + + // Check if a rank (from a reference group, typically the world group) is in this group + // Returns the rank 
in this group, or MPI_UNDEFINED if not found + // Note: This translates FROM reference_group TO this group + static int contains_rank_in_group(int rank_in_reference, const MPIGroup& reference_group, + const MPIGroup& target_group) { + return reference_group.translate_rank(rank_in_reference, target_group); + } + + operator MPI_Group() const { return m_group; } + + [[nodiscard]] MPI_Group get() const { return m_group; } +}; + +} // namespace dynampi diff --git a/include/dynampi/mpi/mpi_types.hpp b/include/dynampi/mpi/mpi_types.hpp index 5fa4ba9..5614299 100644 --- a/include/dynampi/mpi/mpi_types.hpp +++ b/include/dynampi/mpi/mpi_types.hpp @@ -9,6 +9,7 @@ #include #include +#include #include #include @@ -53,6 +54,9 @@ DYNAMPI_DEFINE_PRIMITIVE_MPI_TYPE(double, MPI_DOUBLE); DYNAMPI_DEFINE_PRIMITIVE_MPI_TYPE(long double, MPI_LONG_DOUBLE); #if defined(MPI_CXX_BOOL) DYNAMPI_DEFINE_PRIMITIVE_MPI_TYPE(bool, MPI_CXX_BOOL); +#else +// Fallback for when MPI_CXX_BOOL is not available (e.g. Microsoft-MPI) +DYNAMPI_DEFINE_PRIMITIVE_MPI_TYPE(bool, MPI_C_BOOL); #endif template <> diff --git a/include/dynampi/utilities/assert.hpp b/include/dynampi/utilities/assert.hpp new file mode 100644 index 0000000..14f4cd0 --- /dev/null +++ b/include/dynampi/utilities/assert.hpp @@ -0,0 +1,145 @@ +/* + * SPDX-FileCopyrightText: 2025 QDX Technologies. 
Authored by Ryan Stocks + * SPDX-License-Identifier: Apache-2.0 + */ + +#pragma once + +#include + +#include +#ifndef _MSC_VER +#define DYNAMPI_HAS_BUILTIN(x) __has_builtin(x) +#else +#define DYNAMPI_HAS_BUILTIN(x) 0 +#endif + +#ifndef NDEBUG +#include +#include + +// cppcheck-suppress preprocessorErrorDirective +#if __has_include() +#include +#if defined(__cpp_lib_source_location) +#define DYNAMPI_HAS_SOURCE_LOCATION +#endif +#elif __has_include() +#include +namespace std { +using source_location = std::experimental::source_location; +} +#define DYNAMPI_HAS_SOURCE_LOCATION +#endif + +#include +#include + +#include "printing.hpp" +#endif + +namespace dynampi { + +#ifndef NDEBUG +template +std::optional OptionalString(Args &&...args) { + if constexpr (sizeof...(args) == 0) { + return std::nullopt; + } else { + std::stringstream ss; + (ss << ... << args); + return ss.str(); + } +} + +#define DYNAMPI_ASSERT(condition, ...) \ + do { \ + if (!(condition)) \ + dynampi::_DYNAMPI_FAIL_ASSERT(#condition, dynampi::OptionalString(__VA_ARGS__)); \ + } while (false) + +inline void _DYNAMPI_FAIL_ASSERT(const std::string &condition_str, + const std::optional &message +#ifdef DYNAMPI_HAS_SOURCE_LOCATION + , + const std::source_location &loc = std::source_location::current() +#endif +) { + if (!std::uncaught_exceptions()) { + std::stringstream ss; +#ifdef DYNAMPI_HAS_SOURCE_LOCATION + std::string_view s = loc.file_name(); + std::string_view filename = s.substr(s.find_last_of('/') + 1); +#endif + int rank = 0; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + ss << "DynaMPI assertion failed on rank " << rank << ": " << condition_str + << (message ? " " + *message : "") + +#ifdef DYNAMPI_HAS_SOURCE_LOCATION + << "\n in " << loc.function_name() << " at " << filename << ":" << loc.line() +#endif + << std::endl; + std::cerr << ss.str(); + throw std::runtime_error(ss.str()); + } +} + +#define DYNAMPI_ASSERT_BIN_OP(a, b, op, nop, ...) 
\ + do { \ + const auto A = a; \ + const auto B = b; \ + if (!((A)op(B))) \ + dynampi::_DYNAMPI_FAILBinOp((A), (B), (#a), (#b), (#nop), \ + dynampi::OptionalString(__VA_ARGS__)); \ + } while (false) + +template +inline void _DYNAMPI_FAILBinOp(const A &a, const B &b, const std::string &a_str, + const std::string &b_str, const std::string &nop, + const std::optional &message +#ifdef DYNAMPI_HAS_SOURCE_LOCATION + , + const std::source_location &loc = std::source_location::current() +#endif +) { + std::stringstream ss; + ss << a << " " << nop << " " << b; + dynampi::_DYNAMPI_FAIL_ASSERT(a_str + " " + nop + " " + b_str, + message ? (ss.str() + " " + *message) : ss.str() +#ifdef DYNAMPI_HAS_SOURCE_LOCATION + , + loc +#endif + ); +} + +#else +#define DYNAMPI_ASSERT(condition, ...) \ + do { \ + } while (false) +#define DYNAMPI_ASSERT_BIN_OP(a, b, op, nop, ...) \ + do { \ + } while (false) +#endif + +#define DYNAMPI_FAIL(...) \ + DYNAMPI_ASSERT(false, __VA_ARGS__); \ + DYNAMPI_UNREACHABLE() // LCOV_EXCL_LINE + +#define DYNAMPI_UNIMPLEMENTED(...) DYNAMPI_FAIL("DYNAMPI_UNIMPLEMENTED") + +#define DYNAMPI_ASSERT_GE(expr, val, ...) DYNAMPI_ASSERT_BIN_OP(expr, val, >=, <, __VA_ARGS__) +#define DYNAMPI_ASSERT_LE(expr, val, ...) DYNAMPI_ASSERT_BIN_OP(expr, val, <=, >, __VA_ARGS__) +#define DYNAMPI_ASSERT_GT(expr, val, ...) DYNAMPI_ASSERT_BIN_OP(expr, val, >, <=, __VA_ARGS__) +#define DYNAMPI_ASSERT_LT(expr, val, ...) DYNAMPI_ASSERT_BIN_OP(expr, val, <, >=, __VA_ARGS__) +#define DYNAMPI_ASSERT_EQ(expr, val, ...) DYNAMPI_ASSERT_BIN_OP(expr, val, ==, !=, __VA_ARGS__) +#define DYNAMPI_ASSERT_NE(expr, val, ...) 
DYNAMPI_ASSERT_BIN_OP(expr, val, !=, ==, __VA_ARGS__) + +#if defined(_MSC_VER) && !defined(__clang__) // MSVC +#define DYNAMPI_UNREACHABLE() __assume(false) +#else // GCC, Clang +#define DYNAMPI_UNREACHABLE() __builtin_unreachable() +#endif + +} // namespace dynampi diff --git a/include/dynampi/utilities/printing.hpp b/include/dynampi/utilities/printing.hpp new file mode 100644 index 0000000..440ee24 --- /dev/null +++ b/include/dynampi/utilities/printing.hpp @@ -0,0 +1,110 @@ +/* + * SPDX-FileCopyrightText: 2025 QDX Technologies. Authored by Ryan Stocks + * SPDX-License-Identifier: Apache-2.0 + */ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace dynampi { + +template +inline std::ostream& operator<<(std::ostream& os, const std::set& set); +template +inline std::ostream& operator<<(std::ostream& os, const std::vector& vec); +template +inline std::ostream& operator<<(std::ostream& os, const std::array& arr); +template +inline std::ostream& operator<<(std::ostream& os, const std::span& vec); +template +inline std::ostream& operator<<(std::ostream& os, const std::optional& op); +template +inline std::ostream& operator<<(std::ostream& os, const std::tuple& tup); +template +inline std::ostream& operator<<(std::ostream& os, const std::pair& pair); +inline std::ostream& operator<<(std::ostream& os, const std::byte& b); + +// --------------- IMPLEMENTATIONS --------------- + +inline std::ostream& operator<<(std::ostream& os, const std::byte& b) { + return os << static_cast(b); +} + +template +inline std::ostream& operator<<(std::ostream& os, const std::span& vec) { + os << "["; + for (std::size_t i = 0; i < vec.size(); i++) { + os << vec[i]; + if (i < vec.size() - 1) { + os << ", "; + } + } + return os << "]"; +} + +template +inline std::ostream& operator<<(std::ostream& os, const std::vector& vec) { + return os << std::span(vec); +} + +template +inline std::ostream& operator<<(std::ostream& os, const 
std::array& arr) { + os << "["; + for (std::size_t i = 0; i < arr.size(); i++) { + os << arr[i]; + if (i < arr.size() - 1) { + os << ", "; + } + } + return os << "]"; +} + +template +inline std::ostream& operator<<(std::ostream& os, const std::set& set) { + os << "{"; + auto it = set.begin(); + while (it != set.end()) { + os << *it; + ++it; + if (it != set.end()) { + os << ", "; + } + } + return os << "}"; +} + +template +inline std::ostream& operator<<(std::ostream& os, const std::pair& pair) { + return os << "(" << pair.first << ", " << pair.second << ")"; +} + +template +inline std::ostream& operator<<(std::ostream& os, const std::optional& op) { + if (op.has_value()) { + return os << "Some(" << op.value() << ")"; + } + return os << "None"; +} + +template +inline std::ostream& operator<<(std::ostream& os, const std::tuple& tup) { + os << "("; + std::apply( + [&os](const Args&... args) { + std::size_t i = 0; + ((os << args << (++i < sizeof...(Args) ? ", " : "")), ...); + }, + tup); + return os << ")"; +} + +} // namespace dynampi diff --git a/include/dynampi/utilities/timer.hpp b/include/dynampi/utilities/timer.hpp index a7d50c3..6df8f9b 100644 --- a/include/dynampi/utilities/timer.hpp +++ b/include/dynampi/utilities/timer.hpp @@ -14,7 +14,7 @@ namespace dynampi { class Timer { std::optional> _start_time; - std::chrono::duration _elapsed_time{0.0}; + std::chrono::nanoseconds _elapsed_time{0}; public: enum class AutoStart { Yes, No }; @@ -33,14 +33,15 @@ class Timer { std::chrono::duration stop() { assert(_start_time.has_value() && "Timer not started"); auto end_time = std::chrono::high_resolution_clock::now(); - _elapsed_time += end_time - _start_time.value(); + _elapsed_time += + std::chrono::duration_cast(end_time - _start_time.value()); _start_time.reset(); - return _elapsed_time; + return std::chrono::duration(_elapsed_time); } void reset(AutoStart auto_start = AutoStart::Yes) { _start_time.reset(); - _elapsed_time = std::chrono::duration(0.0); + 
_elapsed_time = std::chrono::nanoseconds{0}; if (auto_start == AutoStart::Yes) { start(); } @@ -48,9 +49,12 @@ class Timer { [[nodiscard]] std::chrono::duration elapsed() const { if (_start_time.has_value()) { - return _elapsed_time + (std::chrono::high_resolution_clock::now() - _start_time.value()); + auto current_elapsed = + _elapsed_time + std::chrono::duration_cast( + std::chrono::high_resolution_clock::now() - _start_time.value()); + return std::chrono::duration(current_elapsed); } - return _elapsed_time; + return std::chrono::duration(_elapsed_time); } friend std::ostream& operator<<(std::ostream& os, const Timer& timer) { diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index ac05872..aefda3f 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -79,6 +79,11 @@ endif() # -------------------------------------------------------------------- # Add MPI tests with varying ranks +set(DYNAMPI_MAX_MPI_RANK 64 CACHE STRING "Max MPI ranks for ctest MPI runs") +if(DEFINED ENV{DYNAMPI_MAX_MPI_RANK}) + set(DYNAMPI_MAX_MPI_RANK "$ENV{DYNAMPI_MAX_MPI_RANK}") +endif() + function(add_mpi_test test_name num_procs) set(command ${MPIEXEC_EXECUTABLE} @@ -91,6 +96,10 @@ function(add_mpi_test test_name num_procs) add_test(NAME ${test_name} COMMAND ${command}) endfunction() -foreach(rank 1 2 3 4 8) +foreach(rank 1 2 3 4 8 16 64) + if(rank GREATER DYNAMPI_MAX_MPI_RANK) + message(STATUS "Skipping mpi_test_${rank}_rank (rank > ${DYNAMPI_MAX_MPI_RANK})") + else() add_mpi_test(mpi_test_${rank}_rank ${rank}) + endif() endforeach() diff --git a/test/lsan.supp b/test/lsan.supp index 3c24107..4e68452 100644 --- a/test/lsan.supp +++ b/test/lsan.supp @@ -3,6 +3,20 @@ leak:ompi_op_base_op_select leak:ompi_mpi_init leak:ompi_mpi_finalize +leak:ompi_comm_init_mpi3 +leak:pmix_hash_fetch +leak:opal_reachable_allocate +leak:pmix_pointer_array_list +leak:fetch_nodeinfo +leak:mca_btl_tcp_proc_create +leak:opal_vasprintf +leak:PMIx_Value_create +leak:fetch_appinfo +leak:fetch_sessioninfo 
+leak:avx_component_op_query +leak:pmix_pointer_array_init +leak:pmix_bfrops_base_copy_value +leak:hwloc__add_info leak:orte_finalize leak:libevent_core leak:event_base_loop diff --git a/test/mpi/test_distributers.cpp b/test/mpi/test_distributers.cpp index 4b666f2..7efa4e9 100644 --- a/test/mpi/test_distributers.cpp +++ b/test/mpi/test_distributers.cpp @@ -6,26 +6,65 @@ #include #include +#include #include #include #include +#include #include #include "dynampi/impl/hierarchical_distributor.hpp" #include "dynampi/mpi/mpi_communicator.hpp" #include "mpi_test_environment.hpp" -// --- Configuration Wrapper --- -template