From 82dc0cc3b647b61799a185385047f768d64c6e9b Mon Sep 17 00:00:00 2001
From: Roie Danino
Date: Tue, 22 Apr 2025 15:26:52 +0000
Subject: [PATCH] VERIFIER/TEAM: added tests for basic team management operations

Signed-off-by: Roie Danino
---
 verifier/Makefile.am                   |  12 +++
 verifier/configure.ac                  |  13 +++
 verifier/osh_exec.c                    |   4 +
 verifier/osh_tests.h                   |   2 +
 verifier/team/osh_team.c               | 118 +++++++++++++++++++++++++
 verifier/team/osh_team_split_2d.c      |  76 ++++++++++++++++
 verifier/team/osh_team_split_strided.c |  73 +++++++++++++++
 verifier/team/osh_team_tests.h         |  20 +++++
 8 files changed, 318 insertions(+)
 create mode 100644 verifier/team/osh_team.c
 create mode 100644 verifier/team/osh_team_split_2d.c
 create mode 100644 verifier/team/osh_team_split_strided.c
 create mode 100644 verifier/team/osh_team_tests.h

diff --git a/verifier/Makefile.am b/verifier/Makefile.am
index 534509a..a0fca8a 100644
--- a/verifier/Makefile.am
+++ b/verifier/Makefile.am
@@ -53,6 +53,11 @@ noinst_HEADERS = \
 	nbi/osh_nbi_tests.h \
 	misc/osh_misc_tests.h
 
+if HAVE_TEAM
+noinst_HEADERS += \
+	team/osh_team_tests.h
+endif
+
 oshmem_test_SOURCES = \
 	osh_main.c \
 	osh_exec.c \
@@ -265,6 +270,13 @@ oshmem_test_SOURCES += \
 	coll/osh_coll_tc12.c
 endif
 
+if HAVE_TEAM
+oshmem_test_SOURCES += \
+	team/osh_team.c \
+	team/osh_team_split_strided.c \
+	team/osh_team_split_2d.c
+endif
+
 if ENABLE_MPI
 noinst_HEADERS += \
 	mix/osh_mix.h
diff --git a/verifier/configure.ac b/verifier/configure.ac
index 0c3f655..49f926c 100644
--- a/verifier/configure.ac
+++ b/verifier/configure.ac
@@ -74,6 +74,19 @@ AC_CHECK_DECLS([shmem_uint_atomic_and, shmem_ulong_atomic_and, shmem_ulonglong_a
                 shmem_uint_atomic_xor, shmem_ulong_atomic_xor, shmem_ulonglong_atomic_xor],
                [], [], [#include "shmem.h"])
 
+# Build the TEAM suite only when every team routine it uses is declared.
+# The AC_CHECK_DECLS actions run once per symbol, so they only record the
+# result; AM_CONDITIONAL and the CFLAGS update are done once, afterwards.
+have_team_decls=yes
+AC_CHECK_DECLS([shmem_team_my_pe, shmem_team_n_pes, shmem_team_get_config,
+                shmem_team_translate_pe, shmem_team_split_strided,
+                shmem_team_split_2d, shmem_team_destroy],
+               [], [have_team_decls=no],
+               [#include "shmem.h"])
+AM_CONDITIONAL([HAVE_TEAM], [test "x$have_team_decls" = xyes])
+AS_IF([test "x$have_team_decls" = xyes],
+      [CFLAGS="$CFLAGS -DHAVE_TEAM"])
+
 AC_CHECK_HEADERS([unistd.h])
 AC_CONFIG_FILES([Makefile])
 AC_OUTPUT
diff --git a/verifier/osh_exec.c b/verifier/osh_exec.c
index 25c0517..dcfe83b 100644
--- a/verifier/osh_exec.c
+++ b/verifier/osh_exec.c
@@ -29,6 +29,7 @@ extern const TE_NODE mix_tcs[];
 extern const TE_NODE analysis_tcs[];
 extern const TE_NODE nbi_tcs[];
 extern const TE_NODE misc_tcs[];
+extern const TE_NODE team_tcs[];
 
 
 
@@ -48,6 +49,9 @@ const TE_NODE exec_tst[] =
     { analysis_tcs, proc_tst_analysis, "analysis", aopt_set_string( "an" ), "Run " MODULE_NAME " ANALYSIS test suite.", TEST_IGNORE},
     { nbi_tcs, proc_tst_nbi, "nbi", aopt_set_string( "nb" ), "Run " MODULE_NAME " NB DATA Transfer test suite.", TEST_RUN},
     { misc_tcs, proc_tst_misc, "misc", aopt_set_string( "ms" ), "Run " MODULE_NAME " AUX test suite.", TEST_RUN},
+#ifdef HAVE_TEAM
+    { team_tcs, proc_tst_team, "team", aopt_set_string( "tm" ), "Run " MODULE_NAME " TEAM test suite.", TEST_RUN},
+#endif
     { NULL, NULL, NULL, aopt_set_string( NULL ), NULL }
 };
 
diff --git a/verifier/osh_tests.h b/verifier/osh_tests.h
index 5cf41e3..da02ffa 100644
--- a/verifier/osh_tests.h
+++ b/verifier/osh_tests.h
@@ -23,6 +23,8 @@ int proc_tst_mix( const TE_NODE *node, int argc, const char **argv );
 int proc_tst_analysis( const TE_NODE *node, int argc, const char **argv );
 int proc_tst_nbi( const TE_NODE *node, int argc, const char **argv );
 int proc_tst_misc( const TE_NODE *node, int argc, const char **argv );
+int proc_tst_team( const TE_NODE *node, int argc, const char **argv );
+
 
 #if HAVE_DECL_SHMEM_UINT_ATOMIC_FETCH_AND
 # define TEST_INT_FETCH_AND TEST_RUN
diff --git a/verifier/team/osh_team.c b/verifier/team/osh_team.c
new file mode 100644
index 0000000..f6176d8
--- /dev/null
+++ b/verifier/team/osh_team.c
@@ -0,0 +1,118 @@
+/*
+ * Copyright (c) 2025 Nvidia Corporation. All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+#include "osh_def.h"
+#include "osh_cmn.h"
+#include "osh_log.h"
+
+#include "osh_tests.h"
+#include "osh_team_tests.h"
+
+/*
+ * Test cases of the TEAM suite. The table is closed by an entry whose
+ * pointers are all NULL.
+ */
+const TE_NODE team_tcs[] =
+{
+    { NULL, test_team_split_strided, "team_split_strided", aopt_set_string( "team_split_strided" ), "Strided splitting of a team: A correctness test.", TEST_RUN},
+    { NULL, test_team_split_2d, "team_split_2d", aopt_set_string( "team_split_2d" ), "2D splitting of a team: A correctness test.", TEST_RUN},
+    { NULL, NULL, NULL, aopt_set_string( NULL ), NULL, TEST_IGNORE}
+};
+
+/*
+ * Options understood by the suite itself. Only the closing entry is
+ * present, i.e. no suite-specific option is defined.
+ */
+static const AOPT_DESC self_opt_desc[] =
+{
+    { 0, AOPT_NOARG, aopt_set_literal( 0 ), aopt_set_string( NULL ), NULL }
+};
+
+
+/*
+ * Entry point of the TEAM suite.
+ *
+ * Creates the option objects for the arguments handed to the suite and
+ * releases them again. When they cannot be created, the suite's help
+ * text is printed.
+ *
+ * returns OSH_ERR_NONE on success, OSH_ERR_FATAL if node is NULL or the
+ * option objects could not be created.
+ */
+int proc_tst_team( const TE_NODE *node, int argc, const char **argv )
+{
+    OSH_ERROR rc = OSH_ERR_NONE;
+    const AOPT_OBJECT *opts = NULL;
+    int opt_argc = 0;
+
+    UNREFERENCED_PARAMETER(argc);
+
+    if (node == NULL)
+    {
+        return OSH_ERR_FATAL;
+    }
+
+    /* aopt_init() is handed a scratch copy of argc */
+    opt_argc = argc;
+    opts = aopt_init(&opt_argc, (const char **)argv, self_opt_desc);
+    if (opts == NULL)
+    {
+        rc = OSH_ERR_FATAL;
+    }
+
+    if (rc != 0)
+    {
+        /* Display help information */
+        char name_buf[30];
+        const char *usage = NULL;
+
+        log_help("%s: %s\n", display_opt(node, name_buf, sizeof(name_buf)), node->note);
+        log_help("\n");
+        log_help("Valid arguments:\n");
+        usage = aopt_help(self_opt_desc);
+        if (usage != NULL)
+        {
+            log_help("%s", usage);
+            sys_free((void*)usage);
+            log_help("\n");
+        }
+    }
+
+    /* Destroy option objects */
+    aopt_exit((AOPT_OBJECT*)opts);
+
+    return rc;
+}
+
+/**
+ * Validate that the pe translation back to the global team is correct.
+ * This is a sanity check to ensure that the team split is done correctly.
+ * returns TC_PASS if the translation is correct, TC_FAIL otherwise.
+ * A PE that is not a member of the team has nothing to translate and
+ * gets TC_PASS as well.
+ */
+int validate_pe_translation(shmem_team_t team)
+{
+    int expected_global_pe = shmem_my_pe();
+    int team_pe = shmem_team_my_pe(team);
+    int global_pe;
+
+    /* The PE might not be part of the team; there is nothing to check then. */
+    if (team_pe == -1) {
+        return TC_PASS;
+    }
+
+    /* Only translate once team_pe is known to be a real index in the team. */
+    global_pe = shmem_team_translate_pe(team, team_pe, SHMEM_TEAM_WORLD);
+    if (global_pe != expected_global_pe) {
+        log_error(OSH_TC, "Error: failed to translate pe back to global pe: global_pe %d != %d\n", global_pe, expected_global_pe);
+        return TC_FAIL;
+    }
+
+    return TC_PASS;
+}
+
+/**
+ * Check that a team holds the expected number of PEs.
+ * team_name is only used to label the error message.
+ * returns TC_PASS if shmem_team_n_pes(team) equals expected_npes,
+ * TC_FAIL otherwise.
+ */
+int validate_team_npes(shmem_team_t team, int expected_npes, const char *team_name)
+{
+    int npes = shmem_team_n_pes(team);
+    if (npes != expected_npes) {
+        log_error(OSH_TC, "Error: %s npes size mismatch: %d != %d\n", team_name, npes, expected_npes);
+        return TC_FAIL;
+    }
+
+    return TC_PASS;
+}
diff --git a/verifier/team/osh_team_split_2d.c b/verifier/team/osh_team_split_2d.c
new file mode 100644
index 0000000..fcd208c
--- /dev/null
+++ b/verifier/team/osh_team_split_2d.c
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2025 Nvidia Corporation. All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+#include
+
+#include
+#include
+
+#include "osh_def.h"
+#include "osh_cmn.h"
+#include "osh_log.h"
+
+#include "osh_team_tests.h"
+
+
+/*
+ * Split SHMEM_TEAM_WORLD into a 2D grid that is xrange PEs wide and check
+ * the x-axis and y-axis teams the calling PE ends up in.
+ *
+ * PEs fill the grid row by row. When xrange does not divide the number of
+ * PEs, the last row is shorter and the columns past its end hold one PE
+ * less, so the expected team sizes are derived from the calling PE's own
+ * position in the grid rather than from xrange and npes / xrange.
+ *
+ * returns TC_PASS if both teams are valid, translate back to the calling
+ * PE and have the expected sizes, TC_FAIL otherwise.
+ */
+static int _test_team_split_2d(int xrange)
+{
+    int status = TC_PASS;
+    int rc;
+    int npes = shmem_n_pes();
+    int my_pe = shmem_my_pe();
+    int x_index = my_pe % xrange;      /* column of the calling PE */
+    int row_start = my_pe - x_index;   /* first PE of the calling PE's row */
+    int expected_xrange = (npes - row_start < xrange) ? (npes - row_start) : xrange;
+    int expected_yrange = (npes - x_index + xrange - 1) / xrange;
+    shmem_team_t x_team = SHMEM_TEAM_INVALID;
+    shmem_team_t y_team = SHMEM_TEAM_INVALID;
+
+    /* Keep the split's return code apart from the TC_* result. */
+    rc = shmem_team_split_2d(SHMEM_TEAM_WORLD, xrange, NULL, 0, &x_team,
+                             NULL, 0, &y_team);
+    if (rc ||
+        (x_team == SHMEM_TEAM_INVALID) ||
+        (y_team == SHMEM_TEAM_INVALID)) {
+        log_error(OSH_TC, "Error: shmem_team_split_2d failed\n");
+        status = TC_FAIL;
+        goto out_free_teams;
+    }
+
+    if (validate_pe_translation(x_team) != TC_PASS ||
+        validate_pe_translation(y_team) != TC_PASS) {
+        log_error(OSH_TC, "Error: x_team or y_team pe translation failed\n");
+        status = TC_FAIL;
+        goto out_free_teams;
+    }
+
+    if (validate_team_npes(x_team, expected_xrange, "x_team") != TC_PASS ||
+        validate_team_npes(y_team, expected_yrange, "y_team") != TC_PASS) {
+        log_error(OSH_TC, "Error: x_team or y_team npes size not as expected\n");
+        status = TC_FAIL;
+        goto out_free_teams;
+    }
+
+out_free_teams:
+    /* Release whichever team was actually created, also on the error paths. */
+    if (x_team != SHMEM_TEAM_INVALID) {
+        shmem_team_destroy(x_team);
+    }
+    if (y_team != SHMEM_TEAM_INVALID) {
+        shmem_team_destroy(y_team);
+    }
+    return status;
+}
+
+/*
+ * Test case: 2D split of SHMEM_TEAM_WORLD for every x-axis width from 2 up
+ * to the number of PEs. Stops at the first width that fails.
+ *
+ * returns TC_PASS if every width passed (or none was tried), otherwise the
+ * status of the failing width.
+ */
+int test_team_split_2d(const TE_NODE *node, int argc, const char *argv[])
+{
+    int status = TC_PASS;
+    int npes = shmem_n_pes();
+    int xrange;
+
+    UNREFERENCED_PARAMETER(node);
+    UNREFERENCED_PARAMETER(argc);
+    UNREFERENCED_PARAMETER(argv);
+
+    for (xrange = 2; xrange <= npes; ++xrange) {
+        status = _test_team_split_2d(xrange);
+        if (status != TC_PASS) {
+            log_error(OSH_TC, "Error: test_team_split_2d failed for xrange %d\n", xrange);
+            return status;
+        }
+    }
+
+    return status;
+}
diff --git a/verifier/team/osh_team_split_strided.c b/verifier/team/osh_team_split_strided.c
new file mode 100644
index 0000000..fdb9463
--- /dev/null
+++ b/verifier/team/osh_team_split_strided.c
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2025 Nvidia Corporation. All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+#include
+
+#include
+#include
+
+#include "osh_def.h"
+#include "osh_cmn.h"
+#include "osh_log.h"
+
+#include "osh_team_tests.h"
+
+
+/*
+ * Create a team out of every stride-th PE of SHMEM_TEAM_WORLD, beginning
+ * at PE start, and check it from the calling PE's point of view.
+ *
+ * PEs that are not selected by (start, stride, size) do not belong to the
+ * new team. OpenSHMEM hands such PEs SHMEM_TEAM_INVALID although the split
+ * succeeded, so an invalid handle is only an error on a PE that should
+ * have become a member.
+ *
+ * returns TC_PASS if the split behaved as expected for this PE, TC_FAIL
+ * otherwise.
+ */
+static int _test_team_split_strided(int start, int stride)
+{
+    int status = TC_PASS;
+    int rc;
+    int npes = shmem_n_pes();
+    int expected_npes = (npes - start) / stride;
+    int offset = shmem_my_pe() - start;
+    /* Member PEs are start, start + stride, ... (expected_npes of them). */
+    int is_member = (offset >= 0) && (offset % stride == 0) &&
+                    (offset / stride < expected_npes);
+    shmem_team_t new_team = SHMEM_TEAM_INVALID;
+
+    /* Keep the split's return code apart from the TC_* result. */
+    rc = shmem_team_split_strided(SHMEM_TEAM_WORLD, start, stride, expected_npes, NULL, 0,
+                                  &new_team);
+    if (rc || (is_member && (new_team == SHMEM_TEAM_INVALID))) {
+        log_error(OSH_TC, "Error: shmem_team_split_strided failed\n");
+        return TC_FAIL;
+    }
+
+    /* Not a member: nothing to validate and nothing to destroy. */
+    if (new_team == SHMEM_TEAM_INVALID) {
+        return TC_PASS;
+    }
+
+    if (validate_pe_translation(new_team) != TC_PASS) {
+        log_error(OSH_TC, "Error: pe translation to global team failed\n");
+        status = TC_FAIL;
+        goto out;
+    }
+
+    if (validate_team_npes(new_team, expected_npes, "strided_team") != TC_PASS) {
+        status = TC_FAIL;
+        goto out;
+    }
+
+out:
+    shmem_team_destroy(new_team);
+    return status;
+}
+
+
+/*
+ * Test case: strided split of SHMEM_TEAM_WORLD for every stride from 2 up
+ * to npes / 2, each with the start PEs 0 .. npes / stride - 1. Stops at
+ * the first combination that fails.
+ *
+ * returns TC_PASS if every combination passed (or none was tried),
+ * otherwise the status of the failing combination.
+ */
+int test_team_split_strided(const TE_NODE *node, int argc, const char *argv[])
+{
+    int status = TC_PASS;
+    int npes = shmem_n_pes();
+    int stride, start;
+
+    UNREFERENCED_PARAMETER(node);
+    UNREFERENCED_PARAMETER(argc);
+    UNREFERENCED_PARAMETER(argv);
+
+    for (stride = 2; stride <= npes / 2; ++stride) {
+        for (start = 0; start < (npes / stride); ++start) {
+            status = _test_team_split_strided(start, stride);
+            if (status != TC_PASS) {
+                log_error(OSH_TC, "Error: test_team_split_strided failed for stride %d, start %d\n", stride, start);
+                return status;
+            }
+        }
+    }
+
+    return status;
+}
diff --git a/verifier/team/osh_team_tests.h b/verifier/team/osh_team_tests.h
new file mode 100644
index 0000000..f6eb9b2
--- /dev/null
+++ b/verifier/team/osh_team_tests.h
@@ -0,0 +1,20 @@
+/*
+ * Copyright (c) 2025 Nvidia Corporation. All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+#ifndef __OSH_TEAM_H__
+#define __OSH_TEAM_H__
+
+#include
+
+/* Test cases of the TEAM suite. */
+int test_team_split_2d(const TE_NODE *node, int argc, const char *argv[]);
+int test_team_split_strided(const TE_NODE *node, int argc, const char *argv[]);
+
+/*
+ * Checks shared by the test cases; both return TC_PASS or TC_FAIL.
+ * validate_team_npes compares the team's size with expected_npes and uses
+ * team_name in its error message; validate_pe_translation checks that the
+ * calling PE's index in the team translates back to its own PE number.
+ */
+int validate_team_npes(shmem_team_t team, int expected_npes, const char *team_name);
+int validate_pe_translation(shmem_team_t team);
+
+#endif