diff --git a/circle.yml b/circle.yml index 7b9d63512..25fff329f 100644 --- a/circle.yml +++ b/circle.yml @@ -21,8 +21,8 @@ test: parallel: true - bash ./dash/scripts/circleci/run-docker.sh Minimal gnu: parallel: true - - bash ./dash/scripts/circleci/run-docker.sh Nasty gnu: - parallel: true +# - bash ./dash/scripts/circleci/run-docker.sh Nasty gnu: +# parallel: true - bash ./dash/scripts/circleci/run-docker.sh Release clang: parallel: true - grep "FAIL" ./dash-ci.log && (echo "Full log:" ; cat ./dash-ci.log ; exit 1) || exit 0: diff --git a/dart-if/include/dash/dart/if/dart_communication.h b/dart-if/include/dash/dart/if/dart_communication.h index 688b413d0..3987ae083 100644 --- a/dart-if/include/dash/dart/if/dart_communication.h +++ b/dart-if/include/dash/dart/if/dart_communication.h @@ -329,10 +329,13 @@ dart_ret_t dart_compare_and_swap( * is guaranteed. A later flush operation is needed to guarantee * local and remote completion. * - * \param dest The local destination buffer to store the data to. - * \param gptr A global pointer determining the source of the get operation. - * \param nelem The number of elements of type \c dtype to transfer. - * \param dtype The data type of the values in buffer \c dest. + * \param dest The local destination buffer to store the data to. + * \param gptr A global pointer determining the source of the get operation. + * \param nelem The number of elements of type \c dtype to transfer. + * \param src_type The data type of the values at the source. + * \param dst_type The data type of the values in buffer \c dest. + * + * \note Base-type conversion is not performed. * * \return \c DART_OK on success, any other of \ref dart_ret_t otherwise. * @@ -343,7 +346,8 @@ dart_ret_t dart_get( void * dest, dart_gptr_t gptr, size_t nelem, - dart_datatype_t dtype) DART_NOTHROW; + dart_datatype_t src_type, + dart_datatype_t dst_type) DART_NOTHROW; /** * 'REGULAR' variant of dart_put. @@ -352,10 +356,13 @@ dart_ret_t dart_get( * is guaranteed. A later flush operation is needed to guarantee * local and remote completion. * - * \param gptr A global pointer determining the target of the put operation. - * \param src The local source buffer to load the data from. - * \param nelem The number of elements of type \c dtype to transfer. - * \param dtype The data type of the values in buffer \c src. + * \param gptr A global pointer determining the target of the put operation. + * \param src The local source buffer to load the data from. + * \param nelem The number of elements of type \c dtype to transfer. + * \param src_type The data type of the values in buffer \c src. + * \param dst_type The data type of the values at the target. + * + * \note Base-type conversion is not performed. * * \return \c DART_OK on success, any other of \ref dart_ret_t otherwise. * @@ -366,7 +373,8 @@ dart_ret_t dart_put( dart_gptr_t gptr, const void * src, size_t nelem, - dart_datatype_t dtype) DART_NOTHROW; + dart_datatype_t src_type, + dart_datatype_t dst_type) DART_NOTHROW; /** @@ -453,49 +461,6 @@ dart_ret_t dart_flush_local_all( */ typedef struct dart_handle_struct * dart_handle_t; -typedef enum { - STRIDED_TO_STRIDED = 0, - STRIDED_TO_CONTIG, - CONTIG_TO_STRIDED -} dart_stride_option; - -dart_ret_t dart_get_strided_handle( - void * dest, - dart_gptr_t gptr, - size_t nblocks, - size_t nelems_block, - size_t stride, - dart_datatype_t dtype, - dart_stride_option stride_opt, - dart_handle_t * handle); - -dart_ret_t dart_get_indexed_handle( - void * dest, - dart_gptr_t gptr, - size_t nblocks, - size_t nelems_block, - int* indexes, - dart_datatype_t dtype, - dart_stride_option stride_opt, - dart_handle_t * handle); - -/** - * 'HANDLE' variant of dart_get. - * Neither local nor remote completion is guaranteed. A later - * dart_wait*() call or a fence/flush operation is needed to guarantee - * completion. - * - * \param dest Local target memory to store the data. - * \param gptr Global pointer being the source of the data transfer. - * \param nelem The number of elements of \c dtype in buffer \c dest. - * \param dtype The data type of the values in buffer \c dest. - * \param[out] handle Pointer to DART handle to instantiate for later use with \c dart_wait, \c dart_wait_all etc. - * - * \return \c DART_OK on success, any other of \ref dart_ret_t otherwise. - * - * \threadsafe - * \ingroup DartCommunication - */ #define DART_HANDLE_NULL (dart_handle_t)NULL /** @@ -504,12 +469,15 @@ dart_ret_t dart_get_indexed_handle( * dart_wait*() call or a fence/flush operation is needed to guarantee * completion. * - * \param dest Local target memory to store the data. - * \param gptr Global pointer being the source of the data transfer. - * \param nelem The number of elements of \c dtype in buffer \c dest. - * \param dtype The data type of the values in buffer \c dest. + * \param dest Local target memory to store the data. + * \param gptr Global pointer being the source of the data transfer. + * \param nelem The number of elements of \c dtype in buffer \c dest. + * \param src_type The data type of the values at the source. + * \param dst_type The data type of the values in buffer \c dest. * \param[out] handle Pointer to DART handle to instantiate for later use with \c dart_wait, \c dart_wait_all etc. * + * \note Base-type conversion is not performed. + * * \return \c DART_OK on success, any other of \ref dart_ret_t otherwise. * * \threadsafe @@ -519,7 +487,8 @@ dart_ret_t dart_get_handle( void * dest, dart_gptr_t gptr, size_t nelem, - dart_datatype_t dtype, + dart_datatype_t src_type, + dart_datatype_t dst_type, dart_handle_t * handle) DART_NOTHROW; /** @@ -528,12 +497,15 @@ dart_ret_t dart_get_handle( * dart_wait*() call or a fence/flush operation is needed to guarantee * completion. * - * \param gptr Global pointer being the target of the data transfer. - * \param src Local source memory to transfer data from. - * \param nelem The number of elements of type \c dtype to transfer. - * \param dtype The data type of the values in buffer \c dest. + * \param gptr Global pointer being the target of the data transfer. + * \param src Local source memory to transfer data from. + * \param nelem The number of elements of type \c dtype to transfer. + * \param src_type The data type of the values in buffer \c src. + * \param dst_type The data type of the values at the target. * \param[out] handle Pointer to DART handle to instantiate for later use with \c dart_wait, \c dart_wait_all etc. * + * \note Base-type conversion is not performed. + * * \return \c DART_OK on success, any other of \ref dart_ret_t otherwise. * * \threadsafe @@ -543,7 +515,8 @@ dart_ret_t dart_put_handle( dart_gptr_t gptr, const void * src, size_t nelem, - dart_datatype_t dtype, + dart_datatype_t src_type, + dart_datatype_t dst_type, dart_handle_t * handle) DART_NOTHROW; /** @@ -657,10 +630,13 @@ dart_ret_t dart_testall_local( * 'BLOCKING' variant of dart_get. * Both local and remote completion is guaranteed. * - * \param dest Local target memory to store the data. - * \param gptr Global pointer being the source of the data transfer. - * \param nelem The number of elements of type \c dtype to transfer. - * \param dtype The data type of the values in buffer \c dest. + * \param dest Local target memory to store the data. + * \param gptr Global pointer being the source of the data transfer. + * \param nelem The number of elements of type \c dtype to transfer. + * \param src_type The data type of the values at the source. + * \param dst_type The data type of the values in buffer \c dest. + * + * \note Base-type conversion is not performed. * * \return \c DART_OK on success, any other of \ref dart_ret_t otherwise. * @@ -671,16 +647,20 @@ dart_ret_t dart_get_blocking( void * dest, dart_gptr_t gptr, size_t nelem, - dart_datatype_t dtype) DART_NOTHROW; + dart_datatype_t src_type, + dart_datatype_t dst_type) DART_NOTHROW; /** * 'BLOCKING' variant of dart_put. * Both local and remote completion is guaranteed. * - * \param gptr Global pointer being the target of the data transfer. - * \param src Local source memory to transfer data from. - * \param nelem The number of elements of type \c dtype to transfer. - * \param dtype The data type of the values in buffer \c dest. + * \param gptr Global pointer being the target of the data transfer. + * \param src Local source memory to transfer data from. + * \param nelem The number of elements of type \c dtype to transfer. + * \param src_type The data type of the values in buffer \c src. + * \param dst_type The data type of the values at the target. + * + * \note Base-type conversion is not performed. * * \return \c DART_OK on success, any other of \ref dart_ret_t otherwise. * @@ -691,7 +671,8 @@ dart_ret_t dart_put_blocking( dart_gptr_t gptr, const void * src, size_t nelem, - dart_datatype_t dtype) DART_NOTHROW; + dart_datatype_t src_type, + dart_datatype_t dst_type) DART_NOTHROW; /** \} */ diff --git a/dart-if/include/dash/dart/if/dart_globmem.h b/dart-if/include/dash/dart/if/dart_globmem.h index 5969b042d..3604b7059 100644 --- a/dart-if/include/dash/dart/if/dart_globmem.h +++ b/dart-if/include/dash/dart/if/dart_globmem.h @@ -318,7 +318,7 @@ dart_ret_t dart_memfree(dart_gptr_t gptr) DART_NOTHROW; */ dart_ret_t dart_team_memalloc_aligned( dart_team_t teamid, - size_t nelem, + size_t nelem, dart_datatype_t dtype, dart_gptr_t * gptr) DART_NOTHROW; @@ -362,10 +362,10 @@ dart_ret_t dart_team_memfree( */ dart_ret_t dart_team_memregister_aligned( dart_team_t teamid, - size_t nelem, + size_t nelem, dart_datatype_t dtype, - void * addr, - dart_gptr_t * gptr) DART_NOTHROW; + void * addr, + dart_gptr_t * gptr) DART_NOTHROW; /** * Collective function, attaches external memory previously allocated by @@ -386,10 +386,10 @@ dart_ret_t dart_team_memregister_aligned( */ dart_ret_t dart_team_memregister( dart_team_t teamid, - size_t nlelem, + size_t nlelem, dart_datatype_t dtype, - void * addr, - dart_gptr_t * gptr) DART_NOTHROW; + void * addr, + dart_gptr_t * gptr) DART_NOTHROW; /** * Collective function similar to dart_team_memfree() but on previously diff --git a/dart-if/include/dash/dart/if/dart_types.h b/dart-if/include/dash/dart/if/dart_types.h index 3ddb729b3..c380d18ce 100644 --- a/dart-if/include/dash/dart/if/dart_types.h +++ b/dart-if/include/dash/dart/if/dart_types.h @@ -96,10 +96,11 @@ typedef enum * * \ingroup DartTypes */ +/** typedef enum { DART_TYPE_UNDEFINED = 0, - /// integral data types + /// integral data types DART_TYPE_BYTE, DART_TYPE_SHORT, DART_TYPE_INT, @@ -113,6 +114,35 @@ typedef enum /// Reserved, do not use! DART_TYPE_COUNT } dart_datatype_t; +**/ + +typedef intptr_t dart_datatype_t; +#if 0 +extern struct dart_datatype_struct __dart_type_undefined_t; +extern struct dart_datatype_struct __dart_type_byte_t; +extern struct dart_datatype_struct __dart_type_short_t; +extern struct dart_datatype_struct __dart_type_int_t; +extern struct dart_datatype_struct __dart_type_uint_t; +extern struct dart_datatype_struct __dart_type_long_t; +extern struct dart_datatype_struct __dart_type_ulong_t; +extern struct dart_datatype_struct __dart_type_longlong_t; +extern struct dart_datatype_struct __dart_type_float_t; +extern struct dart_datatype_struct __dart_type_double_t; +#endif + + +#define DART_TYPE_UNDEFINED (dart_datatype_t)(0) +#define DART_TYPE_BYTE (dart_datatype_t)(1) +#define DART_TYPE_SHORT (dart_datatype_t)(2) +#define DART_TYPE_INT (dart_datatype_t)(3) +#define DART_TYPE_UINT (dart_datatype_t)(4) +#define DART_TYPE_LONG (dart_datatype_t)(5) +#define DART_TYPE_ULONG (dart_datatype_t)(6) +#define DART_TYPE_LONGLONG (dart_datatype_t)(7) +#define DART_TYPE_FLOAT (dart_datatype_t)(8) +#define DART_TYPE_DOUBLE (dart_datatype_t)(9) +#define DART_TYPE_LAST (dart_datatype_t)(10) + /** size for integral \c size_t */ #if (UINT32_MAX == SIZE_MAX) @@ -236,11 +266,11 @@ typedef int16_t dart_team_t; /** * Levels of thread-support offered by DART. - * \ref DART_THREAD_MULTIPLE is supported if + * \ref DART_THREAD_MULTIPLE is supported if * DART has been build with \c DART_ENABLE_THREADSUPPORT - * and the underlying communication backend supports + * and the underlying communication backend supports * thread-safe access. - * + * */ typedef enum { @@ -660,6 +690,69 @@ typedef struct } dart_config_t; +/** + * Create a strided data type using blocks of size \c blocklen and a stride + * of \c stride. The number of elements copied using the resulting datatype + * has to be a multiple of \c blocklen. + * + * \param basetype The type of elements in the blocks. + * \param stride The stride between blocks. + * \param blocklen The number of elements of type \c basetype in each block. + * \param[out] newtype The newly created data type. + * + * \return \ref DART_OK on success, any other of \ref dart_ret_t otherwise. + * + * \ingroup DartTypes + */ +dart_ret_t +dart_type_create_strided( + dart_datatype_t basetype, + size_t stride, + size_t blocklen, + dart_datatype_t * newtype); + + +/** + * Create an indexed data type using \c count blocks of size \c blocklen[i] + * with offsets \c offset[i] for each 0 <= i < count. The number of + * elements copied using the resulting datatype has to be a multiple of + * Sum(\c blocklen[0:i]). + * + * \param basetype The type of elements in the blocks. + * \param count The number of blocks. + * \param blocklen The number of elements of type \c basetype in block[i]. + * \param offset The offset of block[i]. + * \param[out] newtype The newly created data type. + * + * \return \ref DART_OK on success, any other of \ref dart_ret_t otherwise. + * + * \ingroup DartTypes + */ +dart_ret_t +dart_type_create_indexed( + dart_datatype_t basetype, + size_t count, + const size_t blocklen[], + const size_t offset[], + dart_datatype_t * newtype); + +/** + * Destroy a data type that was previously created using + * \ref dart_type_create_strided or \ref dart_type_create_indexed. + * + * Data types can be destroyed before pending operations using that type have + * completed. However, after destruction a type may not be used to start + * new operations. + * + * \param dart_type The type to be destroyed. + * + * \return \ref DART_OK on success, any other of \ref dart_ret_t otherwise. + * + * \ingroup DartTypes + */ +dart_ret_t +dart_type_destroy(dart_datatype_t *dart_type); + /** \cond DART_HIDDEN_SYMBOLS */ #define DART_INTERFACE_OFF /** \endcond */ diff --git a/dart-impl/mpi/include/dash/dart/mpi/dart_communication_priv.h b/dart-impl/mpi/include/dash/dart/mpi/dart_communication_priv.h index b487ef90e..b3e032b74 100644 --- a/dart-impl/mpi/include/dash/dart/mpi/dart_communication_priv.h +++ b/dart-impl/mpi/include/dash/dart/mpi/dart_communication_priv.h @@ -10,23 +10,68 @@ #include #include +#include +#include #include #include #include +#include + + +/** + * The maximum number of elements of a certain type to be + * transfered in one chunk. + */ +#define MAX_CONTIG_ELEMENTS INT_MAX + +typedef enum { + DART_KIND_BASIC = 0, + DART_KIND_STRIDED, + DART_KIND_INDEXED +} dart_type_kind_t; + +typedef struct dart_datatype_struct { + /// the underlying data-type (type == base_type for basic types) + dart_datatype_t base_type; + /// the kind of this type (basic, strided, indexed) + dart_type_kind_t kind; + /// the overall number of elements in this type + size_t num_elem; + union { + /// used for basic types + struct { + /// the size in bytes of this type + size_t size; + /// the underlying MPI type + MPI_Datatype mpi_type; + /// the underlying MPI type used to handle large (>2GB) transfers + MPI_Datatype max_type; + } basic; + /// used for DART_KIND_STRIDED + /// NOTE: the underlying MPI strided type is created dynamically based on + /// the number of blocks required. + struct { + /// the stride between blocks of size \c num_elem + int stride; + } strided; + /// used for DART_KIND_INDEXED + struct { + /// the underlying MPI type + MPI_Datatype mpi_type; + /// the numbers of elements in each block + int * blocklens; + /// the offsets at which each block starts + int * offsets; + /// the number of blocks + int num_blocks; + } indexed; + }; +} dart_datatype_struct_t; DART_INTERNAL -extern int dart__mpi__datatype_sizes[DART_TYPE_COUNT]; +extern dart_datatype_struct_t __dart_base_types[DART_TYPE_LAST]; -/** DART handle type for non-blocking one-sided operations. */ -struct dart_handle_struct -{ - MPI_Request reqs[2]; // a large transfer might consist of two operations - MPI_Win win; - dart_unit_t dest; - uint8_t num_reqs; - bool needs_flush; -}; dart_ret_t dart__mpi__datatype_init() DART_INTERNAL; @@ -34,7 +79,7 @@ dart__mpi__datatype_init() DART_INTERNAL; dart_ret_t dart__mpi__datatype_fini() DART_INTERNAL; -static inline MPI_Op dart__mpi__op(dart_operation_t dart_op) { +DART_INLINE MPI_Op dart__mpi__op(dart_operation_t dart_op) { switch (dart_op) { case DART_OP_MIN : return MPI_MIN; case DART_OP_MAX : return MPI_MAX; @@ -52,46 +97,118 @@ static inline MPI_Op dart__mpi__op(dart_operation_t dart_op) { } } -static inline MPI_Datatype dart__mpi__datatype(dart_datatype_t dart_datatype) { - switch (dart_datatype) { - case DART_TYPE_BYTE : return MPI_BYTE; - case DART_TYPE_SHORT : return MPI_SHORT; - case DART_TYPE_INT : return MPI_INT; - case DART_TYPE_UINT : return MPI_UNSIGNED; - case DART_TYPE_LONG : return MPI_LONG; - case DART_TYPE_ULONG : return MPI_UNSIGNED_LONG; - case DART_TYPE_LONGLONG : return MPI_LONG_LONG_INT; - case DART_TYPE_FLOAT : return MPI_FLOAT; - case DART_TYPE_DOUBLE : return MPI_DOUBLE; - default : return (MPI_Datatype)(-1); - } +DART_INLINE +dart_datatype_struct_t * dart__mpi__datatype_struct( + dart_datatype_t dart_datatype) +{ + return (dart_datatype < DART_TYPE_LAST) + ? &__dart_base_types[dart_datatype] + : (dart_datatype_struct_t *)dart_datatype; } -static inline int dart__mpi__datatype_sizeof(dart_datatype_t dart_datatype) { +DART_INLINE +int dart__mpi__datatype_sizeof(dart_datatype_t dart_type) { + dart_datatype_struct_t *dts = dart__mpi__datatype_struct(dart_type); + return (dts->kind == DART_KIND_BASIC) ? dts->basic.size : -1; +} - if (dart_datatype > DART_TYPE_UNDEFINED && dart_datatype < DART_TYPE_COUNT) - { - return dart__mpi__datatype_sizes[dart_datatype]; - } - return -1; +DART_INLINE +dart_datatype_t dart__mpi__datatype_base(dart_datatype_t dart_type) { + dart_datatype_struct_t *dts = dart__mpi__datatype_struct(dart_type); + return (dts->kind == DART_KIND_BASIC) ? dart_type : dts->base_type; +} + +DART_INLINE +bool dart__mpi__datatype_isbasic(dart_datatype_t dart_type) { + return (dart__mpi__datatype_struct(dart_type)->kind == DART_KIND_BASIC); } +DART_INLINE +bool dart__mpi__datatype_isstrided(dart_datatype_t dart_type) { + return (dart__mpi__datatype_struct(dart_type)->kind == DART_KIND_STRIDED); +} + +DART_INLINE +bool dart__mpi__datatype_isindexed(dart_datatype_t dart_type) { + return (dart__mpi__datatype_struct(dart_type)->kind == DART_KIND_INDEXED); +} -#if 0 -static inline int dart_mpi_datatype_disp_unit(dart_datatype_t dart_datatype) { - switch (dart_datatype) { - case DART_TYPE_BYTE : return 1; - case DART_TYPE_SHORT : return 1; - case DART_TYPE_INT : return 4; - case DART_TYPE_UINT : return 4; - case DART_TYPE_LONG : return 4; - case DART_TYPE_ULONG : return 4; - case DART_TYPE_LONGLONG : return 8; - case DART_TYPE_FLOAT : return 4; - case DART_TYPE_DOUBLE : return 8; - default : return 1; +DART_INLINE +bool dart__mpi__datatype_samebase( + dart_datatype_t lhs_type, + dart_datatype_t rhs_type) { + return ( + dart__mpi__datatype_base(lhs_type) == dart__mpi__datatype_base(rhs_type)); +} + +DART_INLINE +MPI_Datatype dart__mpi__datatype_maxtype(dart_datatype_t dart_type) { + dart_datatype_struct_t *dts = dart__mpi__datatype_struct(dart_type); + return (dts->kind == DART_KIND_BASIC) ? dts->basic.max_type + : dart__mpi__datatype_maxtype( + dts->base_type); +} + +DART_INLINE +size_t dart__mpi__datatype_num_elem(dart_datatype_t dart_type) { + return (dart__mpi__datatype_struct(dart_type)->num_elem); +} + +MPI_Datatype +dart__mpi__create_strided_mpi( + dart_datatype_t dart_type, + size_t num_blocks) DART_INTERNAL; + +void +dart__mpi__destroy_strided_mpi(MPI_Datatype *mpi_type) DART_INTERNAL; + +DART_INLINE +void +dart__mpi__datatype_convert_mpi( + dart_datatype_t dart_type, + size_t dart_num_elem, + MPI_Datatype * mpi_type, + int * mpi_num_elem) +{ + dart_datatype_struct_t *dts = dart__mpi__datatype_struct(dart_type); + switch(dts->kind) { + case DART_KIND_BASIC: + *mpi_num_elem = dart_num_elem; + *mpi_type = dts->basic.mpi_type; + break; + case DART_KIND_STRIDED: + *mpi_num_elem = 1; + *mpi_type = dart__mpi__create_strided_mpi( + dart_type, dart_num_elem / dts->num_elem); + break; + case DART_KIND_INDEXED: + *mpi_num_elem = dart_num_elem / dts->num_elem; + *mpi_type = dts->indexed.mpi_type; + break; + default: + // should not happen! + DART_ASSERT_MSG(NULL, "Unknown DART type detected!"); } } -#endif + +char* dart__mpi__datatype_name(dart_datatype_t dart_type) DART_INTERNAL; + +/** + * Helper macro that checks whether the given type is a basic type + * and errors out in case of an error. + */ + +#define CHECK_IS_BASICTYPE(_dtype) \ + do { \ + if (dart__unlikely(!dart__mpi__datatype_isbasic(_dtype))) { \ + char *name = dart__mpi__datatype_name(_dtype); \ + DART_LOG_ERROR( \ + "%s ! Only basic types allowed in this operation (%s given)",\ + __FUNCTION__, name); \ + free(name); \ + return DART_ERR_INVAL; \ + } \ + } while (0) + #endif /* DART_ADAPT_COMMUNICATION_PRIV_H_INCLUDED */ diff --git a/dart-impl/mpi/include/dash/dart/mpi/dart_segment.h b/dart-impl/mpi/include/dash/dart/mpi/dart_segment.h index 52c07546d..8d977c63d 100644 --- a/dart-impl/mpi/include/dash/dart/mpi/dart_segment.h +++ b/dart-impl/mpi/include/dash/dart/mpi/dart_segment.h @@ -96,7 +96,9 @@ dart_segment_info_t * dart_segment_get_info( */ static inline MPI_Aint -dart_segment_disp(dart_segment_info_t *seginfo, dart_team_unit_t team_unit_id) +dart_segment_disp( + const dart_segment_info_t *seginfo, + dart_team_unit_t team_unit_id) { return (seginfo->disp != NULL) ? seginfo->disp[team_unit_id.id] : 0; } diff --git a/dart-impl/mpi/src/dart_communication.c b/dart-impl/mpi/src/dart_communication.c index eb2fc2747..349d85035 100644 --- a/dart-impl/mpi/src/dart_communication.c +++ b/dart-impl/mpi/src/dart_communication.c @@ -30,11 +30,6 @@ #include #include -/** - * The maximum number of elements of a certain type to be - * transfered in one chunk. - */ -#define MAX_CONTIG_ELEMENTS INT_MAX #define CHECK_UNITID_RANGE(_unitid, _team_data) \ do { \ @@ -45,6 +40,38 @@ } \ } while (0) +#define CHECK_EQUAL_BASETYPE(_src_type, _dst_type) \ + do { \ + if (dart__unlikely(!dart__mpi__datatype_samebase(_src_type, _dst_type))){ \ + char *src_name = dart__mpi__datatype_name(_src_type); \ + char *dst_name = dart__mpi__datatype_name(dst_type); \ + DART_LOG_ERROR("%s ! Cannot convert base-types (%s vs %s)", \ + __FUNCTION__, src_name, dst_name); \ + free(src_name); \ + free(dst_name); \ + return DART_ERR_INVAL; \ + } \ + } while (0) + +#define CHECK_NUM_ELEM(_src_type, _dst_type, _num_elem) \ + do { \ + size_t src_num_elem = dart__mpi__datatype_num_elem(_src_type); \ + size_t dst_num_elem = dart__mpi__datatype_num_elem(_dst_type); \ + if ((_num_elem % src_num_elem) != 0 || (_num_elem % dst_num_elem) != 0) { \ + char *src_name = dart__mpi__datatype_name(_src_type); \ + char *dst_name = dart__mpi__datatype_name(dst_type); \ + DART_LOG_ERROR( \ + "%s ! Type-mismatch would lead to truncation (%s vs %s with %zu elems)",\ + __FUNCTION__, src_name, dst_name, _num_elem); \ + free(src_name); \ + free(dst_name); \ + return DART_ERR_INVAL; \ + } \ + } while (0) + +#define CHECK_TYPE_CONSTRAINTS(_src_type, _dst_type, _num_elem) \ + CHECK_EQUAL_BASETYPE(_src_type, _dst_type); \ + CHECK_NUM_ELEM(_src_type, _dst_type, _num_elem); /** * Temporary space allocation: @@ -62,62 +89,29 @@ free(__ptr); \ } while (0) +/** DART handle type for non-blocking one-sided operations. */ +struct dart_handle_struct +{ + MPI_Request reqs[2]; // a large transfer might consist of two operations + MPI_Win win; + dart_unit_t dest; + uint8_t num_reqs; + bool needs_flush; +}; + +/** + * Help to check for return of MPI call. + * Since DART currently does not define an MPI error handler the abort will not + * be reached. + */ #define CHECK_MPI_RET(__call, __name) \ do { \ - if (dart__unlikely(__call != MPI_SUCCESS)) { \ + if (dart__unlikely(__call != MPI_SUCCESS)) { \ DART_LOG_ERROR("%s ! %s failed!", __func__, __name); \ - return DART_ERR_OTHER; \ + dart_abort(DART_EXIT_ABORT); \ } \ } while (0) -int dart__mpi__datatype_sizes[DART_TYPE_COUNT]; -static MPI_Datatype dart__mpi__max_chunk_datatype[DART_TYPE_COUNT]; - -dart_ret_t -dart__mpi__datatype_init() -{ - for (int i = DART_TYPE_UNDEFINED+1; i < DART_TYPE_COUNT; i++) { - - // query the size of the data type - int ret = MPI_Type_size( - dart__mpi__datatype(i), - &dart__mpi__datatype_sizes[i]); - if (ret != MPI_SUCCESS) { - DART_LOG_ERROR("Failed to query size of DART data type %i", i); - return DART_ERR_INVAL; - } - - // create the chunk data type - ret = MPI_Type_contiguous(MAX_CONTIG_ELEMENTS, - dart__mpi__datatype(i), - &dart__mpi__max_chunk_datatype[i]); - if (ret != MPI_SUCCESS) { - DART_LOG_ERROR("Failed to create chunk type of DART data type %i", i); - return DART_ERR_INVAL; - } - ret = MPI_Type_commit(&dart__mpi__max_chunk_datatype[i]); - if (ret != MPI_SUCCESS) { - DART_LOG_ERROR("Failed to commit chunk type of DART data type %i", i); - return DART_ERR_INVAL; - } - - } - return DART_OK; -} - -dart_ret_t -dart__mpi__datatype_fini() -{ - for (int i = DART_TYPE_UNDEFINED+1; i < DART_TYPE_COUNT; i++) { - int ret = MPI_Type_free(&dart__mpi__max_chunk_datatype[i]); - if (ret != MPI_SUCCESS) { - DART_LOG_ERROR("Failed to commit chunk type of DART data type %i", i); - return DART_ERR_INVAL; - } - } - return DART_OK; -} - #if !defined(DART_MPI_DISABLE_SHARED_WINDOWS) static dart_ret_t get_shared_mem( @@ -163,51 +157,77 @@ static dart_ret_t put_shared_mem( } #endif // !defined(DART_MPI_DISABLE_SHARED_WINDOWS) +/** + * Internal implementations of put/get with and without handles for + * basic data types and complex data types. + */ -dart_ret_t dart_get( - void * dest, - dart_gptr_t gptr, - size_t nelem, - dart_datatype_t dtype) +static __attribute__((always_inline)) inline +int +dart__mpi__get( + void *origin_addr, int origin_count, MPI_Datatype origin_datatype, + int target_rank, MPI_Aint target_disp, int target_count, + MPI_Datatype target_datatype, MPI_Win win, + MPI_Request *reqs, uint8_t * num_reqs) { - MPI_Datatype mpi_dtype = dart__mpi__datatype(dtype); - uint64_t offset = gptr.addr_or_offs.offset; - int16_t seg_id = gptr.segid; - dart_team_unit_t team_unit_id = DART_TEAM_UNIT_ID(gptr.unitid); - dart_team_t teamid = gptr.teamid; - - dart_team_data_t *team_data = dart_adapt_teamlist_get(teamid); - if (dart__unlikely(team_data == NULL)) { - DART_LOG_ERROR("dart_get ! failed: Unknown team %i!", teamid); - return DART_ERR_INVAL; + if (reqs != NULL) { + return MPI_Rget(origin_addr, origin_count, origin_datatype, + target_rank, target_disp, target_count, target_datatype, + win, &reqs[(*num_reqs)++]); + } else { + return MPI_Get(origin_addr, origin_count, origin_datatype, + target_rank, target_disp, target_count, + target_datatype, win); } +} - CHECK_UNITID_RANGE(team_unit_id, team_data); - - DART_LOG_DEBUG("dart_get() uid:%d o:%"PRIu64" s:%d t:%d nelem:%zu", - team_unit_id.id, offset, seg_id, teamid, nelem); - - dart_segment_info_t *seginfo = dart_segment_get_info( - &(team_data->segdata), seg_id); - if (dart__unlikely(seginfo == NULL)) { - DART_LOG_ERROR("dart_get ! " - "Unknown segment %i on team %i", seg_id, teamid); - return DART_ERR_INVAL; +static __attribute__((always_inline)) inline +int +dart__mpi__put( + const void *origin_addr, int origin_count, MPI_Datatype origin_datatype, + int target_rank, MPI_Aint target_disp, int target_count, + MPI_Datatype target_datatype, MPI_Win win, + MPI_Request *reqs, uint8_t * num_reqs) +{ + if (reqs != NULL) { + return MPI_Rput(origin_addr, origin_count, origin_datatype, + target_rank, target_disp, target_count, target_datatype, + win, &reqs[(*num_reqs)++]); + } else { + return MPI_Put(origin_addr, origin_count, origin_datatype, + target_rank, target_disp, target_count, + target_datatype, win); } +} + +static inline +dart_ret_t +dart__mpi__get_basic( + const dart_team_data_t * team_data, + dart_team_unit_t team_unit_id, + const dart_segment_info_t * seginfo, + void * dest, + uint64_t offset, + size_t nelem, + dart_datatype_t dtype, + MPI_Request * reqs, + uint8_t * num_reqs) +{ + if (num_reqs) *num_reqs = 0; if (team_data->unitid == team_unit_id.id) { // use direct memcpy if we are on the same unit memcpy(dest, seginfo->selfbaseptr + offset, nelem * dart__mpi__datatype_sizeof(dtype)); DART_LOG_DEBUG("dart_get: memcpy nelem:%zu " - "source (coll.): offset:%lu -> dest: %p", - nelem, offset, dest); + "source (coll.): offset:%lu -> dest: %p", + nelem, offset, dest); return DART_OK; } #if !defined(DART_MPI_DISABLE_SHARED_WINDOWS) DART_LOG_DEBUG("dart_get: shared windows enabled"); - if (seg_id >= 0 && team_data->sharedmem_tab[team_unit_id.id].id >= 0) { + if (seginfo->segid >= 0 && team_data->sharedmem_tab[team_unit_id.id].id >= 0) { return get_shared_mem(team_data, seginfo, dest, offset, team_unit_id, nelem, dtype); } @@ -216,93 +236,134 @@ dart_ret_t dart_get( #endif // !defined(DART_MPI_DISABLE_SHARED_WINDOWS) /* - * MPI uses offset type int, do not copy more than INT_MAX elements: - */ - // chunk up the get + * MPI uses offset type int, chunk up the get if necessary + */ const size_t nchunks = nelem / MAX_CONTIG_ELEMENTS; const size_t remainder = nelem % MAX_CONTIG_ELEMENTS; - char * dest_ptr = (char*) dest; // source on another node or shared memory windows disabled - MPI_Win win = seginfo->win; - offset += dart_segment_disp(seginfo, team_unit_id); + MPI_Win win = seginfo->win; + offset += dart_segment_disp(seginfo, team_unit_id); + char * dest_ptr = (char*) dest; if (nchunks > 0) { DART_LOG_TRACE("dart_get: MPI_Get (dest %p, size %zu)", - dest_ptr, nchunks * MAX_CONTIG_ELEMENTS); + dest_ptr, nchunks * MAX_CONTIG_ELEMENTS); CHECK_MPI_RET( - MPI_Get(dest_ptr, - nchunks, - dart__mpi__max_chunk_datatype[dtype], - team_unit_id.id, - offset, - nchunks, - dart__mpi__max_chunk_datatype[dtype], - win), + dart__mpi__get(dest_ptr, nchunks, + dart__mpi__datatype_maxtype(dtype), + team_unit_id.id, + offset, + nchunks, + dart__mpi__datatype_maxtype(dtype), + win, reqs, num_reqs), "MPI_Get"); offset += nchunks * MAX_CONTIG_ELEMENTS; dest_ptr += nchunks * MAX_CONTIG_ELEMENTS; } if (remainder > 0) { - DART_LOG_TRACE("dart_get: MPI_Get (dest %p, size %zu)", dest_ptr, remainder); - if (MPI_Get(dest_ptr, - remainder, - mpi_dtype, - team_unit_id.id, - offset, - remainder, - mpi_dtype, - win) != MPI_SUCCESS) { - DART_LOG_ERROR("dart_get ! MPI_Get failed"); - return DART_ERR_INVAL; - } + DART_LOG_TRACE("dart_get: MPI_Get (dest %p, size %zu)", + dest_ptr, remainder); + CHECK_MPI_RET( + dart__mpi__get(dest_ptr, + remainder, + dart__mpi__datatype_struct(dtype)->basic.mpi_type, + team_unit_id.id, + offset, + remainder, + dart__mpi__datatype_struct(dtype)->basic.mpi_type, + win, reqs, num_reqs), + "MPI_Get"); } - - DART_LOG_DEBUG("dart_get > finished"); return DART_OK; } -dart_ret_t dart_put( - dart_gptr_t gptr, - const void * src, - size_t nelem, - dart_datatype_t dtype) +static inline +dart_ret_t +dart__mpi__get_complex( + dart_team_unit_t team_unit_id, + const dart_segment_info_t * seginfo, + void * dest, + uint64_t offset, + size_t nelem, + dart_datatype_t src_type, + dart_datatype_t dst_type, + MPI_Request * reqs, + uint8_t * num_reqs) { - MPI_Datatype mpi_dtype = dart__mpi__datatype(dtype); - uint64_t offset = gptr.addr_or_offs.offset; - int16_t seg_id = gptr.segid; - dart_team_unit_t team_unit_id = DART_TEAM_UNIT_ID(gptr.unitid); - dart_team_t teamid = gptr.teamid; + if (num_reqs != NULL) *num_reqs = 0; - dart_team_data_t *team_data = dart_adapt_teamlist_get(teamid); - if (dart__unlikely(team_data == NULL)) { - DART_LOG_ERROR("dart_put ! failed: Unknown team %i!", teamid); - return DART_ERR_INVAL; - } + CHECK_TYPE_CONSTRAINTS(src_type, dst_type, nelem); - CHECK_UNITID_RANGE(team_unit_id, team_data); + MPI_Win win = seginfo->win; + char * dest_ptr = (char*) dest; + offset += dart_segment_disp(seginfo, team_unit_id); - dart_segment_info_t *seginfo = dart_segment_get_info( - &(team_data->segdata), seg_id); - if (dart__unlikely(seginfo == NULL)) { - DART_LOG_ERROR("dart_put ! " - "Unknown segment %i on team %i", seg_id, teamid); - return DART_ERR_INVAL; + MPI_Datatype src_mpi_type, dst_mpi_type; + int src_num_elem, dst_num_elem; + dart__mpi__datatype_convert_mpi( + src_type, nelem, &src_mpi_type, &src_num_elem); + if (src_type != dst_type) { + dart__mpi__datatype_convert_mpi( + dst_type, nelem, &dst_mpi_type, &dst_num_elem); + } else { + dst_mpi_type = src_mpi_type; + dst_num_elem = src_num_elem; + } + + DART_LOG_TRACE("dart_get: MPI_Rget (dest %p, size %zu)", dest_ptr, nelem); + CHECK_MPI_RET( + dart__mpi__get(dest_ptr, + dst_num_elem, + dst_mpi_type, + team_unit_id.id, + offset, + src_num_elem, + src_mpi_type, + win, + reqs, num_reqs), + "MPI_Rget"); + // clean-up strided data types + if (dart__mpi__datatype_isstrided(src_type)) { + dart__mpi__destroy_strided_mpi(&src_mpi_type); + } + if (src_type != dst_type && dart__mpi__datatype_isstrided(dst_type)) { + dart__mpi__destroy_strided_mpi(&dst_mpi_type); } + return DART_OK; +} + +static inline +dart_ret_t +dart__mpi__put_basic( + const dart_team_data_t * team_data, + dart_team_unit_t team_unit_id, + const dart_segment_info_t * seginfo, + const void * src, + uint64_t offset, + size_t nelem, + dart_datatype_t dtype, + MPI_Request * reqs, + uint8_t * num_reqs, + bool * flush_required_ptr) +{ + if (num_reqs) *num_reqs = 0; /* copy data directly if we are on the same unit */ if (team_unit_id.id == team_data->unitid) { + if (flush_required_ptr) *flush_required_ptr = false; memcpy(seginfo->selfbaseptr + offset, src, nelem * dart__mpi__datatype_sizeof(dtype)); DART_LOG_DEBUG("dart_put: memcpy nelem:%zu (from global allocation)" - "offset: %"PRIu64"", nelem, offset); + "offset: %"PRIu64"", nelem, offset); return DART_OK; } #if !defined(DART_MPI_DISABLE_SHARED_WINDOWS) DART_LOG_DEBUG("dart_put: shared windows enabled"); - if (seg_id >= 0 && team_data->sharedmem_tab[team_unit_id.id].id >= 0) { + if (seginfo->segid >= 0 && team_data->sharedmem_tab[team_unit_id.id].id >= 0) { + if (flush_required_ptr) *flush_required_ptr = false; return put_shared_mem(team_data, seginfo, src, offset, team_unit_id, nelem, dtype); } @@ -310,27 +371,30 @@ dart_ret_t dart_put( DART_LOG_DEBUG("dart_put: shared windows disabled"); #endif /* !defined(DART_MPI_DISABLE_SHARED_WINDOWS) */ + if (flush_required_ptr) *flush_required_ptr = true; + // source on another node or shared memory windows disabled - MPI_Win win = seginfo->win; - offset += dart_segment_disp(seginfo, team_unit_id); + MPI_Win win = seginfo->win; + offset += dart_segment_disp(seginfo, team_unit_id); + const char * src_ptr = (const char*) src; // chunk up the put const size_t nchunks = nelem / MAX_CONTIG_ELEMENTS; const size_t remainder = nelem % MAX_CONTIG_ELEMENTS; - const char * src_ptr = (const char*) src; if (nchunks > 0) { - DART_LOG_TRACE("dart_put: MPI_Put (src %p, size %zu)", - src_ptr, nchunks * MAX_CONTIG_ELEMENTS); + DART_LOG_TRACE("dart_put: MPI_Rput (src %p, size %zu)", + src_ptr, nchunks * MAX_CONTIG_ELEMENTS); CHECK_MPI_RET( - MPI_Put(src_ptr, + dart__mpi__put(src_ptr, nchunks, - dart__mpi__max_chunk_datatype[dtype], + dart__mpi__datatype_struct(dtype)->basic.max_type, team_unit_id.id, offset, nchunks, - dart__mpi__max_chunk_datatype[dtype], - win), + dart__mpi__datatype_struct(dtype)->basic.max_type, + win, + reqs, num_reqs), "MPI_Put"); offset += nchunks * MAX_CONTIG_ELEMENTS; src_ptr += nchunks * MAX_CONTIG_ELEMENTS; @@ -340,20 +404,184 @@ dart_ret_t dart_put( DART_LOG_TRACE("dart_put: MPI_Put (src %p, size %zu)", src_ptr, remainder); CHECK_MPI_RET( - MPI_Put(src_ptr, + dart__mpi__put(src_ptr, remainder, - mpi_dtype, + dart__mpi__datatype_struct(dtype)->basic.mpi_type, team_unit_id.id, offset, remainder, - mpi_dtype, - win), + dart__mpi__datatype_struct(dtype)->basic.mpi_type, + win, + reqs, num_reqs), "MPI_Put"); } + return DART_OK; +} + +static inline +dart_ret_t +dart__mpi__put_complex( + dart_team_unit_t team_unit_id, + const dart_segment_info_t * seginfo, + const void * src, + uint64_t offset, + size_t nelem, + dart_datatype_t src_type, + dart_datatype_t dst_type, + MPI_Request * reqs, + uint8_t * num_reqs, + bool * flush_required_ptr) +{ + if (flush_required_ptr) *flush_required_ptr = true; + if (num_reqs) *num_reqs = 0; + + // slow path for derived types + CHECK_TYPE_CONSTRAINTS(src_type, dst_type, nelem); + + MPI_Win win = seginfo->win; + const char * src_ptr = (const char*) src; + offset += dart_segment_disp(seginfo, team_unit_id); + + MPI_Datatype src_mpi_type, dst_mpi_type; + int src_num_elem, dst_num_elem; + dart__mpi__datatype_convert_mpi( + src_type, nelem, &src_mpi_type, &src_num_elem); + if (src_type != dst_type) { + dart__mpi__datatype_convert_mpi( + dst_type, nelem, &dst_mpi_type, &dst_num_elem); + } else { + dst_mpi_type = src_mpi_type; + dst_num_elem = src_num_elem; + } + + DART_LOG_TRACE( + "dart_put: MPI_Put (src %p, size %zu, src_type %p, dst_type %p)", + src_ptr, nelem, src_mpi_type, dst_mpi_type); + CHECK_MPI_RET( + dart__mpi__put(src_ptr, + src_num_elem, + src_mpi_type, + team_unit_id.id, + offset, + dst_num_elem, + dst_mpi_type, + win, + reqs, num_reqs), + "MPI_Put"); + + // clean-up strided data types + if (dart__mpi__datatype_isstrided(src_type)) { + dart__mpi__destroy_strided_mpi(&src_mpi_type); + } + if (src_type != dst_type && dart__mpi__datatype_isstrided(dst_type)) { + dart__mpi__destroy_strided_mpi(&dst_mpi_type); + } return DART_OK; } +/** + * Public interface for put/get. + */ + +dart_ret_t dart_get( + void * dest, + dart_gptr_t gptr, + size_t nelem, + dart_datatype_t src_type, + dart_datatype_t dst_type) +{ + uint64_t offset = gptr.addr_or_offs.offset; + int16_t seg_id = gptr.segid; + dart_team_unit_t team_unit_id = DART_TEAM_UNIT_ID(gptr.unitid); + dart_team_t teamid = gptr.teamid; + + dart_team_data_t *team_data = dart_adapt_teamlist_get(teamid); + if (dart__unlikely(team_data == NULL)) { + DART_LOG_ERROR("dart_get ! failed: Unknown team %i!", teamid); + return DART_ERR_INVAL; + } + CHECK_UNITID_RANGE(team_unit_id, team_data); + + DART_LOG_DEBUG("dart_get() uid:%d o:%"PRIu64" s:%d t:%d nelem:%zu", + team_unit_id.id, offset, seg_id, teamid, nelem); + + dart_segment_info_t *seginfo = dart_segment_get_info( + &(team_data->segdata), seg_id); + if (dart__unlikely(seginfo == NULL)) { + DART_LOG_ERROR("dart_get ! " + "Unknown segment %i on team %i", seg_id, teamid); + return DART_ERR_INVAL; + } + + dart_ret_t ret = DART_OK; + + // leave complex data type handling to MPI + if (dart__mpi__datatype_isbasic(src_type) && + dart__mpi__datatype_isbasic(dst_type)) { + // fast-path for basic types + CHECK_EQUAL_BASETYPE(src_type, dst_type); + ret = dart__mpi__get_basic(team_data, team_unit_id, seginfo, dest, + offset, nelem, src_type, NULL, NULL); + } else { + // slow path for derived types + ret = dart__mpi__get_complex(team_unit_id, seginfo, dest, + offset, nelem, src_type, dst_type, NULL, NULL); + } + + DART_LOG_DEBUG("dart_get > finished"); + return ret; +} + +dart_ret_t dart_put( + dart_gptr_t gptr, + const void * src, + size_t nelem, + dart_datatype_t src_type, + dart_datatype_t dst_type) +{ + uint64_t offset = gptr.addr_or_offs.offset; + int16_t seg_id = gptr.segid; + dart_team_unit_t team_unit_id = DART_TEAM_UNIT_ID(gptr.unitid); + dart_team_t teamid = gptr.teamid; + + CHECK_EQUAL_BASETYPE(src_type, dst_type); + + dart_team_data_t *team_data = dart_adapt_teamlist_get(teamid); + if (dart__unlikely(team_data == NULL)) { + DART_LOG_ERROR("dart_put ! failed: Unknown team %i!", teamid); + return DART_ERR_INVAL; + } + + CHECK_UNITID_RANGE(team_unit_id, team_data); + + dart_segment_info_t *seginfo = dart_segment_get_info( + &(team_data->segdata), seg_id); + if (dart__unlikely(seginfo == NULL)) { + DART_LOG_ERROR("dart_put ! " + "Unknown segment %i on team %i", seg_id, teamid); + return DART_ERR_INVAL; + } + + dart_ret_t ret = DART_OK; + + if (dart__mpi__datatype_isbasic(src_type) && + dart__mpi__datatype_isbasic(dst_type)) { + // fast path for basic data types + CHECK_EQUAL_BASETYPE(src_type, dst_type); + ret = dart__mpi__put_basic(team_data, team_unit_id, seginfo, src, + offset, nelem, src_type, + NULL, NULL, NULL); + } else { + // slow path for complex data types + ret = dart__mpi__put_complex(team_unit_id, seginfo, src, + offset, nelem, src_type, dst_type, + NULL, NULL, NULL); + } + + return ret; +} + dart_ret_t dart_accumulate( dart_gptr_t gptr, const void * values, @@ -367,9 +595,12 @@ dart_ret_t dart_accumulate( uint64_t offset = gptr.addr_or_offs.offset; int16_t seg_id = gptr.segid; dart_team_t teamid = gptr.teamid; - mpi_dtype = dart__mpi__datatype(dtype); + + CHECK_IS_BASICTYPE(dtype); + mpi_dtype = dart__mpi__datatype_struct(dtype)->basic.mpi_type; mpi_op = dart__mpi__op(op); + dart_team_data_t *team_data = dart_adapt_teamlist_get(teamid); if (dart__unlikely(team_data == NULL)) { DART_LOG_ERROR("dart_accumulate ! failed: Unknown team %i!", teamid); @@ -404,11 +635,11 @@ dart_ret_t dart_accumulate( MPI_Accumulate( src_ptr, nchunks, - dart__mpi__max_chunk_datatype[dtype], + dart__mpi__datatype_maxtype(dtype), team_unit_id.id, offset, nchunks, - dart__mpi__max_chunk_datatype[dtype], + dart__mpi__datatype_maxtype(dtype), mpi_op, win), "MPI_Accumulate"); @@ -451,7 +682,9 @@ dart_ret_t dart_fetch_and_op( uint64_t offset = gptr.addr_or_offs.offset; int16_t seg_id = gptr.segid; dart_team_t teamid = gptr.teamid; - mpi_dtype = dart__mpi__datatype(dtype); + + CHECK_IS_BASICTYPE(dtype); + mpi_dtype = dart__mpi__datatype_struct(dtype)->basic.mpi_type; mpi_op = dart__mpi__op(op); dart_team_data_t *team_data = dart_adapt_teamlist_get(teamid); @@ -496,393 +729,70 @@ dart_ret_t dart_fetch_and_op( dart_ret_t dart_compare_and_swap( dart_gptr_t gptr, - const void * value, - const void * compare, - void * result, - dart_datatype_t dtype) -{ - dart_team_unit_t team_unit_id = DART_TEAM_UNIT_ID(gptr.unitid); - uint64_t offset = gptr.addr_or_offs.offset; - int16_t seg_id = gptr.segid; - dart_team_t teamid = gptr.teamid; - MPI_Datatype mpi_dtype = dart__mpi__datatype(dtype); - - if (dtype > DART_TYPE_LONGLONG) { - DART_LOG_ERROR("dart_compare_and_swap ! failed: " - "only valid on integral types"); - return DART_ERR_INVAL; - } - - dart_team_data_t *team_data = dart_adapt_teamlist_get(gptr.teamid); - if (team_data == NULL) { - DART_LOG_ERROR("dart_compare_and_swap ! failed: Unknown team %i!", - gptr.teamid); - return DART_ERR_INVAL; - } - - CHECK_UNITID_RANGE(team_unit_id, team_data); - - DART_LOG_TRACE("dart_compare_and_swap() dtype:%d unit:%d offset:%"PRIu64, - dtype, team_unit_id.id, gptr.addr_or_offs.offset); - - dart_segment_info_t *seginfo = dart_segment_get_info( - &(team_data->segdata), seg_id); - if (dart__unlikely(seginfo == NULL)) { - DART_LOG_ERROR("dart_compare_and_swap ! " - "Unknown segment %i on team %i", seg_id, teamid); - return DART_ERR_INVAL; - } - - MPI_Win win = seginfo->win; - offset += dart_segment_disp(seginfo, team_unit_id); - - CHECK_MPI_RET( - MPI_Compare_and_swap( - value, - compare, - result, - mpi_dtype, - team_unit_id.id, - offset, - win), - "MPI_Compare_and_swap"); - DART_LOG_DEBUG("dart_compare_and_swap > finished"); - return DART_OK; -} - -dart_ret_t dart_get_strided_handle( - void* dest, - dart_gptr_t gptr, - size_t nblocks, - size_t nelems_block, - size_t stride, - dart_datatype_t dtype, - dart_stride_option stride_opt, - dart_handle_t* handleptr) -{ - dart_team_unit_t team_unit_id = DART_TEAM_UNIT_ID(gptr.unitid); - dart_team_t teamid = gptr.teamid; - uint64_t offset = gptr.addr_or_offs.offset; - int16_t seg_id = gptr.segid; - - char* dest_ptr = (char*) dest; - *handleptr = DART_HANDLE_NULL; - - dart_team_data_t *team_data = dart_adapt_teamlist_get(teamid); - if (dart__unlikely(team_data == NULL)) { - DART_LOG_ERROR("dart_get_strided_handle ! failed: Unknown team %i!", teamid); - return DART_ERR_INVAL; - } - - CHECK_UNITID_RANGE(team_unit_id, team_data); - - dart_segment_info_t *seginfo = dart_segment_get_info( - &(team_data->segdata), seg_id); - if (dart__unlikely(seginfo == NULL)) { - DART_LOG_ERROR("dart_get_strided_handle ! " - "Unknown segment %i on team %i", seg_id, teamid); - return DART_ERR_INVAL; - } - - DART_LOG_DEBUG("dart_get_strided_handle: uid:%d o:%"PRIu64" s:%d t:%d, nelem:%zu", - team_unit_id.id, offset, seg_id, gptr.teamid, nblocks * nelems_block); - DART_LOG_TRACE("dart_get_handle: allocated handle:%p", (void *)(*handleptr)); - - /* - * MPI uses offset type int, do not copy more than INT_MAX elements: - */ - if (nelems_block * nblocks > INT_MAX) { - DART_LOG_ERROR("dart_get_strided_handle ! failed: nelem * blocks > INT_MAX"); - return DART_ERR_INVAL; - } - -#if !defined(DART_MPI_DISABLE_SHARED_WINDOWS) - DART_LOG_DEBUG("dart_get_strided_handle: shared windows enabled"); - if (seg_id >= 0 && team_data->sharedmem_tab[team_unit_id.id].id >= 0) { - dart_team_unit_t luid = team_data->sharedmem_tab[gptr.unitid]; - /* - * Use memcpy if the target is in the same node as the calling unit: - */ - DART_LOG_DEBUG("dart_get_strided_handle: shared memory segment, seg_id:%d", seg_id); - char* baseptr = seginfo->baseptr[luid.id] + offset; - - DART_LOG_DEBUG( "dart_get_strided_handle: memcpy %zu bytes", - nblocks * nelems_block * dart__mpi__datatype_sizeof(dtype)); - - size_t size_dtype = dart__mpi__datatype_sizeof(dtype); - size_t size_nelems = nelems_block * size_dtype; - size_t offset_dest = stride * size_dtype; - size_t offset_src = offset_dest; - switch(stride_opt) { - case STRIDED_TO_STRIDED: break; - case STRIDED_TO_CONTIG: offset_dest = size_nelems; break; - case CONTIG_TO_STRIDED: offset_src = size_nelems; break; - defualt: DART_LOG_ERROR("dart_get_indexed_handle ! unknown stride option"); - return DART_ERR_INVAL; - } - - for(size_t i = 0; i < nblocks; ++i, dest_ptr += offset_dest, baseptr += offset_src) - memcpy(dest_ptr, baseptr, size_nelems); - - return DART_OK; - } -#else - DART_LOG_DEBUG("dart_get_strided_handle: shared windows disabled"); -#endif /* !defined(DART_MPI_DISABLE_SHARED_WINDOWS) */ - /* - * MPI shared windows disabled or target and calling unit are on different - * nodes, use MPI_Rget: - */ - MPI_Datatype mpi_elem_type = dart__mpi__datatype(dtype); - MPI_Datatype mpi_strided_type; - MPI_Type_vector(nblocks, nelems_block, stride, mpi_elem_type, &mpi_strided_type); - MPI_Type_commit(&mpi_strided_type); - - MPI_Win win = seginfo->win; - offset += dart_segment_disp(seginfo, team_unit_id); - - dart_handle_t handle = calloc(1, sizeof(struct dart_handle_struct)); - handle->dest = team_unit_id.id; - handle->win = win; - handle->needs_flush = false; - - DART_LOG_DEBUG("dart_get_strided_handle: -- MPI_Rget(dest %p, size %zu)", - dest_ptr, nblocks * nelems_block); - - int mpi_ret = -1; - switch(stride_opt) { - case STRIDED_TO_STRIDED: - mpi_ret = MPI_Rget( - dest_ptr, // origin address - 1, // origin count - mpi_strided_type, // origin data type - team_unit_id.id, // target rank - offset, // target disp in window - 1, // target count - mpi_strided_type, // target data type - win, // window - &handle->reqs[0]); - break; - case STRIDED_TO_CONTIG: - mpi_ret = MPI_Rget( - dest_ptr, // origin address - nblocks * nelems_block, // origin count - mpi_elem_type, // origin data type - team_unit_id.id, // target rank - offset, // target disp in window - 1, // target count - mpi_strided_type, // target data type - win, // window - &handle->reqs[0]); - break; - case CONTIG_TO_STRIDED: - mpi_ret = MPI_Rget( - dest_ptr, // origin address - 1, // origin count - mpi_strided_type, // origin data type - team_unit_id.id, // target rank - offset, // target disp in window - nblocks * nelems_block, // target count - mpi_elem_type, // target data type - win, // window - &handle->reqs[0]); - break; - default: DART_LOG_ERROR("dart_get_strided_handle ! unknown stride option"); - } - - if (mpi_ret != MPI_SUCCESS) { - free(handle); - MPI_Type_free(&mpi_strided_type); - DART_LOG_ERROR("dart_get_strided_handle ! MPI_Rget failed"); - return DART_ERR_OTHER; - } - - handle->num_reqs++; - *handleptr = handle; - - DART_LOG_TRACE("dart_get_strided_handle > handle(%p) dest:%d win:%"PRIu64, - (void*) handle, handle->dest, (unsigned long) win); - - MPI_Type_free(&mpi_strided_type); - - return DART_OK; -} - -dart_ret_t dart_get_indexed_handle( - void* dest, - dart_gptr_t gptr, - size_t nblocks, - size_t nelems_block, - int* indexes, - dart_datatype_t dtype, - dart_stride_option stride_opt, - dart_handle_t* handleptr) + const void * value, + const void * compare, + void * result, + dart_datatype_t dtype) { - dart_team_unit_t team_unit_id = DART_TEAM_UNIT_ID(gptr.unitid); - dart_team_t teamid = gptr.teamid; - uint64_t offset = gptr.addr_or_offs.offset; - int16_t seg_id = gptr.segid; + dart_team_unit_t team_unit_id = DART_TEAM_UNIT_ID(gptr.unitid); + uint64_t offset = gptr.addr_or_offs.offset; + int16_t seg_id = gptr.segid; + dart_team_t teamid = gptr.teamid; - char* dest_ptr = (char*) dest; - *handleptr = DART_HANDLE_NULL; + if (dtype > DART_TYPE_LONGLONG) { + DART_LOG_ERROR("dart_compare_and_swap ! failed: " + "only valid on integral types"); + return DART_ERR_INVAL; + } + MPI_Datatype mpi_dtype = dart__mpi__datatype_struct(dtype)->basic.mpi_type; - dart_team_data_t *team_data = dart_adapt_teamlist_get(teamid); - if (dart__unlikely(team_data == NULL)) { - DART_LOG_ERROR("dart_get_indexed_handle ! failed: Unknown team %i!", teamid); + dart_team_data_t *team_data = dart_adapt_teamlist_get(gptr.teamid); + if (team_data == NULL) { + DART_LOG_ERROR("dart_compare_and_swap ! failed: Unknown team %i!", + gptr.teamid); return DART_ERR_INVAL; } CHECK_UNITID_RANGE(team_unit_id, team_data); + DART_LOG_TRACE("dart_compare_and_swap() dtype:%d unit:%d offset:%"PRIu64, + dtype, team_unit_id.id, gptr.addr_or_offs.offset); + dart_segment_info_t *seginfo = dart_segment_get_info( &(team_data->segdata), seg_id); if (dart__unlikely(seginfo == NULL)) { - DART_LOG_ERROR("dart_get_indexed_handle ! " + DART_LOG_ERROR("dart_compare_and_swap ! " "Unknown segment %i on team %i", seg_id, teamid); return DART_ERR_INVAL; } - DART_LOG_DEBUG("dart_get_indexed_handle: uid:%d o:%"PRIu64" s:%d t:%d, nelem:%zu", - team_unit_id.id, offset, seg_id, gptr.teamid, nblocks * nelems_block); - DART_LOG_TRACE("dart_get_indexed_handle: allocated handle:%p", (void *)(*handleptr)); - - /* - * MPI uses offset type int, do not copy more than INT_MAX elements: - */ - if (nelems_block * nblocks > INT_MAX) { - DART_LOG_ERROR("dart_get_indexed_handle ! failed: nelem * blocks > INT_MAX"); - return DART_ERR_INVAL; - } - -#if !defined(DART_MPI_DISABLE_SHARED_WINDOWS) - DART_LOG_DEBUG("dart_get_indexed_handle: shared windows enabled"); - if (seg_id >= 0 && team_data->sharedmem_tab[team_unit_id.id].id >= 0) { - dart_team_unit_t luid = team_data->sharedmem_tab[gptr.unitid]; - /* - * Use memcpy if the target is in the same node as the calling unit: - */ - DART_LOG_DEBUG("dart_get_indexed_handle: shared memory segment, seg_id:%d", seg_id); - char* baseptr = seginfo->baseptr[luid.id] + offset; - - DART_LOG_DEBUG( "dart_get_indexed_handle: memcpy %zu bytes", - nblocks * nelems_block * dart__mpi__datatype_sizeof(dtype)); - - size_t size_dtype = dart__mpi__datatype_sizeof(dtype); - size_t size_nelems = nelems_block * size_dtype; - - if(stride_opt == STRIDED_TO_STRIDED) { - for(size_t i = 0; i < nblocks; ++i) { - size_t size_offset = indexes[i] * size_dtype; - memcpy(dest_ptr + size_offset, baseptr + size_offset, size_nelems); - } - } - else if(stride_opt == STRIDED_TO_CONTIG) { - for(size_t i = 0; i < nblocks; ++i) - memcpy((dest_ptr) + i * size_nelems, baseptr + indexes[i] * size_dtype, size_nelems); - } - else if(stride_opt == CONTIG_TO_STRIDED) { - for(size_t i = 0; i < nblocks; ++i, baseptr += size_nelems) - memcpy(dest_ptr + indexes[i] * size_dtype, baseptr, size_nelems); - } - else { - DART_LOG_ERROR("dart_get_indexed_handle ! unknown stride option"); - return DART_ERR_INVAL; - } - - return DART_OK; - } -#else - DART_LOG_DEBUG("dart_get_indexed_handle: shared windows disabled"); -#endif /* !defined(DART_MPI_DISABLE_SHARED_WINDOWS) */ - /* - * MPI shared windows disabled or target and calling unit are on different - * nodes, use MPI_Rget: - */ - MPI_Datatype mpi_elem_type = dart__mpi__datatype(dtype); - MPI_Datatype mpi_indexed_type; - MPI_Type_create_indexed_block(nblocks, nelems_block, indexes, mpi_elem_type, &mpi_indexed_type); - MPI_Type_commit(&mpi_indexed_type); - - MPI_Win win = seginfo->win; - offset += dart_segment_disp(seginfo, team_unit_id); - - - dart_handle_t handle = calloc(1, sizeof(struct dart_handle_struct)); - handle->dest = team_unit_id.id; - handle->win = win; - handle->needs_flush = false; - - DART_LOG_DEBUG("dart_get_indexed_handle: -- MPI_Rget(dest %p, size %zu)", - dest_ptr, nblocks * nelems_block); - - int mpi_ret = 0; - switch(stride_opt) { - case STRIDED_TO_STRIDED: - mpi_ret = MPI_Rget( - dest_ptr, // origin address - 1, // origin count - mpi_indexed_type, // origin data type - team_unit_id.id, // target rank - offset, // target disp in window - 1, // target count - mpi_indexed_type, // target data type - win, // window - &handle->reqs[0]); - break; - case STRIDED_TO_CONTIG: - mpi_ret = MPI_Rget( - dest_ptr, // origin address - nblocks * nelems_block, // origin count - mpi_elem_type, // origin data type - team_unit_id.id, // target rank - offset, // target disp in window - 1, // target count - mpi_indexed_type, // target data type - win, // window - &handle->reqs[0]); - break; - case CONTIG_TO_STRIDED: - mpi_ret = MPI_Rget( - dest_ptr, // origin address - 1, // origin count - mpi_indexed_type, // origin data type - team_unit_id.id, // target rank - offset, // target disp in window - nblocks * nelems_block, // target count - mpi_elem_type, // target data type - win, // window - &handle->reqs[0]); - break; - default: DART_LOG_ERROR("dart_get_indexed_handle ! unknown stride option"); - } - - if (mpi_ret != MPI_SUCCESS) { - free(handle); - MPI_Type_free(&mpi_indexed_type); - DART_LOG_ERROR("dart_get_indexed_handle ! MPI_Rget failed"); - return DART_ERR_OTHER; - } - - handle->num_reqs++; - *handleptr = handle; - - DART_LOG_TRACE("dart_get_indexed_handle > handle(%p) dest:%d win:%"PRIu64, - (void*) handle, handle->dest, (unsigned long) win); - - MPI_Type_free(&mpi_indexed_type); + MPI_Win win = seginfo->win; + offset += dart_segment_disp(seginfo, team_unit_id); + CHECK_MPI_RET( + MPI_Compare_and_swap( + value, + compare, + result, + mpi_dtype, + team_unit_id.id, + offset, + win), + "MPI_Compare_and_swap"); + DART_LOG_DEBUG("dart_compare_and_swap > finished"); return DART_OK; } + /* -- Non-blocking dart one-sided operations -- */ dart_ret_t dart_get_handle( void * dest, dart_gptr_t gptr, size_t nelem, - dart_datatype_t dtype, + dart_datatype_t src_type, + dart_datatype_t dst_type, dart_handle_t * handleptr) { - MPI_Datatype mpi_type = dart__mpi__datatype(dtype); dart_team_unit_t team_unit_id = DART_TEAM_UNIT_ID(gptr.unitid); uint64_t offset = gptr.addr_or_offs.offset; int16_t seg_id = gptr.segid; @@ -906,93 +816,53 @@ dart_ret_t dart_get_handle( return DART_ERR_INVAL; } - DART_LOG_DEBUG("dart_get_handle() uid:%d o:%"PRIu64" s:%d t:%d, nelem:%zu", - team_unit_id.id, offset, seg_id, gptr.teamid, nelem); - DART_LOG_TRACE("dart_get_handle: allocated handle:%p", (void *)(*handleptr)); - -#if !defined(DART_MPI_DISABLE_SHARED_WINDOWS) - DART_LOG_DEBUG("dart_get_handle: shared windows enabled"); - - if (seg_id >= 0 && team_data->sharedmem_tab[team_unit_id.id].id >= 0) { - dart_ret_t ret = get_shared_mem(team_data, seginfo, dest, offset, - team_unit_id, nelem, dtype); - // return NULL request - return ret; - } -#else - DART_LOG_DEBUG("dart_get_handle: shared windows disabled"); -#endif /* !defined(DART_MPI_DISABLE_SHARED_WINDOWS) */ - - /* - * MPI shared windows disabled or target and calling unit are on different - * nodes, use MPI_RGet: - */ - - MPI_Win win = seginfo->win; - offset += dart_segment_disp(seginfo, team_unit_id); - - // chunk up the get - const size_t nchunks = nelem / MAX_CONTIG_ELEMENTS; - const size_t remainder = nelem % MAX_CONTIG_ELEMENTS; - char * dest_ptr = (char*) dest; + MPI_Win win = seginfo->win; dart_handle_t handle = calloc(1, sizeof(struct dart_handle_struct)); handle->dest = team_unit_id.id; handle->win = win; handle->needs_flush = false; - if (nchunks > 0) { - DART_LOG_TRACE("dart_get_handle: MPI_Rget (dest %p, size %zu)", - dest_ptr, nchunks * MAX_CONTIG_ELEMENTS); - if (MPI_Rget(dest_ptr, - nchunks, - dart__mpi__max_chunk_datatype[dtype], - team_unit_id.id, - offset, - nchunks, - dart__mpi__max_chunk_datatype[dtype], - win, - &handle->reqs[handle->num_reqs++]) != MPI_SUCCESS) { - free(handle); - DART_LOG_ERROR("dart_get_handle ! MPI_Rget failed"); - return DART_ERR_INVAL; - } - offset += nchunks * MAX_CONTIG_ELEMENTS; - dest_ptr += nchunks * MAX_CONTIG_ELEMENTS; + + DART_LOG_DEBUG("dart_get_handle() uid:%d o:%"PRIu64" s:%d t:%d, nelem:%zu", + team_unit_id.id, offset, seg_id, gptr.teamid, nelem); + DART_LOG_TRACE("dart_get_handle: allocated handle:%p", (void *)(handle)); + + dart_ret_t ret = DART_OK; + + // leave complex data type handling to MPI + if (dart__mpi__datatype_isbasic(src_type) && + dart__mpi__datatype_isbasic(dst_type)) { + // fast-path for basic types + CHECK_EQUAL_BASETYPE(src_type, dst_type); + ret = dart__mpi__get_basic(team_data, team_unit_id, seginfo, dest, + offset, nelem, src_type, + handle->reqs, &handle->num_reqs); + } else { + // slow path for derived types + ret = dart__mpi__get_complex(team_unit_id, seginfo, dest, + offset, nelem, src_type, dst_type, + handle->reqs, &handle->num_reqs); } - if (remainder > 0) { - MPI_Datatype mpi_dtype = dart__mpi__datatype(dtype); - DART_LOG_TRACE( - "dart_get_handle: MPI_Rget (dest %p, size %zu)", dest_ptr, remainder); - if (MPI_Rget(dest_ptr, - remainder, - mpi_dtype, - team_unit_id.id, - offset, - remainder, - mpi_dtype, - win, - &handle->reqs[handle->num_reqs++]) != MPI_SUCCESS) { - free(handle); - DART_LOG_ERROR("dart_get_handle ! MPI_Rget failed"); - return DART_ERR_INVAL; - } + if (handle->num_reqs == 0) { + free(handle); + handle = DART_HANDLE_NULL; } *handleptr = handle; - DART_LOG_TRACE("dart_get_handle > handle(%p) dest:%d win:%"PRIu64, - (void*)(handle), handle->dest, - (unsigned long)win); - return DART_OK; + DART_LOG_TRACE("dart_get_handle > handle(%p) dest:%d", + (void*)(handle), handle->dest); + return ret; } dart_ret_t dart_put_handle( dart_gptr_t gptr, const void * src, size_t nelem, - dart_datatype_t dtype, + dart_datatype_t src_type, + dart_datatype_t dst_type, dart_handle_t * handleptr) { dart_team_unit_t team_unit_id = DART_TEAM_UNIT_ID(gptr.unitid); @@ -1002,6 +872,8 @@ dart_ret_t dart_put_handle( *handleptr = DART_HANDLE_NULL; + CHECK_EQUAL_BASETYPE(src_type, dst_type); + dart_team_data_t *team_data = dart_adapt_teamlist_get(teamid); if (dart__unlikely(team_data == NULL)) { DART_LOG_ERROR("dart_put ! failed: Unknown team %i!", teamid); @@ -1019,59 +891,44 @@ dart_ret_t dart_put_handle( } MPI_Win win = seginfo->win; - offset += dart_segment_disp(seginfo, team_unit_id); // chunk up the put dart_handle_t handle = calloc(1, sizeof(struct dart_handle_struct)); handle->dest = team_unit_id.id; handle->win = win; handle->needs_flush = true; - const size_t nchunks = nelem / MAX_CONTIG_ELEMENTS; - const size_t remainder = nelem % MAX_CONTIG_ELEMENTS; - const char * src_ptr = (const char*) src; - if (nchunks > 0) { - DART_LOG_TRACE("dart_put_handle: MPI_Rput (src %p, size %zu)", - src_ptr, nchunks * MAX_CONTIG_ELEMENTS); - if (MPI_Rput(src_ptr, - nchunks, - dart__mpi__max_chunk_datatype[dtype], - team_unit_id.id, - offset, - nchunks, - dart__mpi__max_chunk_datatype[dtype], - win, - &handle->reqs[handle->num_reqs++]) != MPI_SUCCESS) { - free(handle); - DART_LOG_ERROR("dart_put_handle ! MPI_Rput failed"); - return DART_ERR_INVAL; - } - src_ptr += nchunks * MAX_CONTIG_ELEMENTS; - offset += nchunks * MAX_CONTIG_ELEMENTS; + dart_ret_t ret = DART_OK; + + if (dart__mpi__datatype_isbasic(src_type) && + dart__mpi__datatype_isbasic(dst_type)) { + // fast path for basic data types + CHECK_EQUAL_BASETYPE(src_type, dst_type); + ret = dart__mpi__put_basic(team_data, team_unit_id, seginfo, src, + offset, nelem, src_type, + handle->reqs, + &handle->num_reqs, + &handle->needs_flush); + } else { + // slow path for complex data types + ret = dart__mpi__put_complex(team_unit_id, seginfo, src, + offset, nelem, src_type, dst_type, + handle->reqs, + &handle->num_reqs, + &handle->needs_flush); } - if (remainder > 0) { - MPI_Datatype mpi_dtype = dart__mpi__datatype(dtype); - DART_LOG_TRACE( - "dart_put_handle: MPI_Rput (src %p, size %zu)", src_ptr, remainder); - if (MPI_Rput(src_ptr, - remainder, - mpi_dtype, - team_unit_id.id, - offset, - remainder, - mpi_dtype, - win, - &handle->reqs[handle->num_reqs++]) != MPI_SUCCESS) { - free(handle); - DART_LOG_ERROR("dart_put_handle ! MPI_Put failed"); - return DART_ERR_INVAL; - } + if (handle->num_reqs == 0) { + free(handle); + handle = DART_HANDLE_NULL; } *handleptr = handle; - return DART_OK; + DART_LOG_TRACE("dart_put_handle > handle(%p) dest:%d", + (void*)(handle), handle->dest); + + return ret; } /* -- Blocking dart one-sided operations -- */ @@ -1080,17 +937,19 @@ dart_ret_t dart_put_handle( * \todo Check if MPI_Get_accumulate (MPI_NO_OP) yields better performance */ dart_ret_t dart_put_blocking( - dart_gptr_t gptr, - const void * src, - size_t nelem, - dart_datatype_t dtype) + dart_gptr_t gptr, + const void * src, + size_t nelem, + dart_datatype_t src_type, + dart_datatype_t dst_type) { - MPI_Datatype mpi_dtype = dart__mpi__datatype(dtype); dart_team_unit_t team_unit_id = DART_TEAM_UNIT_ID(gptr.unitid); uint64_t offset = gptr.addr_or_offs.offset; int16_t seg_id = gptr.segid; dart_team_t teamid = gptr.teamid; + CHECK_EQUAL_BASETYPE(src_type, dst_type); + dart_team_data_t *team_data = dart_adapt_teamlist_get(gptr.teamid); if (dart__unlikely(team_data == NULL)) { DART_LOG_ERROR("dart_put_blocking ! failed: Unknown team %i!", gptr.teamid); @@ -1110,94 +969,53 @@ dart_ret_t dart_put_blocking( DART_LOG_DEBUG("dart_put_blocking() uid:%d o:%"PRIu64" s:%d t:%d, nelem:%zu", team_unit_id.id, offset, seg_id, gptr.teamid, nelem); - - /* copy data directly if we are on the same unit */ - if (team_unit_id.id == team_data->unitid) { - memcpy(seginfo->selfbaseptr + offset, src, - nelem * dart__mpi__datatype_sizeof(dtype)); - DART_LOG_DEBUG("dart_put_blocking: memcpy nelem:%zu (from global allocation)" - "offset: %"PRIu64"", nelem, offset); - return DART_OK; - } - -#if !defined(DART_MPI_DISABLE_SHARED_WINDOWS) - DART_LOG_DEBUG("dart_put_blocking: shared windows enabled"); - if (seg_id >= 0 && team_data->sharedmem_tab[team_unit_id.id].id >= 0) { - return put_shared_mem(team_data, seginfo, src, offset, - team_unit_id, nelem, dtype); - } -#else - DART_LOG_DEBUG("dart_put_blocking: shared windows disabled"); -#endif /* !defined(DART_MPI_DISABLE_SHARED_WINDOWS) */ - MPI_Win win = seginfo->win; - offset += dart_segment_disp(seginfo, team_unit_id); - - /* - * Using MPI_Put as MPI_Win_flush is required to ensure remote completion. - */ - - // chunk up the put - const size_t nchunks = nelem / MAX_CONTIG_ELEMENTS; - const size_t remainder = nelem % MAX_CONTIG_ELEMENTS; - const char * src_ptr = (const char*) src; - if (nchunks > 0) { - DART_LOG_TRACE("dart_put_blocking: MPI_Put (src %p, size %zu)", - src_ptr, nchunks * MAX_CONTIG_ELEMENTS); - CHECK_MPI_RET( - MPI_Put(src_ptr, - nchunks, - dart__mpi__max_chunk_datatype[dtype], - team_unit_id.id, - offset, - nchunks, - dart__mpi__max_chunk_datatype[dtype], - win), - "MPI_Put"); - src_ptr += nchunks * MAX_CONTIG_ELEMENTS; - offset += nchunks * MAX_CONTIG_ELEMENTS; + dart_ret_t ret = DART_OK; + bool needs_flush = false; + + if (dart__mpi__datatype_isbasic(src_type) && + dart__mpi__datatype_isbasic(dst_type)) { + // fast path for basic data types + CHECK_EQUAL_BASETYPE(src_type, dst_type); + ret = dart__mpi__put_basic(team_data, team_unit_id, seginfo, src, + offset, nelem, src_type, + NULL, NULL, &needs_flush); + } else { + // slow path for complex data types + ret = dart__mpi__put_complex(team_unit_id, seginfo, src, + offset, nelem, src_type, dst_type, + NULL, NULL, &needs_flush); } - if (remainder > 0) { - MPI_Datatype mpi_dtype = dart__mpi__datatype(dtype); - DART_LOG_TRACE( - "dart_put_blocking: MPI_Put (src %p, size %zu)", src_ptr, remainder); - CHECK_MPI_RET( - MPI_Put(src_ptr, - remainder, - mpi_dtype, - team_unit_id.id, - offset, - remainder, - mpi_dtype, - win), - "MPI_Put"); + if (ret == DART_OK && needs_flush) { + DART_LOG_DEBUG("dart_put_blocking: MPI_Win_flush"); + CHECK_MPI_RET(MPI_Win_flush(team_unit_id.id, win), "MPI_Win_flush"); } - DART_LOG_DEBUG("dart_put_blocking: MPI_Win_flush"); - CHECK_MPI_RET(MPI_Win_flush(team_unit_id.id, win), "MPI_Win_flush"); - DART_LOG_DEBUG("dart_put_blocking > finished"); - return DART_OK; + return ret; } /** * \todo Check if MPI_Accumulate (REPLACE) yields better performance */ dart_ret_t dart_get_blocking( - void * dest, - dart_gptr_t gptr, - size_t nelem, - dart_datatype_t dtype) + void * dest, + dart_gptr_t gptr, + size_t nelem, + dart_datatype_t src_type, + dart_datatype_t dst_type) { dart_team_unit_t team_unit_id = DART_TEAM_UNIT_ID(gptr.unitid); uint64_t offset = gptr.addr_or_offs.offset; int16_t seg_id = gptr.segid; dart_team_t teamid = gptr.teamid; + CHECK_EQUAL_BASETYPE(src_type, dst_type); + dart_team_data_t *team_data = dart_adapt_teamlist_get(teamid); - if (team_data == NULL) { + if (dart__unlikely(team_data == NULL)) { DART_LOG_ERROR("dart_get_blocking ! failed: Unknown team %i!", teamid); return DART_ERR_INVAL; } @@ -1217,84 +1035,31 @@ dart_ret_t dart_get_blocking( return DART_ERR_INVAL; } - if (team_data->unitid == team_unit_id.id) { - // use direct memcpy if we are on the same unit - memcpy(dest, seginfo->selfbaseptr + offset, - nelem * dart__mpi__datatype_sizeof(dtype)); - DART_LOG_DEBUG("dart_get_blocking: memcpy nelem:%zu " - "source (coll.): offset:%lu -> dest: %p", - nelem, offset, dest); - return DART_OK; - } - -#if !defined(DART_MPI_DISABLE_SHARED_WINDOWS) - DART_LOG_DEBUG("dart_get_blocking: shared windows enabled"); - if (seg_id >= 0 && team_data->sharedmem_tab[team_unit_id.id].id >= 0) { - return get_shared_mem(team_data, seginfo, dest, offset, - team_unit_id, nelem, dtype); - } -#else - DART_LOG_DEBUG("dart_get_blocking: shared windows disabled"); -#endif /* !defined(DART_MPI_DISABLE_SHARED_WINDOWS) */ - - /* - * MPI shared windows disabled or target and calling unit are on different - * nodes, use MPI_Rget: - */ - - MPI_Win win = seginfo->win; - offset += dart_segment_disp(seginfo, team_unit_id); - - /* - * Using MPI_Get as MPI_Win_flush is required to ensure remote completion. - */ - // chunk up the get - const size_t nchunks = nelem / MAX_CONTIG_ELEMENTS; - const size_t remainder = nelem % MAX_CONTIG_ELEMENTS; - char * dest_ptr = (char*) dest; - MPI_Request reqs[2] = {MPI_REQUEST_NULL, MPI_REQUEST_NULL}; - int nreqs = 0; - - if (nchunks > 0) { - DART_LOG_TRACE("dart_get_blocking: MPI_Rget (dest %p, size %zu)", - dest_ptr, nchunks * MAX_CONTIG_ELEMENTS); + dart_ret_t ret = DART_OK; - CHECK_MPI_RET( - MPI_Rget(dest_ptr, - nchunks, - dart__mpi__max_chunk_datatype[dtype], - team_unit_id.id, - offset, - nchunks, - dart__mpi__max_chunk_datatype[dtype], - win, - &reqs[nreqs++]), - "MPI_Rget"); - offset += nchunks * MAX_CONTIG_ELEMENTS; - dest_ptr += nchunks * MAX_CONTIG_ELEMENTS; + MPI_Request reqs[2] = {MPI_REQUEST_NULL, MPI_REQUEST_NULL}; + uint8_t num_reqs = 0; + + // leave complex data type handling to MPI + if (dart__mpi__datatype_isbasic(src_type) && + dart__mpi__datatype_isbasic(dst_type)) { + // fast-path for basic types + CHECK_EQUAL_BASETYPE(src_type, dst_type); + ret = dart__mpi__get_basic(team_data, team_unit_id, seginfo, dest, + offset, nelem, src_type, + reqs, &num_reqs); + } else { + // slow path for derived types + ret = dart__mpi__get_complex(team_unit_id, seginfo, dest, + offset, nelem, src_type, dst_type, + reqs, &num_reqs); } - if (remainder > 0) { - MPI_Datatype mpi_dtype = dart__mpi__datatype(dtype); - DART_LOG_TRACE( - "dart_get_blocking: MPI_Rget (dest %p, size %zu)", dest_ptr, remainder); - + if (ret == DART_OK && num_reqs > 0) { CHECK_MPI_RET( - MPI_Rget(dest_ptr, - remainder, - mpi_dtype, - team_unit_id.id, - offset, - remainder, - mpi_dtype, - win, - &reqs[nreqs++]), - "MPI_Rget"); + MPI_Waitall(num_reqs, reqs, MPI_STATUSES_IGNORE), "MPI_Waitall"); } - CHECK_MPI_RET( - MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE), "MPI_Waitall"); - DART_LOG_DEBUG("dart_get_blocking > finished"); return DART_OK; } @@ -1859,14 +1624,14 @@ dart_ret_t dart_bcast( if (nchunks > 0) { CHECK_MPI_RET( MPI_Bcast(src_ptr, nchunks, - dart__mpi__max_chunk_datatype[dtype], + dart__mpi__datatype_maxtype(dtype), root.id, comm), "MPI_Bcast"); src_ptr += nchunks * MAX_CONTIG_ELEMENTS; } if (remainder > 0) { - MPI_Datatype mpi_dtype = dart__mpi__datatype(dtype); + MPI_Datatype mpi_dtype = dart__mpi__datatype_struct(dtype)->basic.mpi_type; CHECK_MPI_RET( MPI_Bcast(src_ptr, remainder, mpi_dtype, root.id, comm), "MPI_Bcast"); @@ -1885,6 +1650,8 @@ dart_ret_t dart_scatter( dart_team_unit_t root, dart_team_t teamid) { + CHECK_IS_BASICTYPE(dtype); + dart_team_data_t *team_data = dart_adapt_teamlist_get(teamid); if (dart__unlikely(team_data == NULL)) { DART_LOG_ERROR("dart_scatter ! failed: unknown team %d", teamid); @@ -1902,7 +1669,7 @@ dart_ret_t dart_scatter( MPI_Comm comm = team_data->comm; if (nchunks > 0) { - MPI_Datatype mpi_dtype = dart__mpi__max_chunk_datatype[dtype]; + MPI_Datatype mpi_dtype = dart__mpi__datatype_maxtype(dtype); CHECK_MPI_RET( MPI_Scatter( send_ptr, @@ -1919,7 +1686,7 @@ dart_ret_t dart_scatter( } if (remainder > 0) { - MPI_Datatype mpi_dtype = dart__mpi__datatype(dtype); + MPI_Datatype mpi_dtype = dart__mpi__datatype_struct(dtype)->basic.mpi_type; CHECK_MPI_RET( MPI_Scatter( send_ptr, @@ -1947,6 +1714,8 @@ dart_ret_t dart_gather( DART_LOG_TRACE("dart_gather() team:%d nelem:%"PRIu64"", teamid, nelem); + CHECK_IS_BASICTYPE(dtype); + dart_team_data_t *team_data = dart_adapt_teamlist_get(teamid); if (dart__unlikely(team_data == NULL)) { DART_LOG_ERROR("dart_gather ! failed: unknown teamid %d", teamid); @@ -1964,7 +1733,7 @@ dart_ret_t dart_gather( MPI_Comm comm = team_data->comm; if (nchunks > 0) { - MPI_Datatype mpi_dtype = dart__mpi__max_chunk_datatype[dtype]; + MPI_Datatype mpi_dtype = dart__mpi__datatype_maxtype(dtype); CHECK_MPI_RET( MPI_Gather( send_ptr, @@ -1981,7 +1750,7 @@ dart_ret_t dart_gather( } if (remainder > 0) { - MPI_Datatype mpi_dtype = dart__mpi__datatype(dtype); + MPI_Datatype mpi_dtype = dart__mpi__datatype_struct(dtype)->basic.mpi_type; CHECK_MPI_RET( MPI_Gather( send_ptr, @@ -2008,6 +1777,8 @@ dart_ret_t dart_allgather( DART_LOG_TRACE("dart_allgather() team:%d nelem:%"PRIu64"", teamid, nelem); + CHECK_IS_BASICTYPE(dtype); + dart_team_data_t *team_data = dart_adapt_teamlist_get(teamid); if (dart__unlikely(team_data == NULL)) { DART_LOG_ERROR("dart_allgather ! unknown teamid %d", teamid); @@ -2027,7 +1798,7 @@ dart_ret_t dart_allgather( MPI_Comm comm = team_data->comm; if (nchunks > 0) { - MPI_Datatype mpi_dtype = dart__mpi__max_chunk_datatype[dtype]; + MPI_Datatype mpi_dtype = dart__mpi__datatype_maxtype(dtype); CHECK_MPI_RET( MPI_Allgather( send_ptr, @@ -2043,7 +1814,7 @@ dart_ret_t dart_allgather( } if (remainder > 0) { - MPI_Datatype mpi_dtype = dart__mpi__datatype(dtype); + MPI_Datatype mpi_dtype = dart__mpi__datatype_struct(dtype)->basic.mpi_type; CHECK_MPI_RET( MPI_Allgather( send_ptr, @@ -2073,6 +1844,8 @@ dart_ret_t dart_allgatherv( DART_LOG_TRACE("dart_allgatherv() team:%d nsendelem:%"PRIu64"", teamid, nsendelem); + CHECK_IS_BASICTYPE(dtype); + /* * MPI uses offset type int, do not copy more than INT_MAX elements: */ @@ -2112,7 +1885,7 @@ dart_ret_t dart_allgatherv( irecvdispls[i] = recvdispls[i]; } - MPI_Datatype mpi_dtype = dart__mpi__datatype(dtype); + MPI_Datatype mpi_dtype = dart__mpi__datatype_struct(dtype)->basic.mpi_type; if (MPI_Allgatherv( sendbuf, nsendelem, @@ -2143,8 +1916,11 @@ dart_ret_t dart_allreduce( dart_operation_t op, dart_team_t team) { + + CHECK_IS_BASICTYPE(dtype); + MPI_Op mpi_op = dart__mpi__op(op); - MPI_Datatype mpi_dtype = dart__mpi__datatype(dtype); + MPI_Datatype mpi_dtype = dart__mpi__datatype_struct(dtype)->basic.mpi_type; /* * MPI uses offset type int, do not copy more than INT_MAX elements: @@ -2182,9 +1958,9 @@ dart_ret_t dart_reduce( dart_team_t team) { MPI_Comm comm; + CHECK_IS_BASICTYPE(dtype); MPI_Op mpi_op = dart__mpi__op(op); - MPI_Datatype mpi_dtype = dart__mpi__datatype(dtype); - + MPI_Datatype mpi_dtype = dart__mpi__datatype_struct(dtype)->basic.mpi_type; /* * MPI uses offset type int, do not copy more than INT_MAX elements: */ @@ -2217,13 +1993,14 @@ dart_ret_t dart_reduce( dart_ret_t dart_send( const void * sendbuf, - size_t nelem, - dart_datatype_t dtype, - int tag, - dart_global_unit_t unit) + size_t nelem, + dart_datatype_t dtype, + int tag, + dart_global_unit_t unit) { MPI_Comm comm; - MPI_Datatype mpi_dtype = dart__mpi__datatype(dtype); + CHECK_IS_BASICTYPE(dtype); + MPI_Datatype mpi_dtype = dart__mpi__datatype_struct(dtype)->basic.mpi_type; dart_team_t team = DART_TEAM_ALL; /* @@ -2264,7 +2041,8 @@ dart_ret_t dart_recv( dart_global_unit_t unit) { MPI_Comm comm; - MPI_Datatype mpi_dtype = dart__mpi__datatype(dtype); + CHECK_IS_BASICTYPE(dtype); + MPI_Datatype mpi_dtype = dart__mpi__datatype_struct(dtype)->basic.mpi_type; dart_team_t team = DART_TEAM_ALL; /* @@ -2311,8 +2089,12 @@ dart_ret_t dart_sendrecv( dart_global_unit_t src) { MPI_Comm comm; - MPI_Datatype mpi_send_dtype = dart__mpi__datatype(send_dtype); - MPI_Datatype mpi_recv_dtype = dart__mpi__datatype(recv_dtype); + CHECK_IS_BASICTYPE(send_dtype); + CHECK_IS_BASICTYPE(recv_dtype); + MPI_Datatype mpi_send_dtype = + dart__mpi__datatype_struct(send_dtype)->basic.mpi_type; + MPI_Datatype mpi_recv_dtype = + dart__mpi__datatype_struct(recv_dtype)->basic.mpi_type; dart_team_t team = DART_TEAM_ALL; /* @@ -2352,5 +2134,3 @@ dart_ret_t dart_sendrecv( "MPI_Sendrecv"); return DART_OK; } - - diff --git a/dart-impl/mpi/src/dart_globmem.c b/dart-impl/mpi/src/dart_globmem.c index f9d95daac..1553b4a9b 100644 --- a/dart-impl/mpi/src/dart_globmem.c +++ b/dart-impl/mpi/src/dart_globmem.c @@ -368,7 +368,7 @@ dart_team_memalloc_aligned_dynamic( "baseptr:%p segid:%i across team %d", nbytes, gptr_unitid, sub_mem, segment->segid, teamid); - return DART_OK; + return DART_OK; } static dart_ret_t @@ -447,6 +447,7 @@ dart_team_memalloc_aligned( dart_datatype_t dtype, dart_gptr_t * gptr) { + CHECK_IS_BASICTYPE(dtype); #ifdef DART_MPI_ENABLE_DYNAMIC_WINDOWS return dart_team_memalloc_aligned_dynamic(teamid, nelem, dtype, gptr); #else @@ -548,6 +549,7 @@ dart_team_memregister_aligned( void * addr, dart_gptr_t * gptr) { + CHECK_IS_BASICTYPE(dtype); size_t size; int dtype_size = dart__mpi__datatype_sizeof(dtype); size_t nbytes = nelem * dtype_size; @@ -615,6 +617,7 @@ dart_team_memregister( void * addr, dart_gptr_t * gptr) { + CHECK_IS_BASICTYPE(dtype); int nil; size_t size; int dtype_size = dart__mpi__datatype_sizeof(dtype); @@ -722,5 +725,3 @@ dart_team_memderegister( unitid.id, gptr.addr_or_offs.offset, gptr.unitid, teamid); return DART_OK; } - - diff --git a/dart-impl/mpi/src/dart_mpi_types.c b/dart-impl/mpi/src/dart_mpi_types.c new file mode 100644 index 000000000..c7f22a1b8 --- /dev/null +++ b/dart-impl/mpi/src/dart_mpi_types.c @@ -0,0 +1,326 @@ +/** + * \file dart_mpi_types.c + * + * Provide functionality for creating derived data types in DART. + * + * Currently implemented: strided types based on basic types. + */ + +#include +#include +#include +#include + +#include +#include +#include +#include + +#define DART_TYPE_NAMELEN 256 + +static const char* __dart_base_type_names[DART_TYPE_LAST+1] = { + "UNDEFINED", + "BYTE", + "SHORT", + "INT", + "UNSIGNED INT", + "LONG", + "UNSIGNED LONG", + "LONG LONG", + "FLOAT", + "DOUBLE", + "INVALID" +}; + +dart_datatype_struct_t __dart_base_types[DART_TYPE_LAST]; + +static +MPI_Datatype +create_max_datatype(MPI_Datatype mpi_type) +{ + MPI_Datatype max_type = MPI_DATATYPE_NULL; + if (mpi_type != MPI_DATATYPE_NULL) { + // create the chunk data type + int ret = MPI_Type_contiguous(MAX_CONTIG_ELEMENTS, + mpi_type, + &max_type); + if (ret != MPI_SUCCESS) { + DART_LOG_ERROR("Failed to create chunk type of DART data type"); + dart_abort(-1); + } + ret = MPI_Type_commit(&max_type); + if (ret != MPI_SUCCESS) { + DART_LOG_ERROR("Failed to commit chunk type of DART data type"); + dart_abort(-1); + } + } + return max_type; +} + +static void +init_basic_datatype( + dart_datatype_t dart_type_id, + MPI_Datatype mpi_type) +{ + int size; + struct dart_datatype_struct *dart_type = &__dart_base_types[dart_type_id]; + dart_type->base_type = dart_type_id; + dart_type->basic.mpi_type = mpi_type; + dart_type->kind = DART_KIND_BASIC; + dart_type->basic.size = 0; + dart_type->num_elem = 0; + if (mpi_type != MPI_DATATYPE_NULL) { + int ret = MPI_Type_size(mpi_type, &size); + if (ret != MPI_SUCCESS) { + DART_LOG_ERROR("Failed to query size of MPI data type!"); + dart_abort(-1); + } + dart_type->basic.size = size; + + // basic types only represent a single element + dart_type->num_elem = 1; + + // create the type used for large transfers + dart_type->basic.max_type = create_max_datatype(mpi_type); + } +} + +dart_ret_t +dart__mpi__datatype_init() +{ + init_basic_datatype(DART_TYPE_UNDEFINED, MPI_DATATYPE_NULL); + init_basic_datatype(DART_TYPE_BYTE, MPI_BYTE); + init_basic_datatype(DART_TYPE_SHORT, MPI_SHORT); + init_basic_datatype(DART_TYPE_INT, MPI_INT); + init_basic_datatype(DART_TYPE_UINT, MPI_UNSIGNED); + init_basic_datatype(DART_TYPE_LONG, MPI_LONG); + init_basic_datatype(DART_TYPE_ULONG, MPI_UNSIGNED_LONG); + init_basic_datatype(DART_TYPE_LONGLONG, MPI_LONG_LONG); + init_basic_datatype(DART_TYPE_FLOAT, MPI_FLOAT); + init_basic_datatype(DART_TYPE_DOUBLE, MPI_DOUBLE); + + return DART_OK; +} + +char* dart__mpi__datatype_name(dart_datatype_t dart_type) +{ + char *buf = NULL; + if (dart_type <= DART_TYPE_LAST) { + buf = strdup(__dart_base_type_names[dart_type]); + } else { + dart_datatype_struct_t *dts = dart__mpi__datatype_struct(dart_type); + if (dts->kind == DART_KIND_INDEXED) { + buf = malloc(DART_TYPE_NAMELEN); + char *base_name = dart__mpi__datatype_name(dts->base_type); + snprintf(buf, DART_TYPE_NAMELEN, "INDEXED(%i:%s)", + dts->indexed.num_blocks, base_name); + free(base_name); + } else if (dts->kind == DART_KIND_STRIDED){ + buf = malloc(DART_TYPE_NAMELEN); + char *base_name = dart__mpi__datatype_name(dts->base_type); + snprintf(buf, DART_TYPE_NAMELEN, "STRIDED(%zu:%i:%s)", + dts->num_elem, dts->strided.stride, base_name); + free(base_name); + } else { + DART_LOG_ERROR("INVALID data type detected!"); + } + } + return buf; +} + +dart_ret_t +dart_type_create_strided( + dart_datatype_t basetype_id, + size_t stride, + size_t blocklen, + dart_datatype_t * newtype) +{ + + if (newtype == NULL) { + DART_LOG_ERROR("newtype pointer may not be NULL!"); + return DART_ERR_INVAL; + } + + *newtype = DART_TYPE_UNDEFINED; + + dart_datatype_struct_t *basetype = dart__mpi__datatype_struct(basetype_id); + + if (basetype->kind != DART_KIND_BASIC) { + DART_LOG_ERROR("Only basic data types allowed in strided datatypes!"); + return DART_ERR_INVAL; + } + + if (stride > INT_MAX || blocklen > INT_MAX) { + DART_LOG_ERROR("dart_type_create_strided: arguments out of range (>INT_MAX)"); + return DART_ERR_INVAL; + } + + //MPI_Datatype new_mpi_dtype; + //MPI_Type_vector(num_blocks, blocklen, stride, mpi_dtype, &new_mpi_dtype); + //MPI_Type_commit(&new_mpi_dtype); + dart_datatype_struct_t *new_struct; + new_struct = malloc(sizeof(struct dart_datatype_struct)); + new_struct->base_type = basetype_id; + new_struct->kind = DART_KIND_STRIDED; + new_struct->num_elem = blocklen; + new_struct->strided.stride = stride; + + *newtype = (dart_datatype_t)new_struct; + + DART_LOG_TRACE("Created new strided data type %p", new_struct); + + return DART_OK; +} + + +MPI_Datatype +dart__mpi__create_strided_mpi( + dart_datatype_t dart_type, + size_t num_blocks) +{ + MPI_Datatype new_mpi_dtype; + dart_datatype_struct_t *dts = dart__mpi__datatype_struct(dart_type); + MPI_Type_vector( + num_blocks, // the number of blocks + dts->num_elem, // the number of elements per block + dts->strided.stride, // the number of elements between start of each block + dart__mpi__datatype_struct(dts->base_type)->basic.mpi_type, + &new_mpi_dtype); + MPI_Type_commit(&new_mpi_dtype); + return new_mpi_dtype; +} + +void +dart__mpi__destroy_strided_mpi(MPI_Datatype *mpi_type) +{ + MPI_Type_free(mpi_type); +} + +dart_ret_t +dart_type_create_indexed( + dart_datatype_t basetype, + size_t count, + const size_t blocklen[], + const size_t offset[], + dart_datatype_t * newtype) +{ + if (newtype == NULL) { + DART_LOG_ERROR("newtype pointer may not be NULL!"); + return DART_ERR_INVAL; + } + + *newtype = DART_TYPE_UNDEFINED; + dart_datatype_struct_t *basetype_struct = dart__mpi__datatype_struct(basetype); + if (basetype_struct->kind != DART_KIND_BASIC) { + DART_LOG_ERROR("Only basic data types allowed in indexed datatypes!"); + return DART_ERR_INVAL; + } + + // check for overflows + if (count > INT_MAX) { + DART_LOG_ERROR("dart_type_create_indexed: count > INT_MAX"); + return DART_ERR_INVAL; + } + + int *mpi_blocklen = malloc(sizeof(int) * count); + int *mpi_disps = malloc(sizeof(int) * count); + + size_t num_elem = 0; + for (size_t i = 0; i < count; ++i) { + if (blocklen[i] > INT_MAX) { + DART_LOG_ERROR("dart_type_create_indexed: blocklen[%zu] > INT_MAX", i); + free(mpi_blocklen); + free(mpi_disps); + return DART_ERR_INVAL; + } + if (offset[i] > INT_MAX) { + DART_LOG_ERROR("dart_type_create_indexed: offset[%zu] > INT_MAX", i); + free(mpi_blocklen); + free(mpi_disps); + return DART_ERR_INVAL; + } + mpi_blocklen[i] = blocklen[i]; + mpi_disps[i] = offset[i]; + num_elem += blocklen[i]; + } + + MPI_Datatype mpi_base_type = basetype_struct->basic.mpi_type; + MPI_Datatype new_mpi_dtype; + int ret = MPI_Type_indexed( + count, mpi_blocklen, mpi_disps, mpi_base_type, &new_mpi_dtype); + if (ret != MPI_SUCCESS) { + DART_LOG_ERROR("dart_type_create_indexed: failed to create indexed type!"); + free(mpi_blocklen); + free(mpi_disps); + return DART_ERR_INVAL; + } + + MPI_Type_commit(&new_mpi_dtype); + dart_datatype_struct_t *new_struct; + new_struct = malloc(sizeof(struct dart_datatype_struct)); + new_struct->base_type = basetype; + new_struct->kind = DART_KIND_INDEXED; + new_struct->num_elem = num_elem; + new_struct->indexed.mpi_type = new_mpi_dtype; + new_struct->indexed.blocklens = mpi_blocklen; + new_struct->indexed.offsets = mpi_disps; + new_struct->indexed.num_blocks = count; + + *newtype = (dart_datatype_t)new_struct; + + DART_LOG_TRACE("Created new indexed data type %p (mpi_type %p) with %zu elements", + new_struct, new_mpi_dtype, num_elem); + + return DART_OK; +} + +dart_ret_t +dart_type_destroy(dart_datatype_t *dart_type_ptr) +{ + if (dart_type_ptr == NULL) { + return DART_ERR_INVAL; + } + + dart_datatype_struct_t *dart_type = dart__mpi__datatype_struct(*dart_type_ptr); + + if (dart_type->kind == DART_KIND_BASIC) { + DART_LOG_ERROR("dart_type_destroy: Cannot destroy basic type!"); + return DART_ERR_INVAL; + } + + if (dart_type->kind == DART_KIND_INDEXED) { + free(dart_type->indexed.blocklens); + dart_type->indexed.blocklens = NULL; + free(dart_type->indexed.offsets); + dart_type->indexed.offsets = NULL; + MPI_Type_free(&dart_type->indexed.mpi_type); + } + + free(dart_type); + *dart_type_ptr = DART_TYPE_UNDEFINED; + + return DART_OK; +} + +static void destroy_basic_type(dart_datatype_t dart_type_id) +{ + dart_datatype_struct_t *dart_type = dart__mpi__datatype_struct(dart_type_id); + MPI_Type_free(&dart_type->basic.max_type); + dart_type->basic.max_type = MPI_DATATYPE_NULL; +} + +dart_ret_t +dart__mpi__datatype_fini() +{ + destroy_basic_type(DART_TYPE_BYTE); + destroy_basic_type(DART_TYPE_SHORT); + destroy_basic_type(DART_TYPE_INT); + destroy_basic_type(DART_TYPE_UINT); + destroy_basic_type(DART_TYPE_LONG); + destroy_basic_type(DART_TYPE_ULONG); + destroy_basic_type(DART_TYPE_LONGLONG); + destroy_basic_type(DART_TYPE_FLOAT); + destroy_basic_type(DART_TYPE_DOUBLE); + + return DART_OK; +} diff --git a/dash/include/dash/GlobAsyncRef.h b/dash/include/dash/GlobAsyncRef.h index 1a03a45e7..d80137093 100644 --- a/dash/include/dash/GlobAsyncRef.h +++ b/dash/include/dash/GlobAsyncRef.h @@ -189,11 +189,7 @@ class GlobAsyncRef nonconst_value_type get() const { nonconst_value_type value; DASH_LOG_TRACE_VAR("GlobAsyncRef.T()", _gptr); - dash::dart_storage ds(1); - DASH_ASSERT_RETURNS( - dart_get_blocking(static_cast(&value), _gptr, ds.nelem, ds.dtype), - DART_OK - ); + dash::internal::get_blocking(_gptr, &value, 1); return value; } @@ -204,11 +200,7 @@ class GlobAsyncRef * at which point the referenced value can be used. */ void get(nonconst_value_type *tptr) const { - dash::dart_storage ds(1); - DASH_ASSERT_RETURNS( - dart_get(static_cast(tptr), _gptr, ds.nelem, ds.dtype), - DART_OK - ); + dash::internal::get(_gptr, tptr, 1); } /** @@ -230,11 +222,7 @@ class GlobAsyncRef void set(const_value_type* tptr) { DASH_LOG_TRACE_VAR("GlobAsyncRef.set()", *tptr); DASH_LOG_TRACE_VAR("GlobAsyncRef.set()", _gptr); - dash::dart_storage ds(1); - DASH_ASSERT_RETURNS( - dart_put(_gptr, static_cast(tptr), ds.nelem, ds.dtype), - DART_OK - ); + dash::internal::put(_gptr, tptr, 1); } /** @@ -246,7 +234,6 @@ class GlobAsyncRef void set(const_value_type& new_value) { DASH_LOG_TRACE_VAR("GlobAsyncRef.set()", new_value); DASH_LOG_TRACE_VAR("GlobAsyncRef.set()", _gptr); - dash::dart_storage ds(1); _value = new_value; // check that we do not overwrite the handle if it has been used before if (this->_handle != DART_HANDLE_NULL) { @@ -255,11 +242,7 @@ class GlobAsyncRef DART_OK ); } - DASH_ASSERT_RETURNS( - dart_put_handle(_gptr, static_cast(&_value), - ds.nelem, ds.dtype, &_handle), - DART_OK - ); + dash::internal::put_handle(_gptr, &_value, 1, &_handle); } /** diff --git a/dash/include/dash/GlobRef.h b/dash/include/dash/GlobRef.h index 44b67b76b..2f22c03e5 100644 --- a/dash/include/dash/GlobRef.h +++ b/dash/include/dash/GlobRef.h @@ -168,11 +168,7 @@ class GlobRef DASH_LOG_TRACE("GlobRef.T()", "conversion operator"); DASH_LOG_TRACE_VAR("GlobRef.T()", _gptr); nonconst_value_type t; - dash::dart_storage ds(1); - DASH_ASSERT_RETURNS( - dart_get_blocking(static_cast(&t), _gptr, ds.nelem, ds.dtype), - DART_OK - ); + dash::internal::get_blocking(_gptr, &t, 1); DASH_LOG_TRACE_VAR("GlobRef.T >", _gptr); return t; } @@ -205,12 +201,7 @@ class GlobRef DASH_LOG_TRACE_VAR("GlobRef.set", _gptr); // TODO: Clarify if dart-call can be avoided if // _gptr->is_local() - dash::dart_storage ds(1); - DASH_ASSERT_RETURNS( - dart_put_blocking( - _gptr, static_cast(&val), ds.nelem, ds.dtype), - DART_OK - ); + dash::internal::put_blocking(_gptr, &val, 1); DASH_LOG_TRACE_VAR("GlobRef.set >", _gptr); } @@ -218,54 +209,32 @@ class GlobRef DASH_LOG_TRACE("T GlobRef.get()", "explicit get"); DASH_LOG_TRACE_VAR("GlobRef.T()", _gptr); nonconst_value_type t; - dash::dart_storage ds(1); - DASH_ASSERT_RETURNS( - dart_get_blocking(static_cast(&t), _gptr, ds.nelem, ds.dtype), - DART_OK - ); + dash::internal::get_blocking(_gptr, &t, 1); return t; } void get(nonconst_value_type *tptr) const { DASH_LOG_TRACE("GlobRef.get(T*)", "explicit get into provided ptr"); DASH_LOG_TRACE_VAR("GlobRef.T()", _gptr); - dash::dart_storage ds(1); - DASH_ASSERT_RETURNS( - dart_get_blocking(static_cast(tptr), _gptr, ds.nelem, ds.dtype), - DART_OK - ); + dash::internal::get_blocking(_gptr, tptr, 1); } void get(nonconst_value_type& tref) const { DASH_LOG_TRACE("GlobRef.get(T&)", "explicit get into provided ref"); DASH_LOG_TRACE_VAR("GlobRef.T()", _gptr); - dash::dart_storage ds(1); - DASH_ASSERT_RETURNS( - dart_get_blocking(static_cast(&tref), _gptr, ds.nelem, ds.dtype), - DART_OK - ); + dash::internal::get_blocking(_gptr, &tref, 1); } void put(const_value_type& tref) { DASH_LOG_TRACE("GlobRef.put(T&)", "explicit put of provided ref"); DASH_LOG_TRACE_VAR("GlobRef.T()", _gptr); - dash::dart_storage ds(1); - DASH_ASSERT_RETURNS( - dart_put_blocking( - _gptr, static_cast(&tref), ds.nelem, ds.dtype), - DART_OK - ); + dash::internal::put_blocking(_gptr, &tref, 1); } void put(const_value_type* tptr) { DASH_LOG_TRACE("GlobRef.put(T*)", "explicit put of provided ptr"); DASH_LOG_TRACE_VAR("GlobRef.T()", _gptr); - dash::dart_storage ds(1); - DASH_ASSERT_RETURNS( - dart_put_blocking( - _gptr, static_cast(tptr), ds.nelem, ds.dtype), - DART_OK - ); + dash::internal::put_blocking(_gptr, tptr, 1); } self_t & operator+=(const nonconst_value_type& ref) { diff --git a/dash/include/dash/GlobSharedRef.h b/dash/include/dash/GlobSharedRef.h index 5154717ca..d33e2a99a 100644 --- a/dash/include/dash/GlobSharedRef.h +++ b/dash/include/dash/GlobSharedRef.h @@ -176,8 +176,7 @@ class GlobSharedRef } else if (!DART_GPTR_ISNULL(_gptr)) { DASH_LOG_TRACE_VAR("GlobSharedRef.T()", _gptr); T t; - dash::dart_storage ds(1); - dart_get_blocking(static_cast(&t), _gptr, ds.nelem, ds.dtype); + dash::internal::get_blocking(_gptr, &t, 1); return t; } DASH_THROW( @@ -201,8 +200,7 @@ class GlobSharedRef t = *_lptr; } else if (!DART_GPTR_ISNULL(_gptr)) { DASH_LOG_TRACE_VAR("GlobSharedRef.T()", _gptr); - dash::dart_storage ds(1); - dart_get_blocking(static_cast(&t), _gptr, ds.nelem, ds.dtype); + dash::internal::get_blocking(_gptr, &t, 1); } return t; } @@ -215,7 +213,7 @@ class GlobSharedRef } else if (!DART_GPTR_ISNULL(_gptr)) { DASH_LOG_TRACE_VAR("GlobSharedRef.T()", _gptr); dash::dart_storage ds(1); - dart_put_blocking(_gptr, static_cast(&val), ds.nelem, ds.dtype); + dash::internal::put_blocking(_gptr, &val, 1); } DASH_LOG_TRACE("GlobSharedRef.put >"); } @@ -233,12 +231,7 @@ class GlobSharedRef *_lptr = val; } else if (!DART_GPTR_ISNULL(_gptr)) { DASH_LOG_TRACE_VAR("GlobSharedRef.=", _gptr); - dash::dart_storage ds(1); - dart_put_blocking( - _gptr, - static_cast(&val), - ds.nelem, - ds.dtype); + dash::internal::put_blocking(_gptr, &val, 1); } DASH_LOG_TRACE("GlobSharedRef.= >"); return *this; diff --git a/dash/include/dash/Onesided.h b/dash/include/dash/Onesided.h index f0e5e5003..c6fc53925 100644 --- a/dash/include/dash/Onesided.h +++ b/dash/include/dash/Onesided.h @@ -8,8 +8,144 @@ namespace dash { +namespace internal { + + /** + * Non-blocking write of \c nelem values from \c src to the global memory + * location referenced by \c gptr. + * + * \sa dart_put + */ + template + inline + void + put(const dart_gptr_t& gptr, const T *src, size_t nelem) { + dash::dart_storage ds(nelem); + DASH_ASSERT_RETURNS( + dart_put(gptr, + src, + ds.nelem, + ds.dtype, + ds.dtype), + DART_OK); + } + + /** + * Non-blocking read of \c nelem values the global memory + * location referenced by \c gptr into memory referenced by \c src. + * + * \sa dart_get + */ + template + inline + void + get(const dart_gptr_t& gptr, T *dst, size_t nelem) { + dash::dart_storage ds(nelem); + DASH_ASSERT_RETURNS( + dart_get(dst, + gptr, + ds.nelem, + ds.dtype, + ds.dtype), + DART_OK); + } + + /** + * Blocking write of \c nelem values from \c src to the global memory + * location referenced by \c gptr. + * + * \sa dart_put_blocking + */ + template + inline + void + put_blocking(const dart_gptr_t& gptr, const T *src, size_t nelem) { + dash::dart_storage ds(nelem); + DASH_ASSERT_RETURNS( + dart_put_blocking(gptr, + src, + ds.nelem, + ds.dtype, + ds.dtype), + DART_OK); + } + + /** + * Blocking read of \c nelem values the global memory + * location referenced by \c gptr into memory referenced by \c src. + * + * \sa dart_get_blocking + */ + template + inline + void + get_blocking(const dart_gptr_t& gptr, T *dst, size_t nelem) { + dash::dart_storage ds(nelem); + DASH_ASSERT_RETURNS( + dart_get_blocking(dst, + gptr, + ds.nelem, + ds.dtype, + ds.dtype), + DART_OK); + } + + /** + * Write of \c nelem values from \c src to the global memory + * location referenced by \c gptr. Creates a handle that can be used to + * wait for completion. + * + * \sa dart_put_handle + */ + template + inline + void + put_handle( + const dart_gptr_t & gptr, + const T * src, + size_t nelem, + dart_handle_t * handle) { + dash::dart_storage ds(nelem); + DASH_ASSERT_RETURNS( + dart_put_handle(gptr, + src, + ds.nelem, + ds.dtype, + ds.dtype, + handle), + DART_OK); + } + + /** + * Non-blocking read of \c nelem values the global memory + * location referenced by \c gptr into memory referenced by \c src. + * Creates a handle that can be used to wait for completion. + * + * \sa dart_get_handle + */ + template + inline + void + get_handle( + const dart_gptr_t & gptr, + T * dst, + size_t nelem, + dart_handle_t * handle) { + dash::dart_storage ds(nelem); + DASH_ASSERT_RETURNS( + dart_get_handle(dst, + gptr, + ds.nelem, + ds.dtype, + ds.dtype, + handle), + DART_OK); + } + +} // namespace internal + /** - * Block until completion of local and global operations on a global + * Block until local and global completion of operations on a global * address. */ template @@ -17,19 +153,19 @@ void fence( const GlobPtrType & gptr) { DASH_ASSERT_RETURNS( - dart_fence(gptr.dart_gptr()), + dart_flush(gptr.dart_gptr()), DART_OK); } /** - * Block until completion of local operations on a global address. + * Block until local completion of operations on a global address. */ template void fence_local( const GlobPtrType & gptr) { DASH_ASSERT_RETURNS( - dart_fence_local(gptr.dart_gptr()), + dart_flush_local(gptr.dart_gptr()), DART_OK); } @@ -40,19 +176,14 @@ void fence_local( * \nonblocking */ template +inline void put_value_async( /// [IN] Value to set const T & newval, /// [IN] Global pointer referencing target address of value const GlobPtrType & gptr) { - dash::dart_storage ds(1); - DASH_ASSERT_RETURNS( - dart_put(gptr.dart_gptr(), - (void *)(&newval), - ds.nelem, - ds.dtype), - DART_OK); + dash::internal::put(gptr.dart_gptr(), &newval, 1); } /** @@ -62,6 +193,7 @@ void put_value_async( * \nonblocking */ template +inline void get_value_async( /// [OUT] Local pointer that will contain the value of the /// global address @@ -69,13 +201,7 @@ void get_value_async( /// [IN] Global pointer to read const GlobPtrType & gptr) { - dash::dart_storage ds(1); - DASH_ASSERT_RETURNS( - dart_get(ptr, - gptr.dart_gptr(), - ds.nelem, - ds.dtype), - DART_OK); + dash::internal::get(gptr.dart_gptr(), ptr, 1); } /** @@ -84,19 +210,14 @@ void get_value_async( * \blocking */ template +inline void put_value( /// [IN] Value to set const T & newval, /// [IN] Global pointer referencing target address of value const GlobPtrType & gptr) { - dash::dart_storage ds(1); - DASH_ASSERT_RETURNS( - dart_put_blocking(gptr.dart_gptr(), - (void *)(&newval), - ds.nelem, - ds.dtype), - DART_OK); + dash::internal::put_blocking(gptr.dart_gptr(), &newval, 1); } /** @@ -105,6 +226,7 @@ void put_value( * \blocking */ template +inline void get_value( /// [OUT] Local pointer that will contain the value of the /// global address @@ -112,13 +234,7 @@ void get_value( /// [IN] Global pointer to read const GlobPtrType & gptr) { - dash::dart_storage ds(1); - DASH_ASSERT_RETURNS( - dart_get_blocking(ptr, - gptr.dart_gptr(), - ds.nelem, - ds.dtype), - DART_OK); + dash::internal::get_blocking(gptr.dart_gptr(), ptr, 1); } } // namespace dash diff --git a/dash/include/dash/Types.h b/dash/include/dash/Types.h index ec40f787c..889c6e1bb 100644 --- a/dash/include/dash/Types.h +++ b/dash/include/dash/Types.h @@ -159,6 +159,12 @@ struct dart_datatype { static constexpr const dart_datatype_t value = DART_TYPE_DOUBLE; }; +template +struct dart_datatype : dart_datatype { }; + +template +struct dart_datatype : dart_datatype { }; + namespace internal { diff --git a/dash/include/dash/algorithm/Copy.h b/dash/include/dash/algorithm/Copy.h index f89716e22..2baff0bd2 100644 --- a/dash/include/dash/algorithm/Copy.h +++ b/dash/include/dash/algorithm/Copy.h @@ -174,14 +174,10 @@ ValueType * copy_impl( "left:", total_elem_left); auto cur_in_first = g_in_first + num_elem_copied; auto cur_out_first = out_first + num_elem_copied; - dash::dart_storage ds(num_copy_elem); - DASH_ASSERT_RETURNS( - dart_get_blocking( - cur_out_first, - cur_in_first.dart_gptr(), - ds.nelem, - ds.dtype), - DART_OK); + dash::internal::get_blocking( + cur_in_first.dart_gptr(), + cur_out_first, + num_copy_elem); num_elem_copied += num_copy_elem; } } else { @@ -228,17 +224,7 @@ ValueType * copy_impl( "left:", total_elem_left); auto dest_ptr = out_first + num_elem_copied; auto src_gptr = cur_in_first.dart_gptr(); - dash::dart_storage ds(num_copy_elem); - if (dart_get_blocking( - dest_ptr, - src_gptr, - ds.nelem, - ds.dtype) - != DART_OK) { - DASH_LOG_ERROR("dash::copy_impl", "dart_get failed"); - DASH_THROW( - dash::exception::RuntimeError, "dart_get failed"); - } + dash::internal::get_blocking(src_gptr, dest_ptr, num_copy_elem); num_elem_copied += num_copy_elem; } } @@ -327,27 +313,19 @@ dash::Future copy_async_impl( auto cur_in_first = g_in_first + num_elem_copied; auto cur_out_first = out_first + num_elem_copied; #ifdef DASH__ALGORITHM__COPY__USE_FLUSH - dash::dart_storage ds(num_copy_elem); - DASH_ASSERT_RETURNS( - dart_get( - cur_out_first, - cur_in_first.dart_gptr(), - ds.nelem, - ds.dtype), - DART_OK); + dash::internal::get( + cur_in_first.dart_gptr(), + cur_out_first, + num_copy_elem); req_handles.push_back(in_first.dart_gptr()); #else dart_handle_t get_handle; - dash::dart_storage ds(num_copy_elem); - DASH_ASSERT_RETURNS( - dart_get_handle( - cur_out_first, - cur_in_first.dart_gptr(), - ds.nelem, - ds.dtype, - &get_handle), - DART_OK); - if (get_handle != NULL) { + dash::internal::get_handle( + cur_in_first.dart_gptr(), + cur_out_first, + num_copy_elem, + &get_handle); + if (get_handle != DART_HANDLE_NULL) { req_handles.push_back(get_handle); } #endif @@ -398,30 +376,19 @@ dash::Future copy_async_impl( auto src_gptr = cur_in_first.dart_gptr(); auto dest_ptr = out_first + num_elem_copied; #ifdef DASH__ALGORITHM__COPY__USE_FLUSH - dash::dart_storage ds(num_copy_elem); - if (dart_get( - dest_ptr, + dash::internal::get( src_gptr, - ds.nelem, - ds.dtype) - != DART_OK) { - DASH_LOG_ERROR("dash::copy_async_impl", "dart_get failed"); - DASH_THROW( - dash::exception::RuntimeError, "dart_get failed"); - } + dest_ptr, + num_copy_elem); req_handles.push_back(src_gptr); #else dart_handle_t get_handle; - dash::dart_storage ds(num_copy_elem); - DASH_ASSERT_RETURNS( - dart_get_handle( - dest_ptr, - src_gptr, - ds.nelem, - ds.dtype, - &get_handle), - DART_OK); - if (get_handle != NULL) { + dash::internal::get_handle( + src_gptr, + dest_ptr, + num_copy_elem, + &get_handle); + if (get_handle != DART_HANDLE_NULL) { req_handles.push_back(get_handle); } #endif @@ -494,14 +461,10 @@ GlobOutputIt copy_impl( "g_out_first:", out_first.pos()); auto num_elements = std::distance(in_first, in_last); - dash::dart_storage ds(num_elements); - DASH_ASSERT_RETURNS( - dart_put_blocking( - out_first.dart_gptr(), - in_first, - ds.nelem, - ds.dtype), - DART_OK); + dash::internal::put_blocking( + out_first.dart_gptr(), + in_first, + num_elements); auto out_last = out_first + num_elements; DASH_LOG_TRACE("dash::copy_impl >", @@ -538,30 +501,19 @@ dash::Future copy_async_impl( auto src_ptr = in_first; auto dest_gptr = out_first.dart_gptr(); #ifdef DASH__ALGORITHM__COPY__USE_FLUSH - dash::dart_storage ds(num_copy_elem); - if (dart_put( + dash::internal::put( dest_gptr, src_ptr, - ds.nelem, - ds.dtype) - != DART_OK) { - DASH_LOG_ERROR("dash::copy_async_impl", "dart_put failed"); - DASH_THROW( - dash::exception::RuntimeError, "dart_put failed"); - } + num_copy_elem); req_handles.push_back(dest_gptr); #else dart_handle_t put_handle; - dash::dart_storage ds(num_copy_elem); - DASH_ASSERT_RETURNS( - dart_put_handle( + dash::internal::put_handle( dest_gptr, src_ptr, - ds.nelem, - ds.dtype, - &put_handle), - DART_OK); - if (put_handle != NULL) { + num_copy_elem + &put_handle); + if (put_handle != DART_HANDLE_NULL) { req_handles.push_back(put_handle); } #endif @@ -867,7 +819,7 @@ ValueType * copy( auto total_copy_elem = in_last - in_first; // Instead of testing in_first.local() and in_last.local(), this test for - // a local-only range only requires one call to in_first.local() which + // a local-only range only requires one call to in_first.local() which // increases throughput by ~10% for local ranges. if (num_local_elem == total_copy_elem) { // Entire input range is local: diff --git a/dash/include/dash/halo/HaloMatrixWrapper.h b/dash/include/dash/halo/HaloMatrixWrapper.h index a4833c80e..af9a44ffd 100644 --- a/dash/include/dash/halo/HaloMatrixWrapper.h +++ b/dash/include/dash/halo/HaloMatrixWrapper.h @@ -10,6 +10,7 @@ #include #include +#include namespace dash { @@ -80,24 +81,29 @@ class HaloMatrixWrapper { for(auto i = rel_dim - 1; i < NumDimensions; ++i) num_elems_block *= region.region().extent(i); + size_t region_size = region.size(); auto ds_num_elems_block = dart_storage(num_elems_block); - num_blocks = region.size() / num_elems_block; + num_blocks = region_size / num_elems_block; auto it_dist = it + num_elems_block; pattern_size_t stride = (num_blocks > 1) ? std::abs(it_dist.lpos().index - it.lpos().index) : 1; auto ds_stride = dart_storage(stride); - HaloData halo_data{ nullptr, std::vector(0) }; + HaloData halo_data; + dart_datatype_t stride_type; + dart_type_create_strided( + ds_num_elems_block.dtype, ds_stride.nelem, + ds_num_elems_block.nelem, &stride_type); + _dart_types.push_back(stride_type); _region_data.insert(std::make_pair( region.index(), Data{ region, - [off, it, num_blocks, ds_num_elems_block, - ds_stride](HaloData& data) { - dart_get_strided_handle( - off, it.dart_gptr(), num_blocks, - ds_num_elems_block.nelem, ds_stride.nelem, - ds_num_elems_block.dtype, - STRIDED_TO_CONTIG, &data.handle); + [off, it, region_size, ds_num_elems_block, + stride_type](HaloData& data) { + dart_get_handle( + off, it.dart_gptr(), region_size, + stride_type, ds_num_elems_block.dtype, + &data.handle); }, std::move(halo_data) })); @@ -105,24 +111,37 @@ class HaloMatrixWrapper { // TODO more optimizations else { num_elems_block *= region.region().extent(NumDimensions - 1); + size_t region_size = region.size(); auto ds_num_elems_block = dart_storage(num_elems_block); - num_blocks = region.size() / num_elems_block; + num_blocks = region_size / num_elems_block; auto it_tmp = it; - HaloData halo_data{ nullptr, std::vector(num_blocks) }; + HaloData halo_data; auto start_index = it.lpos().index; - for(auto& index : halo_data.indexes) { - index = static_cast( - dart_storage(it_tmp.lpos().index - start_index).nelem); + std::vector block_sizes(num_blocks); + std::vector block_offsets(num_blocks); + std::fill( + block_sizes.begin(), block_sizes.end(), ds_num_elems_block.nelem); + for(auto& index : block_offsets) { + index = + dart_storage(it_tmp.lpos().index - start_index).nelem; it_tmp += num_elems_block; } + dart_datatype_t index_type; + dart_type_create_indexed( + ds_num_elems_block.dtype, + num_blocks, // number of blocks + block_sizes.data(), // size of each block + block_offsets.data(), // offset of first element of each block + &index_type); + _dart_types.push_back(index_type); _region_data.insert(std::make_pair( region.index(), Data{ region, - [off, it, num_blocks, ds_num_elems_block](HaloData& data) { - dart_get_indexed_handle( - off, it.dart_gptr(), num_blocks, ds_num_elems_block.nelem, - data.indexes.data(), ds_num_elems_block.dtype, - STRIDED_TO_CONTIG, &data.handle); + [off, it, ds_num_elems_block,region_size, index_type] + (HaloData& data) { + dart_get_handle( + off, it.dart_gptr(), region_size, index_type, + ds_num_elems_block.dtype, &data.handle); }, std::move(halo_data) })); } @@ -132,48 +151,69 @@ class HaloMatrixWrapper { for(auto i = 0; i < rel_dim; ++i) num_elems_block *= region.region().extent(i); + size_t region_size = region.size(); auto ds_num_elems_block = dart_storage(num_elems_block); - num_blocks = region.size() / num_elems_block; + num_blocks = region_size / num_elems_block; auto it_dist = it + num_elems_block; pattern_size_t stride = (num_blocks > 1) ? std::abs(it_dist.lpos().index - it.lpos().index) : 1; auto ds_stride = dart_storage(stride); - HaloData halo_data{ nullptr, std::vector(0) }; + HaloData halo_data; + + dart_datatype_t stride_type; + dart_type_create_strided( + ds_num_elems_block.dtype, ds_stride.nelem, + ds_num_elems_block.nelem, &stride_type); + _dart_types.push_back(stride_type); _region_data.insert(std::make_pair( region.index(), Data{ region, - [off, it, num_blocks, ds_num_elems_block, - ds_stride](HaloData& data) { - dart_get_strided_handle( - off, it.dart_gptr(), num_blocks, - ds_num_elems_block.nelem, ds_stride.nelem, - ds_num_elems_block.dtype, - STRIDED_TO_CONTIG, &data.handle); + [off, it, region_size, ds_num_elems_block, + stride_type](HaloData& data) { + dart_get_handle( + off, it.dart_gptr(), region_size, + stride_type, ds_num_elems_block.dtype, + &data.handle); }, std::move(halo_data) })); } // TODO more optimizations else { num_elems_block *= region.region().extent(0); + size_t region_size = region.size(); auto ds_num_elems_block = dart_storage(num_elems_block); - num_blocks = region.size() / num_elems_block; + num_blocks = region_size / num_elems_block; auto it_tmp = it; - HaloData halo_data{ nullptr, std::vector(num_blocks) }; + HaloData halo_data; + std::vector block_sizes(num_blocks); + std::vector block_offsets(num_blocks); + std::fill( + block_sizes.begin(), block_sizes.end(), ds_num_elems_block.nelem); auto start_index = it.lpos().index; - for(auto& index : halo_data.indexes) { - index = static_cast( - dart_storage(it_tmp.lpos().index - start_index).nelem); + for(auto& index : block_offsets) { + index = + dart_storage(it_tmp.lpos().index - start_index).nelem; it_tmp += num_elems_block; } + + dart_datatype_t index_type; + dart_type_create_indexed( + ds_num_elems_block.dtype, + num_blocks, // number of blocks + block_sizes.data(), // size of each block + block_offsets.data(), // offset of first element of each block + &index_type); + _dart_types.push_back(index_type); + _region_data.insert(std::make_pair( region.index(), Data{ region, - [off, it, num_blocks, ds_num_elems_block](HaloData& data) { - dart_get_indexed_handle( - off, it.dart_gptr(), num_blocks, ds_num_elems_block.nelem, - data.indexes.data(), ds_num_elems_block.dtype, - STRIDED_TO_CONTIG, &data.handle); + [off, it, index_type, region_size, ds_num_elems_block] + (HaloData& data) { + dart_get_handle( + off, it.dart_gptr(), region_size, index_type, + ds_num_elems_block.dtype, &data.handle); }, std::move(halo_data) })); } @@ -183,7 +223,12 @@ class HaloMatrixWrapper { } } - ~HaloMatrixWrapper() {} + ~HaloMatrixWrapper() { + for(auto& dart_type : _dart_types) { + dart_type_destroy(&dart_type); + } + _dart_types.clear(); + } iterator begin() noexcept { return _begin; } @@ -306,8 +351,7 @@ class HaloMatrixWrapper { private: struct HaloData { - dart_handle_t handle; - std::vector indexes; + dart_handle_t handle = DART_HANDLE_NULL; }; struct Data { @@ -338,6 +382,7 @@ class HaloMatrixWrapper { const HaloBlock_t _haloblock; HaloMemory_t _halomemory; std::map _region_data; + std::vector _dart_types; iterator _begin; iterator _end; diff --git a/dash/include/dash/memory/GlobHeapMem.h b/dash/include/dash/memory/GlobHeapMem.h index 0d74fe929..5f61fb2ad 100644 --- a/dash/include/dash/memory/GlobHeapMem.h +++ b/dash/include/dash/memory/GlobHeapMem.h @@ -1176,16 +1176,13 @@ class GlobHeapMem u_num_attach_buckets, 0); dart_gptr_t u_attach_buckets_sizes_gptr = attach_buckets_sizes_gptr; dart_gptr_setunit(&u_attach_buckets_sizes_gptr, u); - dash::dart_storage ds(u_num_attach_buckets); - DASH_ASSERT_RETURNS( - dart_get_blocking( - // local dest: - u_attach_buckets_sizes.data(), - // global source: - u_attach_buckets_sizes_gptr, - // request bytes (~= number of sizes) from unit u: - ds.nelem, ds.dtype), - DART_OK); + dash::internal::get_blocking( + // global source: + u_attach_buckets_sizes_gptr, + // local dest: + u_attach_buckets_sizes.data(), + // request bytes (~= number of sizes) from unit u: + u_num_attach_buckets); // Update local snapshot of cumulative bucket sizes at unit u: for (int bi = 0; bi < u_num_attach_buckets; ++bi) { size_type single_bkt_size = u_attach_buckets_sizes[bi]; diff --git a/dash/test/dart/DARTMemAllocTest.cc b/dash/test/dart/DARTMemAllocTest.cc index 85d4b2e73..66fcc3992 100644 --- a/dash/test/dart/DARTMemAllocTest.cc +++ b/dash/test/dart/DARTMemAllocTest.cc @@ -83,6 +83,7 @@ TEST_F(DARTMemAllocTest, LocalAlloc) &neighbor_val, arr[neighbor_id], ds.nelem, + ds.dtype, ds.dtype)); ASSERT_EQ_U( diff --git a/dash/test/dart/DARTOnesidedTest.cc b/dash/test/dart/DARTOnesidedTest.cc index dedcbffe1..7d068dc86 100644 --- a/dash/test/dart/DARTOnesidedTest.cc +++ b/dash/test/dart/DARTOnesidedTest.cc @@ -24,11 +24,12 @@ TEST_F(DARTOnesidedTest, GetBlockingSingleBlock) int g_src_index = unit_src * block_size; // Copy values: dash::dart_storage ds(block_size); - LOG_MESSAGE("DART storage: dtype:%d nelem:%zu", ds.dtype, ds.nelem); + LOG_MESSAGE("DART storage: dtype:%ld nelem:%zu", ds.dtype, ds.nelem); dart_get_blocking( local_array, // lptr dest (array.begin() + g_src_index).dart_gptr(), // gptr start ds.nelem, + ds.dtype, ds.dtype ); for (size_t l = 0; l < block_size; ++l) { @@ -65,11 +66,12 @@ TEST_F(DARTOnesidedTest, GetBlockingSingleBlockTeam) int g_src_index = unit_src * block_size; // Copy values: dash::dart_storage ds(block_size); - LOG_MESSAGE("DART storage: dtype:%d nelem:%zu", ds.dtype, ds.nelem); + LOG_MESSAGE("DART storage: dtype:%ld nelem:%zu", ds.dtype, ds.nelem); dart_get_blocking( local_array, // lptr dest (array.begin() + g_src_index).dart_gptr(), // gptr start ds.nelem, + ds.dtype, ds.dtype ); for (size_t l = 0; l < block_size; ++l) { @@ -97,12 +99,13 @@ TEST_F(DARTOnesidedTest, GetBlockingTwoBlocks) array.barrier(); // Copy values from first two blocks: dash::dart_storage ds(num_elem_copy); - LOG_MESSAGE("DART storage: dtype:%d nelem:%zu", ds.dtype, ds.nelem); + LOG_MESSAGE("DART storage: dtype:%ld nelem:%zu", ds.dtype, ds.nelem); dart_get_blocking( local_array, // lptr dest array.begin().dart_gptr(), // gptr start ds.nelem, // number of elements - ds.dtype // data type + ds.dtype, // src data type + ds.dtype // dst data type ); // Fails for elements in second block, i.e. for l < num_elem_copy: for (size_t l = 0; l < block_size; ++l) { @@ -140,7 +143,7 @@ TEST_F(DARTOnesidedTest, GetHandleAllRemote) dart_handle_t handle; dash::dart_storage ds(block_size); - LOG_MESSAGE("DART storage: dtype:%d nelem:%zu", ds.dtype, ds.nelem); + LOG_MESSAGE("DART storage: dtype:%ld nelem:%zu", ds.dtype, ds.nelem); EXPECT_EQ_U( DART_OK, dart_get_handle( @@ -148,6 +151,7 @@ TEST_F(DARTOnesidedTest, GetHandleAllRemote) (array.begin() + (u * block_size)).dart_gptr(), ds.nelem, ds.dtype, + ds.dtype, &handle) ); LOG_MESSAGE("dart_get_handle returned handle %p", @@ -177,349 +181,452 @@ TEST_F(DARTOnesidedTest, GetHandleAllRemote) ASSERT_EQ_U(num_elem_copy, l); } -TEST_F(DARTOnesidedTest, GetStridedHandleAllRemote) -{ - if (dash::size() < 2) { - return; + +TEST_F(DARTOnesidedTest, StridedGetSimple) { + constexpr size_t num_elem_per_unit = 120; + constexpr size_t max_stride_size = 5; + + dart_gptr_t gptr; + int *local_ptr; + dart_team_memalloc_aligned( + DART_TEAM_ALL, num_elem_per_unit, DART_TYPE_INT, &gptr); + gptr.unitid = dash::myid(); + dart_gptr_getaddr(gptr, (void**)&local_ptr); + for (int i = 0; i < num_elem_per_unit; ++i) { + local_ptr[i] = i; } - using value_t = int; - const size_t block_size = 50; - size_t num_elem_total = dash::size() * block_size; + dash::barrier(); + int *buf = new int[num_elem_per_unit]; - dash::Array array(num_elem_total, dash::BLOCKED); - // Array to store local copy: - std::vector local_array(num_elem_total); - // Array of handles, one for each dart_get_handle: - std::vector handles; - handles.reserve(dash::size()); - // Assign initial values: [ 1000, 1001, 1002, ... 2000, 2001, ... ] - for (size_t i = 0; i < block_size; ++i) - array.local[i] = ((dash::myid() + 1) * 1000) + i; + dart_unit_t neighbor = (dash::myid() + 1) % dash::size(); + gptr.unitid = neighbor; - array.barrier(); + for (int stride = 1; stride <= max_stride_size; stride++) { - LOG_MESSAGE("Requesting remote blocks"); - // Copy values from all blocks in strides: - size_t stride = 5; - size_t nblocks = block_size / stride; - size_t nelems_block = 3; + LOG_MESSAGE("Testing GET with stride %i", stride); - LOG_MESSAGE("DART stride: stride:%d nblocks:%d", stride, nblocks, nelems_block); + dart_datatype_t new_type; + dart_type_create_strided(DART_TYPE_INT, stride, 1, &new_type); - LOG_MESSAGE("STRIDE_TO_STRIDE"); + // global-to-local strided-to-contig + memset(buf, 0, sizeof(int)*num_elem_per_unit); + dart_get_blocking(buf, gptr, num_elem_per_unit / stride, + new_type, DART_TYPE_INT); - for (size_t u = 0; u < dash::size(); ++u) { - dart_handle_t handle; - - EXPECT_EQ_U( - DART_OK, - dart_get_strided_handle( - local_array.data() + (u * block_size), - (array.begin() + (u * block_size)).dart_gptr(), - nblocks, nelems_block, stride, - dash::dart_datatype::value, - STRIDED_TO_STRIDED, - &handle) - ); - LOG_MESSAGE("dart_get_handle returned handle %p", - static_cast(handle)); - handles.push_back(handle); - } - // Wait for completion of get operations: - LOG_MESSAGE("Waiting for completion of async requests"); - dart_waitall_local( - handles.data(), - handles.size() - ); + // the first elements should have a value + for (int i = 0; i < num_elem_per_unit / stride; ++i) { + ASSERT_EQ_U(i*stride, buf[i]); + } - LOG_MESSAGE("Validating values"); - for (size_t g = 0; g < array.size(); ++g) { - if(g % stride < nelems_block) - ASSERT_EQ_U((value_t)array[g], local_array[g]); - else - ASSERT_EQ_U(0, local_array[g]); + // global-to-local strided-to-contig + memset(buf, 0, sizeof(int)*num_elem_per_unit); + + dart_get_blocking(buf, gptr, num_elem_per_unit / stride, + DART_TYPE_INT, new_type); + + // every other element should have a value + for (int i = 0; i < num_elem_per_unit; ++i) { + if (i%stride == 0) { + ASSERT_EQ_U(i/stride, buf[i]); + } else { + ASSERT_EQ_U(0, buf[i]); + } + } + dart_type_destroy(&new_type); } - dash::Team::All().barrier(); + dash::barrier(); - LOG_MESSAGE("CONTIG_TO_STRIDE"); + // clean-up + gptr.unitid = 0; + dart_team_memfree(gptr); - handles.clear(); - std::fill(local_array.begin(), local_array.end(), 0); + delete[] buf; - for (auto u = 0; u < dash::size(); ++u) { - dart_handle_t handle; +} + + +TEST_F(DARTOnesidedTest, StridedPutSimple) { + constexpr size_t num_elem_per_unit = 120; + constexpr size_t max_stride_size = 5; + + dart_gptr_t gptr; + int *local_ptr; + dart_team_memalloc_aligned( + DART_TEAM_ALL, num_elem_per_unit, DART_TYPE_INT, &gptr); + gptr.unitid = dash::myid(); + dart_gptr_getaddr(gptr, (void**)&local_ptr); + + dart_unit_t neighbor = (dash::myid() + 1) % dash::size(); + gptr.unitid = neighbor; - EXPECT_EQ_U( - DART_OK, - dart_get_strided_handle( - local_array.data() + (u * block_size), - (array.begin() + (u * block_size)).dart_gptr(), - nblocks, nelems_block, stride, - dash::dart_datatype::value, - CONTIG_TO_STRIDED, - &handle) - ); - LOG_MESSAGE("dart_get_handle returned handle %p", - static_cast(handle)); - handles.push_back(handle); + int *buf = new int[num_elem_per_unit]; + for (int i = 0; i < num_elem_per_unit; ++i) { + buf[i] = i; } - // Wait for completion of get operations: - LOG_MESSAGE("Waiting for completion of async requests"); - dart_waitall_local( - handles.data(), - handles.size() - ); - LOG_MESSAGE("Validating values"); - for (auto g = 0, l = 0; l < local_array.size(); ++l) { - if(l % block_size == 0) - g = l; - if(l % stride < nelems_block) { - ASSERT_EQ_U((value_t)array[g], local_array[l]); - ++g; + for (int stride = 1; stride <= max_stride_size; stride++) { + + LOG_MESSAGE("Testing PUT with stride %i", stride); + + memset(local_ptr, 0, sizeof(int)*num_elem_per_unit); + + dart_datatype_t new_type; + dart_type_create_strided(DART_TYPE_INT, stride, 1, &new_type); + + dash::barrier(); + // local-to-global strided-to-contig + dart_put_blocking(gptr, buf, num_elem_per_unit / stride, + new_type, DART_TYPE_INT); + dash::barrier(); + + // the first elements should have a value + for (int i = 0; i < num_elem_per_unit / stride; ++i) { + ASSERT_EQ_U(i*stride, local_ptr[i]); } - else - ASSERT_EQ_U(0, local_array[l]); + // local-to-global contig-to-strided + memset(local_ptr, 0, sizeof(int)*num_elem_per_unit); + + dash::barrier(); + dart_put_blocking(gptr, buf, num_elem_per_unit / stride, + DART_TYPE_INT, new_type); + dash::barrier(); + + // every other element should have a value + for (int i = 0; i < num_elem_per_unit; ++i) { + if (i%stride == 0) { + ASSERT_EQ_U(i/stride, local_ptr[i]); + } else { + ASSERT_EQ_U(0, local_ptr[i]); + } + } + + dart_type_destroy(&new_type); } - dash::Team::All().barrier(); + // clean-up + gptr.unitid = 0; + dart_team_memfree(gptr); - LOG_MESSAGE("STRIDE_TO_CONTIG"); + delete[] buf; +} - handles.clear(); - std::fill(local_array.begin(), local_array.end(), 0); - for (auto u = 0; u < dash::size(); ++u) { - dart_handle_t handle; +TEST_F(DARTOnesidedTest, BlockedStridedToStrided) { - EXPECT_EQ_U( - DART_OK, - dart_get_strided_handle( - local_array.data() + (u * block_size), - (array.begin() + (u * block_size)).dart_gptr(), - nblocks, nelems_block, stride, - dash::dart_datatype::value, - STRIDED_TO_CONTIG, - &handle) - ); - LOG_MESSAGE("dart_get_handle returned handle %p", - static_cast(handle)); - handles.push_back(handle); - } - // Wait for completion of get operations: - LOG_MESSAGE("Waiting for completion of async requests"); - dart_waitall_local( - handles.data(), - handles.size() - ); + constexpr size_t num_elem_per_unit = 120; + constexpr size_t from_stride = 5; + constexpr size_t from_block_size = 2; + constexpr size_t to_stride = 2; - LOG_MESSAGE("Validating values"); - auto nelems_cont = nblocks * nelems_block; - for (auto g = 0, l = 0; g < array.size(); ++g) { - auto test_new_block = g % block_size; - if(test_new_block == 0) - l = g; - - if(test_new_block < nelems_cont) { - ASSERT_EQ_U((value_t)array[l], local_array[g]); - auto test_index = l % stride; - if(test_index < nelems_block - 1) - ++l; - else - l += stride - test_index; + dart_gptr_t gptr; + int *local_ptr; + dart_team_memalloc_aligned( + DART_TEAM_ALL, num_elem_per_unit, DART_TYPE_INT, &gptr); + gptr.unitid = dash::myid(); + dart_gptr_getaddr(gptr, (void**)&local_ptr); + for (int i = 0; i < num_elem_per_unit; ++i) { + local_ptr[i] = i; + } + dash::barrier(); + + // global-to-local strided-to-contig + int *buf = new int[num_elem_per_unit]; + memset(buf, 0, sizeof(int)*num_elem_per_unit); + + dart_datatype_t to_type; + dart_type_create_strided(DART_TYPE_INT, to_stride, 1, &to_type); + dart_datatype_t from_type; + dart_type_create_strided(DART_TYPE_INT, from_stride, + from_block_size, &from_type); + + // strided-to-strided get + dart_get_blocking(buf, gptr, num_elem_per_unit / from_stride * from_block_size, + from_type, to_type); + + int value = 0; + for (int i = 0; + i < num_elem_per_unit/from_stride*to_stride*from_block_size; + ++i) { + if (i%to_stride == 0) { + ASSERT_EQ_U(value, buf[i]); + // consider the block size we used as source + // if + if ((value%from_stride) < (from_block_size-1)) { + // expect more elements with incremented value + ++value; + } else { + value += from_stride - (from_block_size - 1); + } + } else { + ASSERT_EQ_U(0, buf[i]); } - else - ASSERT_EQ_U(0, local_array[g]); } - dash::Team::All().barrier(); + dart_type_destroy(&from_type); + dart_type_destroy(&to_type); + + dash::barrier(); + + delete[] buf; + // clean-up + gptr.unitid = 0; + dart_team_memfree(gptr); } -TEST_F(DARTOnesidedTest, GetIndexedHandleAllRemote) -{ - if (dash::size() < 2) { - return; + +TEST_F(DARTOnesidedTest, IndexedGetSimple) { + + constexpr size_t num_elem_per_unit = 120; + constexpr size_t num_blocks = 5; + + std::vector blocklens(num_blocks); + std::vector offsets(num_blocks); + + // set up offsets and block lengths + size_t num_elems = 0; + for (int i = 0; i < num_blocks; ++i) { + blocklens[i] = (i+1); + offsets[i] = (i*10); + num_elems += blocklens[i]; } - using value_t = int; - const size_t block_size = 50; - size_t num_elem_total = dash::size() * block_size; + dart_gptr_t gptr; + int *local_ptr; + dart_team_memalloc_aligned( + DART_TEAM_ALL, num_elem_per_unit, DART_TYPE_INT, &gptr); + gptr.unitid = dash::myid(); + dart_gptr_getaddr(gptr, (void**)&local_ptr); + for (int i = 0; i < num_elem_per_unit; ++i) { + local_ptr[i] = i; + } - dash::Array array(num_elem_total, dash::BLOCKED); - // Array to store local copy: - std::vector local_array(num_elem_total); - // Array of handles, one for each dart_get_handle: - std::vector handles; - handles.reserve(dash::size()); - // Assign initial values: [ 1000, 1001, 1002, ... 2000, 2001, ... ] - for (size_t i = 0; i < block_size; ++i) - array.local[i] = ((dash::myid() + 1) * 1000) + i; + dart_datatype_t new_type; + dart_type_create_indexed(DART_TYPE_INT, num_blocks, blocklens.data(), + offsets.data(), &new_type); - array.barrier(); + dash::barrier(); - LOG_MESSAGE("Requesting remote blocks"); + int *buf = new int[num_elem_per_unit]; + memset(buf, 0, sizeof(int)*num_elem_per_unit); - constexpr size_t nblocks = 7; - std::array indexes{0,5,10,20,25,40,45}; - size_t nelems_block = 3; + // indexed-to-contig + dart_get_blocking(buf, gptr, num_elems, new_type, DART_TYPE_INT); - LOG_MESSAGE("STRIDE_TO_STRIDE"); + size_t idx = 0; + for (size_t i = 0; i < num_blocks; ++i) { + for (size_t j = 0; j < blocklens[i]; ++j) { + ASSERT_EQ_U(local_ptr[offsets[i] + j], buf[idx]); + ++idx; + } + } - for (size_t u = 0; u < dash::size(); ++u) { - dart_handle_t handle; - - EXPECT_EQ_U( - DART_OK, - dart_get_indexed_handle( - local_array.data() + (u * block_size), - (array.begin() + (u * block_size)).dart_gptr(), - nblocks, nelems_block, indexes.data(), - dash::dart_datatype::value, - STRIDED_TO_STRIDED, - &handle) - ); - LOG_MESSAGE("dart_get_handle returned handle %p", - static_cast(handle)); - handles.push_back(handle); + // check we haven't copied more elements than requested + for (size_t i = idx; i < num_elem_per_unit; ++i) { + ASSERT_EQ_U(0, buf[i]); } - // Wait for completion of get operations: - LOG_MESSAGE("Waiting for completion of async requests"); - dart_waitall_local( - handles.data(), - handles.size() - ); - LOG_MESSAGE("Validating values"); - auto index = indexes.begin(); - size_t next_block = 0; - for (size_t g = 0; g < array.size(); ++g) { - if(g % block_size == 0 && g != 0) { - next_block += block_size; - index = indexes.begin(); - } - auto index_tmp = next_block + *index; - if(g >= index_tmp && g < index_tmp + nelems_block) { - ASSERT_EQ_U((value_t)array[g], local_array[g]); - if(g == index_tmp + nelems_block - 1) - ++index; - } - else - ASSERT_EQ_U(0, local_array[g]); + // contig-to-indexed + memset(buf, 0, sizeof(int)*num_elem_per_unit); + dart_get_blocking(buf, gptr, num_elems, DART_TYPE_INT, new_type); + + idx = 0; + for (size_t i = 0; i < num_blocks; ++i) { + for (size_t j = 0; j < blocklens[i]; ++j) { + ASSERT_EQ_U(local_ptr[idx], buf[offsets[i] + j]); + ++idx; + } } - dash::Team::All().barrier(); + dart_type_destroy(&new_type); - LOG_MESSAGE("CONTIG_TO_STRIDE"); + dash::barrier(); - handles.clear(); - std::fill(local_array.begin(), local_array.end(), 0); + delete[] buf; + // clean-up + gptr.unitid = 0; + dart_team_memfree(gptr); +} + +TEST_F(DARTOnesidedTest, IndexedPutSimple) { + + constexpr size_t num_elem_per_unit = 120; + constexpr size_t num_blocks = 5; - for (auto u = 0; u < dash::size(); ++u) { - dart_handle_t handle; + std::vector blocklens(num_blocks); + std::vector offsets(num_blocks); - EXPECT_EQ_U( - DART_OK, - dart_get_indexed_handle( - local_array.data() + (u * block_size), - (array.begin() + (u * block_size)).dart_gptr(), - nblocks, nelems_block, indexes.data(), - dash::dart_datatype::value, - CONTIG_TO_STRIDED, - &handle) - ); - LOG_MESSAGE("dart_get_handle returned handle %p", - static_cast(handle)); - handles.push_back(handle); + // set up offsets and block lengths + size_t num_elems = 0; + for (int i = 0; i < num_blocks; ++i) { + blocklens[i] = (i+1); + offsets[i] = (i*10); + num_elems += blocklens[i]; } - // Wait for completion of get operations: - LOG_MESSAGE("Waiting for completion of async requests"); - dart_waitall_local( - handles.data(), - handles.size() - ); - LOG_MESSAGE("Validating values"); - index = indexes.begin(); - next_block = 0; - - for (auto g = 0, l = 0; l < local_array.size(); ++l) { - if(l % block_size == 0 && l != 0) { - next_block += block_size; - index = indexes.begin(); - g=l; - } - auto index_tmp = next_block + *index; - if(l >= index_tmp && l < index_tmp + nelems_block) { - ASSERT_EQ_U((value_t)array[g], local_array[l]); - if(l == index_tmp + nelems_block - 1) - ++index; - ++g; + dart_gptr_t gptr; + int *local_ptr; + dart_team_memalloc_aligned( + DART_TEAM_ALL, num_elem_per_unit, DART_TYPE_INT, &gptr); + gptr.unitid = dash::myid(); + dart_gptr_getaddr(gptr, (void**)&local_ptr); + + dart_datatype_t new_type; + dart_type_create_indexed(DART_TYPE_INT, num_blocks, blocklens.data(), + offsets.data(), &new_type); + + dash::barrier(); + + int *buf = new int[num_elem_per_unit]; + for (int i = 0; i < num_elem_per_unit; ++i) { + buf[i] = i; + } + + memset(local_ptr, 0, sizeof(int)*num_elem_per_unit); + + dash::barrier(); + + // indexed-to-contig + dart_put_blocking(gptr, buf, num_elems, new_type, DART_TYPE_INT); + + dash::barrier(); + + size_t idx = 0; + for (size_t i = 0; i < num_blocks; ++i) { + for (size_t j = 0; j < blocklens[i]; ++j) { + ASSERT_EQ_U(buf[offsets[i] + j], local_ptr[idx]); + ++idx; } - else - ASSERT_EQ_U(0, local_array[l]); + } + + // check we haven't copied more elements than requested + for (size_t i = idx; i < num_elem_per_unit; ++i) { + ASSERT_EQ_U(0, local_ptr[i]); + } + + // contig-to-indexed + memset(local_ptr, 0, sizeof(int)*num_elem_per_unit); + dash::barrier(); + + dart_put_blocking(gptr, buf, num_elems, DART_TYPE_INT, new_type); + dash::barrier(); + + idx = 0; + for (size_t i = 0; i < num_blocks; ++i) { + for (size_t j = 0; j < blocklens[i]; ++j) { + ASSERT_EQ_U(buf[idx], local_ptr[offsets[i] + j]); + ++idx; + } } - dash::Team::All().barrier(); + dart_type_destroy(&new_type); + + delete[] buf; + // clean-up + gptr.unitid = 0; + dart_team_memfree(gptr); +} - LOG_MESSAGE("STRIDE_TO_CONTIG"); - handles.clear(); - std::fill(local_array.begin(), local_array.end(), 0); +TEST_F(DARTOnesidedTest, IndexedToIndexedGet) { - for (auto u = 0; u < dash::size(); ++u) { - dart_handle_t handle; + constexpr size_t num_elem_per_unit = 120; + constexpr size_t num_blocks_to = 10; + constexpr size_t num_blocks_from = 5; - EXPECT_EQ_U( - DART_OK, - dart_get_indexed_handle( - local_array.data() + (u * block_size), - (array.begin() + (u * block_size)).dart_gptr(), - nblocks, nelems_block, indexes.data(), - dash::dart_datatype::value, - STRIDED_TO_CONTIG, - &handle) - ); - LOG_MESSAGE("dart_get_handle returned handle %p", - static_cast(handle)); - handles.push_back(handle); + std::vector blocklens_to(num_blocks_to); + std::vector offsets_to(num_blocks_to); + std::vector blocklens_from(num_blocks_from); + std::vector offsets_from(num_blocks_from); + + // set up offsets and block lengths + size_t num_elems_to = 0; + for (int i = 0; i < num_blocks_to; ++i) { + blocklens_to[i] = (i+1); + offsets_to[i] = (i*5); + num_elems_to += blocklens_to[i]; } - // Wait for completion of get operations: - LOG_MESSAGE("Waiting for completion of async requests"); - dart_waitall_local( - handles.data(), - handles.size() - ); - LOG_MESSAGE("Validating values"); - auto nelems_cont = nblocks * nelems_block; - for (auto g = 0, l = 0; g < array.size(); ++g) { - auto test_new_block = g % block_size; - if(test_new_block == 0 && g != 0) { - next_block += block_size; - index = indexes.begin(); - l=g; + size_t num_elems_from = 0; + for (int i = 0; i < num_blocks_from; ++i) { + blocklens_from[i] = (i+1)+8; + offsets_from[i] = (i*10); + num_elems_from += blocklens_from[i]; + } + + ASSERT_EQ_U(num_elems_from, num_elems_to); + + dart_gptr_t gptr; + int *local_ptr; + dart_team_memalloc_aligned( + DART_TEAM_ALL, num_elem_per_unit, DART_TYPE_INT, &gptr); + gptr.unitid = dash::myid(); + dart_gptr_getaddr(gptr, (void**)&local_ptr); + for (int i = 0; i < num_elem_per_unit; ++i) { + local_ptr[i] = i; + } + + dart_datatype_t to_type; + dart_type_create_indexed(DART_TYPE_INT, num_blocks_to, + blocklens_to.data(), + offsets_to.data(), &to_type); + + dart_datatype_t from_type; + dart_type_create_indexed(DART_TYPE_INT, num_blocks_from, + blocklens_from.data(), + offsets_from.data(), &from_type); + + dash::barrier(); + + int *buf = new int[num_elem_per_unit]; + memset(buf, 0, sizeof(int)*num_elem_per_unit); + + int *index_map_to = new int[num_elem_per_unit]; + memset(index_map_to, 0, sizeof(int)*num_elem_per_unit); + + int *index_map_from = new int[num_elem_per_unit]; + memset(index_map_from, 0, sizeof(int)*num_elem_per_unit); + + // populate the flat list of indices to copy from + size_t idx = 0; + for (size_t i = 0; i < num_blocks_from; ++i) { + for (size_t j = 0; j < blocklens_from[i]; ++j) { + index_map_from[idx] = offsets_from[i] + j; + ++idx; } - auto index_tmp = next_block + *index; - if(test_new_block < nelems_cont) { - if(g >= index_tmp && g < index_tmp + nelems_block) { - ASSERT_EQ_U((value_t)array[l], local_array[g]); - if(g == index_tmp + nelems_block - 1) - ++index; - ++l; - } - else - l += index_tmp; + } + + // populate the mapping from target indices to values + idx = 0; + for (size_t i = 0; i < num_blocks_to; ++i) { + for (size_t j = 0; j < blocklens_to[i]; ++j) { + index_map_to[offsets_to[i] + j] = index_map_from[idx]; + ++idx; } - else - ASSERT_EQ_U(0, local_array[g]); + } + // indexed-to-indexed + dart_get_blocking(buf, gptr, num_elems_to, from_type, to_type); + + for (size_t i = 0; i < num_elem_per_unit; ++i) { + ASSERT_EQ_U(local_ptr[index_map_to[i]], buf[i]); } - dash::Team::All().barrier(); + dart_type_destroy(&from_type); + dart_type_destroy(&to_type); + + dash::barrier(); + + delete[] buf; + delete[] index_map_to; + delete[] index_map_from; + // clean-up + gptr.unitid = 0; + dart_team_memfree(gptr); } + diff --git a/dash/test/dart/ThreadsafetyTest.cc b/dash/test/dart/ThreadsafetyTest.cc index 2fae06cd5..8f5adcbb9 100644 --- a/dash/test/dart/ThreadsafetyTest.cc +++ b/dash/test/dart/ThreadsafetyTest.cc @@ -203,7 +203,7 @@ TEST_F(ThreadsafetyTest, ConcurrentAttach) { gptr_r.unitid = (team->myid() + 1) % team->size(); dash::dart_storage ds(elem_per_thread); ASSERT_EQ_U( - dart_get_blocking(check, gptr_r, ds.nelem, ds.dtype), + dart_get_blocking(check, gptr_r, ds.nelem, ds.dtype, ds.dtype), DART_OK); team->barrier(); diff --git a/dash/test/halo/HaloTest.cc b/dash/test/halo/HaloTest.cc index 0e150380c..7dacae307 100644 --- a/dash/test/halo/HaloTest.cc +++ b/dash/test/halo/HaloTest.cc @@ -224,8 +224,8 @@ TEST_F(HaloTest, HaloMatrixWrapperNonCyclic2D) } for(auto i = 0; i < ext_per_dim; ++i) - delete matrix_check[i]; - delete matrix_check; + delete[] matrix_check[i]; + delete[] matrix_check; } matrix_halo.barrier(); @@ -391,10 +391,10 @@ TEST_F(HaloTest, HaloMatrixWrapperNonCyclic3D) for(auto i = 0; i < ext_per_dim; ++i) { for(auto j = 0; j < ext_per_dim; ++j) - delete matrix_check[i][j]; - delete matrix_check[i]; + delete[] matrix_check[i][j]; + delete[] matrix_check[i]; } - delete matrix_check; + delete[] matrix_check; } matrix_halo.barrier(); @@ -494,10 +494,10 @@ TEST_F(HaloTest, HaloMatrixWrapperCyclic3D) for(auto i = 0; i < ext_per_dim_check; ++i) { for(auto j = 0; j < ext_per_dim_check; ++j) - delete matrix_check[i][j]; - delete matrix_check[i]; + delete[] matrix_check[i][j]; + delete[] matrix_check[i]; } - delete matrix_check; + delete[] matrix_check; } dash::Team::All().barrier(); @@ -598,10 +598,10 @@ TEST_F(HaloTest, HaloMatrixWrapperFixed3D) for(auto i = 0; i < ext_per_dim_check; ++i) { for(auto j = 0; j < ext_per_dim_check; ++j) - delete matrix_check[i][j]; - delete matrix_check[i]; + delete[] matrix_check[i][j]; + delete[] matrix_check[i]; } - delete matrix_check; + delete[] matrix_check; } @@ -713,10 +713,10 @@ TEST_F(HaloTest, HaloMatrixWrapperMix3D) } for(auto i = 0; i < ext_per_dim; ++i) { for(auto j = 0; j < ext_per_dim_check; ++j) - delete matrix_check[i][j]; - delete matrix_check[i]; + delete[] matrix_check[i][j]; + delete[] matrix_check[i]; } - delete matrix_check; + delete[] matrix_check; } matrix_halo.barrier(); @@ -845,10 +845,10 @@ TEST_F(HaloTest, HaloMatrixWrapperBigMix3D) for(auto i = 0; i < ext_per_dim; ++i) { for(auto j = 0; j < ext_per_dim_check; ++j) - delete matrix_check[i][j]; - delete matrix_check[i]; + delete[] matrix_check[i][j]; + delete[] matrix_check[i]; } - delete matrix_check; + delete[] matrix_check; } diff --git a/dash/test/halo/HaloTest.h b/dash/test/halo/HaloTest.h index 50ae13859..350a5cda1 100644 --- a/dash/test/halo/HaloTest.h +++ b/dash/test/halo/HaloTest.h @@ -12,7 +12,7 @@ class HaloTest : public dash::test::TestBase { virtual ~HaloTest() { } - static constexpr long ext_per_dim = 150; + static constexpr long ext_per_dim = 100; }; diff --git a/dash/test/memory/GlobHeapMemTest.cc b/dash/test/memory/GlobHeapMemTest.cc index df77d4561..81ba0a8af 100644 --- a/dash/test/memory/GlobHeapMemTest.cc +++ b/dash/test/memory/GlobHeapMemTest.cc @@ -224,7 +224,7 @@ TEST_F(GlobHeapMemTest, UnbalancedRealloc) // request value via DART global pointer: value_t dart_gptr_value; dash::dart_storage ds(1); - dart_get_blocking(&dart_gptr_value, gptr, ds.nelem, ds.dtype); + dart_get_blocking(&dart_gptr_value, gptr, ds.nelem, ds.dtype, ds.dtype); DASH_LOG_TRACE_VAR("GlobHeapMemTest.UnbalancedRealloc", dart_gptr_value);