Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
70 commits
Select commit Hold shift + click to select a range
3682b91
Update to new openmpi version
dycz0fx Apr 9, 2016
b05c986
remove warning
dycz0fx Apr 10, 2016
6ab4c6a
add alltoallv
dycz0fx Apr 20, 2016
a39ba66
Merge remote-tracking branch 'upstream/master'
dycz0fx Apr 20, 2016
d7d4e4a
Update to current openmpi
dycz0fx Apr 20, 2016
13d3487
Update ialltoallv
dycz0fx Apr 27, 2016
85c6380
Merge remote-tracking branch 'upstream/master'
dycz0fx Apr 27, 2016
b0ac352
change bcast to use reqeust and fix the problem in tcp endpoint
dycz0fx Apr 29, 2016
db95465
Only use OPAL_THREAD_LOCK around request_complete function to avoid d…
dycz0fx May 1, 2016
9e49fb5
Merge remote-tracking branch 'upstream/master'
dycz0fx May 1, 2016
df0c674
Merge with ompi
dycz0fx May 1, 2016
b1218ba
Sync
dycz0fx Jun 12, 2016
cabf3bf
Merge remote-tracking branch 'upstream/master'
dycz0fx Jun 12, 2016
51fd66b
Merge remote-tracking branch 'upstream/master'
dycz0fx Jun 12, 2016
b9042a0
Add topo aware algorithm in base module
dycz0fx Jun 21, 2016
f17714c
Merge remote-tracking branch 'upstream/master'
dycz0fx Jun 21, 2016
12f1f21
Merge remote-tracking branch 'upstream/master'
dycz0fx Jun 22, 2016
026cb37
Swith bcast algorithm
dycz0fx Jun 22, 2016
5b1b72c
Merge remote-tracking branch 'upstream/master'
dycz0fx Jul 1, 2016
a1dbc49
Add topo ware tree
dycz0fx Jul 1, 2016
c0807bd
Add topo aware bcast and reduce
dycz0fx Jul 28, 2016
fd81098
Merge remote-tracking branch 'upstream/master'
dycz0fx Aug 8, 2016
83af2fe
Cache tree and topo
dycz0fx Aug 19, 2016
259a88e
Merge remote-tracking branch 'upstream/master'
dycz0fx Aug 19, 2016
68ff559
Fix type error
dycz0fx Aug 19, 2016
d29db05
add partial gpu support
eddy16112 Aug 25, 2016
b46a33f
checkpoint
eddy16112 Aug 25, 2016
3605c8c
Remove extra memcpy in reduce
dycz0fx Aug 29, 2016
8b1c74f
checkpoint before merge
eddy16112 Aug 29, 2016
7ef6c9c
Merge remote-tracking branch 'origin/master' into cuda-coll
eddy16112 Aug 29, 2016
5032265
adapt_cuda module
eddy16112 Sep 7, 2016
d110f11
checkpoint
eddy16112 Sep 9, 2016
14a8362
another checkpoint no CPU->GPU any more, now is CPU->CPU cudamemcpy GPU
eddy16112 Sep 12, 2016
9f558a8
create a mpool for pined cpu buffer
eddy16112 Sep 12, 2016
fd6afed
checkpoint nccl
eddy16112 Oct 16, 2016
d1bb5f2
cuda now is able to detect gpu topo
eddy16112 Oct 23, 2016
c954f21
async progress for cuda memcpy
eddy16112 Oct 25, 2016
ccb634f
fix bugs, 1 topo flag still has bug. 2, fix socket and node leader ma…
eddy16112 Oct 27, 2016
f15ee6e
checkpoint reduce
eddy16112 Oct 28, 2016
1c3e18c
checkpoint, more async memcpy
eddy16112 Oct 28, 2016
42048e9
checkpoint
eddy16112 Oct 29, 2016
793f923
checkpoint remove buff_tmp
eddy16112 Oct 29, 2016
78c0f23
checkpoint
eddy16112 Oct 29, 2016
664f8db
checkpoint, now free cpu buff list at send cb
eddy16112 Oct 29, 2016
ff4d867
check point
eddy16112 Oct 31, 2016
09b74e6
check point
eddy16112 Nov 1, 2016
d069c66
check point
eddy16112 Nov 1, 2016
72db1b2
ddt works
eddy16112 Nov 1, 2016
4f6751c
checkpoint, add new reduce
eddy16112 Nov 2, 2016
79977ab
check point
eddy16112 Nov 2, 2016
ecd3148
check point
eddy16112 Nov 3, 2016
6a84788
now record event when all children is done with op, and check event i…
eddy16112 Nov 3, 2016
3024c1f
cleanup print
eddy16112 Nov 3, 2016
9cbc99e
checkpoint
eddy16112 Nov 3, 2016
f879209
check point
eddy16112 Nov 3, 2016
315f5dd
now copy data from cpu to gpu and do op
eddy16112 Nov 3, 2016
4023560
small fix
eddy16112 Nov 3, 2016
5f47713
fix for leaf
eddy16112 Nov 3, 2016
525f1fc
minor fix
eddy16112 Nov 4, 2016
9563e4a
add another version of reduce. doing op in cpu
eddy16112 Nov 4, 2016
0893ee6
minor fix for ongoing send
eddy16112 Nov 4, 2016
733db3d
checkpoint
eddy16112 Nov 5, 2016
061c85e
use multiple stream to do op
eddy16112 Nov 15, 2016
b33dfeb
add allreduce
eddy16112 Nov 23, 2016
efc9ff1
bak
eddy16112 Dec 9, 2016
37aa38e
forget to add allreduce file
eddy16112 Dec 10, 2016
3400d5f
add missing files
eddy16112 Apr 7, 2017
0478aee
now float
eddy16112 Apr 30, 2017
5756504
add missing file
eddy16112 Apr 30, 2017
7b04bb6
add configure
dycz0fx Apr 30, 2017
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -1411,6 +1411,10 @@ m4_ifdef([project_oshmem],

opal_show_subtitle "Final output"

# Register opal/datatype/cuda/Makefile for generation only when
# $OPAL_cuda_support holds something other than "0".
if test "$OPAL_cuda_support" != "0"; then
AC_CONFIG_FILES([opal/datatype/cuda/Makefile])
fi

AC_CONFIG_FILES([
Makefile

Expand Down
56 changes: 56 additions & 0 deletions ompi/mca/coll/adapt/Makefile.am
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
#
# Copyright (c) 2014 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#

dist_ompidata_DATA = help-mpi-coll-adapt.txt

# Source and header files of the adapt collective component. This one
# list is used as the SOURCES of both the DSO target and the static
# library target defined further down in this file.
sources = \
coll_adapt_component.c \
coll_adapt_module.c \
coll_adapt_bcast.c \
coll_adapt_ibcast.c \
coll_adapt_reduce.c \
coll_adapt_ireduce.c \
coll_adapt_allreduce.c \
coll_adapt_iallreduce.c \
coll_adapt_alltoall.c \
coll_adapt_ialltoall.c \
coll_adapt_alltoallv.c \
coll_adapt_ialltoallv.c \
coll_adapt.h \
coll_adapt_algorithms.h \
coll_adapt_context.h \
coll_adapt_context.c \
coll_adapt_inbuf.c \
coll_adapt_inbuf.h \
coll_adapt_item.c \
coll_adapt_item.h

# Make the output library in this directory, and name it either
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
# (for static builds).

# Both lists start empty; the conditional below adds the library name to
# exactly one of them.
component_noinst =
component_install =
if MCA_BUILD_ompi_coll_adapt_DSO
# DSO build: the component is an installed module.
component_install += mca_coll_adapt.la
else
# Non-DSO build: the component is a non-installed libtool library.
component_noinst += libmca_coll_adapt.la
endif

# Installed (DSO) flavour; it is placed in $(ompilibdir).
mcacomponentdir = $(ompilibdir)
mcacomponent_LTLIBRARIES = $(component_install)
mca_coll_adapt_la_SOURCES = $(sources)
mca_coll_adapt_la_LDFLAGS = -module -avoid-version
mca_coll_adapt_la_LIBADD =

# Non-installed (static) flavour, built from the same source list.
noinst_LTLIBRARIES = $(component_noinst)
libmca_coll_adapt_la_SOURCES =$(sources)
libmca_coll_adapt_la_LDFLAGS = -module -avoid-version
144 changes: 144 additions & 0 deletions ompi/mca/coll/adapt/coll_adapt.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
/*
* Copyright (c) 2014 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/** @file */

#ifndef MCA_COLL_ADAPT_EXPORT_H
#define MCA_COLL_ADAPT_EXPORT_H

#include "ompi_config.h"

#include "mpi.h"
#include "opal/mca/mca.h"
#include "opal/datatype/opal_convertor.h"
#include "ompi/mca/coll/coll.h"

BEGIN_C_DECLS

typedef struct mca_coll_adapt_module_t mca_coll_adapt_module_t;

/**
* Structure to hold the adapt coll component. First it holds the
* base coll component, and then holds a bunch of
* adapt-coll-component-specific stuff (e.g., current MCA param
* values).
*/
/* NOTE(review): several fields below (control size, in-use flags,
   segments per communicator, info comm size) are described in terms of
   a data mpool and shared segments; confirm that the adapt component
   actually registers and reads each of them. */
typedef struct mca_coll_adapt_component_t {
/** Base coll component; first member of the structure */
mca_coll_base_component_2_0_0_t super;

/** MCA parameter: Priority of this component */
int adapt_priority;

/** MCA parameter: Length of a cache line or page (in bytes) */
int adapt_control_size;

/** MCA parameter: Number of "in use" flags in each
communicator's area in the data mpool */
int adapt_comm_num_in_use_flags;

/** MCA parameter: Number of segments for each communicator in
the data mpool */
int adapt_comm_num_segments;

/** MCA parameter: Fragment size for data
(presumably in bytes -- TODO confirm against the parameter
registration) */
int adapt_fragment_size;

/** MCA parameter: Degree of tree for tree-based collectives */
int adapt_tree_degree;

/** MCA parameter: Number of processes to use in the
calculation of the "info" MCA parameter */
int adapt_info_comm_size;

/******* end of MCA params ********/

/** How many fragment segments are protected by a single
in-use flag. This is a cached value, kept solely so that the
division is performed once and the result reused without
having to re-calculate it. */
int adapt_segs_per_inuse_flag;
} mca_coll_adapt_component_t;

/**
* Structure for the adapt coll module to hang off the communicator.
* Contains communicator-specific information, including pointers
* into the per-communicator shmem data segment for this
* comm's adapt collective operations area.
*/
typedef struct mca_coll_adapt_comm_t {
/* Meta data that we get back from the common mmap allocation
function.
NOTE(review): the field is typed as a pointer to the module
structure itself; confirm that this is the intended type. */
mca_coll_adapt_module_t *adapt_bootstrap_meta;

/** Pointer to my barrier control pages (odd index pages are
"in", even index pages are "out") */
uint32_t *mcb_barrier_control_me;

/** Pointer to my parent's barrier control pages (will be NULL
for communicator rank 0; odd index pages are "in", even
index pages are "out") */
uint32_t *mcb_barrier_control_parent;

/** Pointer to my children's barrier control pages (they're
contiguous in memory, so we only point to the base); will
be NULL if this process has no children (odd index pages
are "in", even index pages are "out").
NOTE(review): the number of children was documented as living
in "my entry in the mcb_tree", but this structure has no
mcb_tree member; confirm where that count is kept. */
uint32_t *mcb_barrier_control_children;

/** Number of barriers that we have executed (i.e., which set
of barrier buffers to use). */
int mcb_barrier_count;

/** Operation number (i.e., which segment number to use) */
uint32_t mcb_operation_count;
} mca_coll_adapt_comm_t;

/** Coll adapt module */
struct mca_coll_adapt_module_t {
/** Base module; first member of the structure */
mca_coll_base_module_t super;

/* Whether this module has been lazily initialized or not yet */
bool enabled;

/* Data that hangs off the communicator */
mca_coll_adapt_comm_t *adapt_comm_data;

/* Underlying reduce function and the module it belongs to.
NOTE(review): presumably the reduce implementation that was
selected on the communicator before this module, saved so it
can still be called -- confirm in the module enable code. */
mca_coll_base_module_reduce_fn_t previous_reduce;
mca_coll_base_module_t *previous_reduce_module;
};
OBJ_CLASS_DECLARATION(mca_coll_adapt_module_t);

/**
* Global component instance
*/
OMPI_MODULE_DECLSPEC extern mca_coll_adapt_component_t mca_coll_adapt_component;

/*
* coll module functions
*/
int mca_coll_adapt_init_query(bool enable_progress_threads,
bool enable_mpi_threads);

mca_coll_base_module_t *
mca_coll_adapt_comm_query(struct ompi_communicator_t *comm, int *priority);

/* Lazily enable a module (since it involves expensive/slow mmap
allocation, etc.) */
int ompi_coll_adapt_lazy_enable(mca_coll_base_module_t *module,
struct ompi_communicator_t *comm);


int mca_coll_adapt_ft_event(int state);

#endif /* MCA_COLL_ADAPT_EXPORT_H */
144 changes: 144 additions & 0 deletions ompi/mca/coll/adapt/coll_adapt_algorithms.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
#include "ompi/mca/coll/coll.h"
#include "ompi/mca/coll/base/coll_base_topo.h" //ompi_coll_tree_t

#include <pthread.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/*
 * Broadcast entry points. Every function down to the *_generic pair
 * shares one parameter list: (buff, count, datatype, root, comm, module).
 * NOTE(review): presumably the name suffix (binomial, binary, pipeline,
 * chain, linear, topoaware_*, two_trees_*, two_chains) selects the shape
 * of the communication tree -- confirm against the implementations.
 */
int mca_coll_adapt_bcast(void *buff, int count, struct ompi_datatype_t *datatype, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module);

int mca_coll_adapt_bcast_binomial(void *buff, int count, struct ompi_datatype_t *datatype, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module);

int mca_coll_adapt_bcast_in_order_binomial(void *buff, int count, struct ompi_datatype_t *datatype, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module);

int mca_coll_adapt_bcast_binary(void *buff, int count, struct ompi_datatype_t *datatype, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module);

int mca_coll_adapt_bcast_pipeline(void *buff, int count, struct ompi_datatype_t *datatype, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module);

int mca_coll_adapt_bcast_chain(void *buff, int count, struct ompi_datatype_t *datatype, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module);

int mca_coll_adapt_bcast_linear(void *buff, int count, struct ompi_datatype_t *datatype, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module);

int mca_coll_adapt_bcast_topoaware_linear(void *buff, int count, struct ompi_datatype_t *datatype, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module);

int mca_coll_adapt_bcast_topoaware_chain(void *buff, int count, struct ompi_datatype_t *datatype, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module);

int mca_coll_adapt_bcast_two_trees_binary(void *buff, int count, struct ompi_datatype_t *datatype, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module);

int mca_coll_adapt_bcast_two_trees_binomial(void *buff, int count, struct ompi_datatype_t *datatype, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module);

int mca_coll_adapt_bcast_two_chains(void *buff, int count, struct ompi_datatype_t *datatype, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module);

/* The generic forms take one extra trailing argument: a single
   ompi_coll_tree_t pointer (tree), or a pointer to tree pointers
   (trees) for the two-trees form. Note that the first one names its
   datatype parameter "dtype" rather than "datatype". */
int mca_coll_adapt_bcast_generic(void *buff, int count, struct ompi_datatype_t *dtype, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module, ompi_coll_tree_t *tree);

int mca_coll_adapt_bcast_two_trees_generic(void *buff, int count, struct ompi_datatype_t *datatype, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module, ompi_coll_tree_t** trees);

/*
 * Request-based broadcast entry points. They mirror the
 * mca_coll_adapt_bcast_* declarations, with an additional
 * ompi_request_t **request parameter placed before module.
 * NOTE(review): presumably these are the non-blocking counterparts and
 * *request receives the request to wait on -- confirm against the
 * implementations.
 */
int mca_coll_adapt_ibcast(void *buff, int count, struct ompi_datatype_t *datatype, int root, struct ompi_communicator_t *comm, ompi_request_t ** request, mca_coll_base_module_t *module);

int mca_coll_adapt_ibcast_binomial(void *buff, int count, struct ompi_datatype_t *datatype, int root, struct ompi_communicator_t *comm, ompi_request_t ** request, mca_coll_base_module_t *module);

int mca_coll_adapt_ibcast_in_order_binomial(void *buff, int count, struct ompi_datatype_t *datatype, int root, struct ompi_communicator_t *comm, ompi_request_t ** request, mca_coll_base_module_t *module);

/* NOTE(review): "bininary" looks like a misspelling of "binary" (compare
   mca_coll_adapt_bcast_binary). The name is left as is here because the
   definition and its callers must be renamed together. */
int mca_coll_adapt_ibcast_bininary(void *buff, int count, struct ompi_datatype_t *datatype, int root, struct ompi_communicator_t *comm, ompi_request_t ** request, mca_coll_base_module_t *module);

int mca_coll_adapt_ibcast_pipeline(void *buff, int count, struct ompi_datatype_t *datatype, int root, struct ompi_communicator_t *comm, ompi_request_t ** request, mca_coll_base_module_t *module);

int mca_coll_adapt_ibcast_chain(void *buff, int count, struct ompi_datatype_t *datatype, int root, struct ompi_communicator_t *comm, ompi_request_t ** request, mca_coll_base_module_t *module);

int mca_coll_adapt_ibcast_linear(void *buff, int count, struct ompi_datatype_t *datatype, int root, struct ompi_communicator_t *comm, ompi_request_t ** request, mca_coll_base_module_t *module);

int mca_coll_adapt_ibcast_topoaware_linear(void *buff, int count, struct ompi_datatype_t *datatype, int root, struct ompi_communicator_t *comm, ompi_request_t ** request, mca_coll_base_module_t *module);

int mca_coll_adapt_ibcast_topoaware_chain(void *buff, int count, struct ompi_datatype_t *datatype, int root, struct ompi_communicator_t *comm, ompi_request_t ** request, mca_coll_base_module_t *module);


int mca_coll_adapt_ibcast_two_trees_binary(void *buff, int count, struct ompi_datatype_t *datatype, int root, struct ompi_communicator_t *comm, ompi_request_t ** request, mca_coll_base_module_t *module);

int mca_coll_adapt_ibcast_two_trees_binomial(void *buff, int count, struct ompi_datatype_t *datatype, int root, struct ompi_communicator_t *comm, ompi_request_t ** request, mca_coll_base_module_t *module);

int mca_coll_adapt_ibcast_two_chains(void *buff, int count, struct ompi_datatype_t *datatype, int root, struct ompi_communicator_t *comm, ompi_request_t ** request, mca_coll_base_module_t *module);

/* The generic forms take one extra trailing argument: a single tree, or
   a pointer to tree pointers for the two-trees form. */
int mca_coll_adapt_ibcast_generic(void *buff, int count, struct ompi_datatype_t *datatype, int root, struct ompi_communicator_t *comm, ompi_request_t ** request, mca_coll_base_module_t *module, ompi_coll_tree_t* tree);

int mca_coll_adapt_ibcast_two_trees_generic(void *buff, int count, struct ompi_datatype_t *datatype, int root, struct ompi_communicator_t *comm, ompi_request_t ** request, mca_coll_base_module_t *module, ompi_coll_tree_t** trees);

/*
 * Reduce entry points. All share the parameter list
 * (sbuf, rbuf, count, dtype, op, root, comm, module); sbuf is
 * const-qualified, rbuf is not.
 * NOTE(review): presumably the name suffix selects the shape of the
 * communication tree -- confirm against the implementations. Unlike the
 * bcast family, no two_trees_* or two_chains variants are declared.
 */
int mca_coll_adapt_reduce(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module);

int mca_coll_adapt_reduce_binomial(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module);

int mca_coll_adapt_reduce_in_order_binomial(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module);

int mca_coll_adapt_reduce_binary(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module);

int mca_coll_adapt_reduce_pipeline(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module);

int mca_coll_adapt_reduce_chain(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module);

int mca_coll_adapt_reduce_linear(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module);

int mca_coll_adapt_reduce_topoaware_linear(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module);

int mca_coll_adapt_reduce_topoaware_chain(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module);


/* The generic form takes the tree as one extra trailing argument. */
int mca_coll_adapt_reduce_generic(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module, ompi_coll_tree_t* tree);

/*
 * Request-based reduce entry points. They mirror the
 * mca_coll_adapt_reduce_* declarations, with an additional
 * ompi_request_t **request parameter placed before module.
 * NOTE(review): presumably these are the non-blocking counterparts --
 * confirm against the implementations. No topoaware_* variants are
 * declared for this family.
 */
int mca_coll_adapt_ireduce(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, int root, struct ompi_communicator_t *comm, ompi_request_t ** request, mca_coll_base_module_t *module);

int mca_coll_adapt_ireduce_binomial(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, int root, struct ompi_communicator_t *comm, ompi_request_t ** request, mca_coll_base_module_t *module);

int mca_coll_adapt_ireduce_in_order_binomial(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, int root, struct ompi_communicator_t *comm, ompi_request_t ** request, mca_coll_base_module_t *module);

int mca_coll_adapt_ireduce_binary(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, int root, struct ompi_communicator_t *comm, ompi_request_t ** request, mca_coll_base_module_t *module);

int mca_coll_adapt_ireduce_pipeline(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, int root, struct ompi_communicator_t *comm, ompi_request_t ** request, mca_coll_base_module_t *module);


int mca_coll_adapt_ireduce_chain(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, int root, struct ompi_communicator_t *comm, ompi_request_t ** request, mca_coll_base_module_t *module);

int mca_coll_adapt_ireduce_linear(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, int root, struct ompi_communicator_t *comm, ompi_request_t ** request, mca_coll_base_module_t *module);

/* The generic form takes the tree as one extra trailing argument. */
int mca_coll_adapt_ireduce_generic(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, int root, struct ompi_communicator_t *comm, ompi_request_t ** request, mca_coll_base_module_t *module, ompi_coll_tree_t* tree);

/*
 * Allreduce entry points. They take no root parameter; the parameter
 * list is (sbuf, rbuf, count, dtype, op, comm, module).
 */
int mca_coll_adapt_allreduce_intra_nonoverlapping(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, mca_coll_base_module_t *module);

int mca_coll_adapt_allreduce_intra_recursivedoubling(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, mca_coll_base_module_t *module);

int mca_coll_adapt_allreduce(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, mca_coll_base_module_t *module);

/* NOTE(review): sbuf is not const-qualified here, unlike in every other
   reduce/allreduce declaration in this header; confirm whether that is
   intentional before changing it together with the definition. */
int mca_coll_adapt_iallreduce(void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, ompi_request_t ** request, mca_coll_base_module_t *module);

/*
 * Alltoallv entry points, with per-peer count and displacement arrays
 * for the send side (scounts, sdisps) and the receive side (rcounts,
 * rdisps). The second form additionally takes ompi_request_t **request.
 * NOTE(review): no plain alltoall/ialltoall prototypes are declared in
 * this header; confirm where those are declared, if they exist.
 */
int mca_coll_adapt_alltoallv(const void *sbuf, const int *scounts, const int *sdisps, struct ompi_datatype_t *sdtype, void* rbuf, const int *rcounts, const int *rdisps, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module);

int mca_coll_adapt_ialltoallv(const void *sbuf, const int *scounts, const int *sdisps, struct ompi_datatype_t *sdtype, void* rbuf, const int *rcounts, const int *rdisps, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, ompi_request_t ** request, mca_coll_base_module_t *module);

/*
 * Test helper: returns the calling thread's pthread_t packed into a
 * uint64_t. The leading bytes of the pthread_t, up to the size of the
 * smaller of the two objects, are copied over a zeroed 64-bit value.
 */
static inline uint64_t gettid(void)
{
    const pthread_t self = pthread_self();
    uint64_t packed = 0;
    /* Never copy more than either object can hold. */
    const size_t nbytes = (sizeof(packed) < sizeof(self)) ? sizeof(packed)
                                                          : sizeof(self);

    memcpy(&packed, &self, nbytes);
    return packed;
}

/*
 * Test helper: prints one line to stdout describing `tree` as seen by
 * `rank` -- the rank, tree_prev and tree_nextsize, then each of the
 * tree_nextsize entries of tree_next, and finally the root when `rank`
 * equals tree_root.
 */
static inline void print_tree(ompi_coll_tree_t* tree, int rank)
{
    int child = 0;

    printf("[%d, prev = %d, next_size = %d]:", rank, tree->tree_prev, tree->tree_nextsize);

    while (child < tree->tree_nextsize) {
        printf(" %d", tree->tree_next[child]);
        child++;
    }

    /* The root is only reported by the root rank itself. */
    if (tree->tree_root == rank) {
        printf(" root = %d", tree->tree_root);
    }

    printf("\n");
}

/*
 * Invalidates and releases the request that *request points to, then
 * resets the caller's handle.
 *
 * request: address of the request handle; both request and *request
 *          are dereferenced without a NULL check.
 * Returns OMPI_SUCCESS unconditionally.
 */
static inline int adapt_request_free(ompi_request_t** request)
{
/* Mark the request invalid before it is released. */
(*request)->req_state = OMPI_REQUEST_INVALID;
/* NOTE(review): OBJ_RELEASE is defined outside this header; presumably
   it drops one reference and destroys the object when the count reaches
   zero -- confirm. It is handed the caller's handle expression itself. */
OBJ_RELEASE(*request);
/* Leave the caller holding the null request handle. */
*request = MPI_REQUEST_NULL;
return OMPI_SUCCESS;
}
Loading