Moving mca/common/netpatterns and mca/common/commpatterns to ompi/patterns.
1.1 --- a/ompi/Makefile.am Tue Feb 05 18:15:32 2013 +0000
1.2 +++ b/ompi/Makefile.am Tue Feb 05 21:52:55 2013 +0000
1.3 @@ -182,6 +182,8 @@
1.4 include mpi/Makefile.am
1.5 include mpi/man/man3/Makefile.extra
1.6 include mpiext/Makefile.am
1.7 +include patterns/net/Makefile.am
1.8 +include patterns/comm/Makefile.am
1.9
1.10 # Ensure that the man page directory exists before we try to make man
1.11 # page files (because ompi/mpi/man/man3 has no config.status-generated
2.1 --- a/ompi/mca/bcol/basesmuma/Makefile.am Tue Feb 05 18:15:32 2013 +0000
2.2 +++ b/ompi/mca/bcol/basesmuma/Makefile.am Tue Feb 05 21:52:55 2013 +0000
2.3 @@ -51,9 +51,7 @@
2.4 mca_bcol_basesmuma_la_SOURCES = $(sources)
2.5 mca_bcol_basesmuma_la_LDFLAGS = -module -avoid-version $(btl_portals_LDFLAGS)
2.6 mca_bcol_basesmuma_la_LIBADD = \
2.7 - $(btl_portals_LIBS) \
2.8 - $(top_ompi_builddir)/ompi/mca/common/netpatterns/libmca_common_netpatterns.la \
2.9 - $(top_ompi_builddir)/ompi/mca/common/commpatterns/libmca_common_commpatterns.la
2.10 + $(btl_portals_LIBS)
2.11
2.12
2.13 noinst_LTLIBRARIES = $(component_noinst)
3.1 --- a/ompi/mca/bcol/basesmuma/bcol_basesmuma.h Tue Feb 05 18:15:32 2013 +0000
3.2 +++ b/ompi/mca/bcol/basesmuma/bcol_basesmuma.h Tue Feb 05 21:52:55 2013 +0000
3.3 @@ -19,7 +19,7 @@
3.4 #include "ompi/mca/coll/ml/coll_ml_allocation.h"
3.5 #include "ompi/request/request.h"
3.6 #include "ompi/proc/proc.h"
3.7 -#include "ompi/mca/common/netpatterns/common_netpatterns.h"
3.8 +#include "ompi/patterns/net/netpatterns.h"
3.9
3.10 #include "opal/mca/mca.h"
3.11 #include "opal/util/arch.h"
3.12 @@ -774,16 +774,16 @@
3.13 sm_buffer_mgmt colls_with_user_data;
3.14
3.15 /* recursive-doubling tree node */
3.16 - mca_common_netpatterns_pair_exchange_node_t recursive_doubling_tree;
3.17 + netpatterns_pair_exchange_node_t recursive_doubling_tree;
3.18
3.19 /* k-nomial gather/allgather tree */
3.20 - mca_common_netpatterns_k_exchange_node_t knomial_allgather_tree;
3.21 + netpatterns_k_exchange_node_t knomial_allgather_tree;
3.22
3.23 /* fanin tree node - root is rank 0 */
3.24 - mca_common_netpatterns_tree_node_t fanin_node;
3.25 + netpatterns_tree_node_t fanin_node;
3.26
3.27 /* fanout tree node - root is rank 0 */
3.28 - mca_common_netpatterns_tree_node_t fanout_node;
3.29 + netpatterns_tree_node_t fanout_node;
3.30
3.31 /* index of blocking barrier memory region to use */
3.32 int index_blocking_barrier_memory_bank;
3.33 @@ -792,18 +792,18 @@
3.34 int *comm_to_sm_map;
3.35
3.36 /* reduction fanout tree */
3.37 - mca_common_netpatterns_tree_node_t* reduction_tree;
3.38 + netpatterns_tree_node_t* reduction_tree;
3.39
3.40 /* broadcast fanout tree */
3.41 - mca_common_netpatterns_tree_node_t* fanout_read_tree;
3.42 + netpatterns_tree_node_t* fanout_read_tree;
3.43
3.44 /* scatter - k-ary tree */
3.45 int scatter_kary_radix;
3.46 - mca_common_netpatterns_tree_node_t *scatter_kary_tree;
3.47 + netpatterns_tree_node_t *scatter_kary_tree;
3.48
3.49 /* Knomial exchange tree */
3.50 /* Currently used for only large message reduce */
3.51 - mca_common_netpatterns_k_exchange_node_t knomial_exchange_tree;
3.52 + netpatterns_k_exchange_node_t knomial_exchange_tree;
3.53
3.54 /* sequence number offset - want to make sure that we start
3.55 * id'ing collectives with id 0, so we can have simple
4.1 --- a/ompi/mca/bcol/basesmuma/bcol_basesmuma_bcast.c Tue Feb 05 18:15:32 2013 +0000
4.2 +++ b/ompi/mca/bcol/basesmuma/bcol_basesmuma_bcast.c Tue Feb 05 21:52:55 2013 +0000
4.3 @@ -135,7 +135,7 @@
4.4 volatile char* parent_data_pointer;
4.5 mca_bcol_basesmuma_header_t *my_ctl_pointer;
4.6 volatile mca_bcol_basesmuma_header_t *parent_ctl_pointer;
4.7 - mca_common_netpatterns_tree_node_t* my_fanout_read_tree;
4.8 + netpatterns_tree_node_t* my_fanout_read_tree;
4.9 size_t pack_len = 0, dt_size;
4.10
4.11 void *data_addr = (void *)((unsigned char *)input_args->src_desc->data_addr );
4.12 @@ -268,7 +268,7 @@
4.13 mca_bcol_basesmuma_module_t* bcol_module=
4.14 (mca_bcol_basesmuma_module_t *)c_input_args->bcol_module;
4.15
4.16 - mca_common_netpatterns_tree_node_t* my_fanout_read_tree;
4.17 + netpatterns_tree_node_t* my_fanout_read_tree;
4.18 size_t pack_len = 0, dt_size;
4.19
4.20 void *data_addr = (void *)((unsigned char *)input_args->src_desc->data_addr);
5.1 --- a/ompi/mca/bcol/basesmuma/bcol_basesmuma_buf_mgmt.c Tue Feb 05 18:15:32 2013 +0000
5.2 +++ b/ompi/mca/bcol/basesmuma/bcol_basesmuma_buf_mgmt.c Tue Feb 05 21:52:55 2013 +0000
5.3 @@ -20,7 +20,7 @@
5.4 #include "ompi/mca/bcol/bcol.h"
5.5 #include "ompi/mca/bcol/base/base.h"
5.6 #include "ompi/mca/coll/ml/coll_ml.h"
5.7 -#include "ompi/mca/common/commpatterns/common_coll_ops.h"
5.8 +#include "ompi/patterns/comm/coll_ops.h"
5.9
5.10 #include "opal/dss/dss.h"
5.11
6.1 --- a/ompi/mca/bcol/basesmuma/bcol_basesmuma_fanin.c Tue Feb 05 18:15:32 2013 +0000
6.2 +++ b/ompi/mca/bcol/basesmuma/bcol_basesmuma_fanin.c Tue Feb 05 21:52:55 2013 +0000
6.3 @@ -14,7 +14,7 @@
6.4 #include "ompi/constants.h"
6.5 #include "ompi/communicator/communicator.h"
6.6 #include "ompi/mca/bcol/bcol.h"
6.7 -#include "ompi/mca/common/netpatterns/common_netpatterns.h"
6.8 +#include "ompi/patterns/net/netpatterns.h"
6.9
6.10 #include "opal/sys/atomic.h"
6.11
6.12 @@ -52,7 +52,7 @@
6.13 volatile mca_bcol_basesmuma_header_t *child_ctl;
6.14
6.15
6.16 - mca_common_netpatterns_tree_node_t *my_tree_node = &(bcol_module->fanin_node);
6.17 + netpatterns_tree_node_t *my_tree_node = &(bcol_module->fanin_node);
6.18
6.19 /* Figure out - what instance of the basesmuma bcol I am */
6.20 sequence_number = input_args->sequence_num;
6.21 @@ -134,7 +134,7 @@
6.22 volatile mca_bcol_basesmuma_header_t *child_ctl;
6.23
6.24
6.25 - mca_common_netpatterns_tree_node_t *my_tree_node = &(bcol_module->fanin_node);
6.26 + netpatterns_tree_node_t *my_tree_node = &(bcol_module->fanin_node);
6.27
6.28 sequence_number = input_args->sequence_num;
6.29
7.1 --- a/ompi/mca/bcol/basesmuma/bcol_basesmuma_fanout.c Tue Feb 05 18:15:32 2013 +0000
7.2 +++ b/ompi/mca/bcol/basesmuma/bcol_basesmuma_fanout.c Tue Feb 05 21:52:55 2013 +0000
7.3 @@ -14,7 +14,7 @@
7.4 #include "ompi/constants.h"
7.5 #include "ompi/communicator/communicator.h"
7.6 #include "ompi/mca/bcol/bcol.h"
7.7 -#include "ompi/mca/common/netpatterns/common_netpatterns.h"
7.8 +#include "ompi/patterns/net/netpatterns.h"
7.9
7.10 #include "opal/sys/atomic.h"
7.11
7.12 @@ -50,7 +50,7 @@
7.13 volatile mca_bcol_basesmuma_header_t *parent_ctl;
7.14
7.15
7.16 - mca_common_netpatterns_tree_node_t *my_tree_node = &(bcol_module->fanin_node);
7.17 + netpatterns_tree_node_t *my_tree_node = &(bcol_module->fanin_node);
7.18
7.19 /* Figure out - what instance of the basesmuma bcol I am */
7.20 sequence_number = input_args->sequence_num;
8.1 --- a/ompi/mca/bcol/basesmuma/bcol_basesmuma_module.c Tue Feb 05 18:15:32 2013 +0000
8.2 +++ b/ompi/mca/bcol/basesmuma/bcol_basesmuma_module.c Tue Feb 05 21:52:55 2013 +0000
8.3 @@ -20,7 +20,7 @@
8.4 #include "ompi/communicator/communicator.h"
8.5 #include "ompi/mca/bcol/bcol.h"
8.6 #include "ompi/mca/bcol/base/base.h"
8.7 -#include "ompi/mca/common/netpatterns/common_netpatterns.h"
8.8 +#include "ompi/patterns/net/netpatterns.h"
8.9
8.10 #include "opal/util/show_help.h"
8.11 #include "opal/align.h"
8.12 @@ -217,7 +217,7 @@
8.13 *sm_module)
8.14 {
8.15 int rc = OMPI_SUCCESS;
8.16 - rc = mca_common_netpatterns_setup_recursive_knomial_tree_node(
8.17 + rc = netpatterns_setup_recursive_knomial_tree_node(
8.18 sm_module->super.sbgp_partner_module->group_size,
8.19 sm_module->super.sbgp_partner_module->my_index,
8.20 mca_bcol_basesmuma_component.k_nomial_radix,
8.21 @@ -230,7 +230,7 @@
8.22 {
8.23 mca_bcol_basesmuma_module_t *sm_module = (mca_bcol_basesmuma_module_t *) super;
8.24
8.25 - return mca_common_netpatterns_setup_recursive_knomial_allgather_tree_node(
8.26 + return netpatterns_setup_recursive_knomial_allgather_tree_node(
8.27 sm_module->super.sbgp_partner_module->group_size,
8.28 sm_module->super.sbgp_partner_module->my_index,
8.29 mca_bcol_basesmuma_component.k_nomial_radix,
8.30 @@ -290,7 +290,7 @@
8.31 sm_module->reduction_tree = NULL;
8.32 sm_module->fanout_read_tree = NULL;
8.33
8.34 - ret=mca_common_netpatterns_setup_recursive_doubling_tree_node(
8.35 + ret=netpatterns_setup_recursive_doubling_tree_node(
8.36 module->group_size,module->my_index,
8.37 &(sm_module->recursive_doubling_tree));
8.38 if(OMPI_SUCCESS != ret) {
8.39 @@ -302,7 +302,7 @@
8.40 /* setup the fanin tree - this is used only as part of a hierarchical
8.41 * barrier, so will set this up with rank 0 as the root */
8.42 my_rank=module->my_index;
8.43 - ret=mca_common_netpatterns_setup_narray_tree(cs->radix_fanin,
8.44 + ret=netpatterns_setup_narray_tree(cs->radix_fanin,
8.45 my_rank,module->group_size,&(sm_module->fanin_node));
8.46 if(OMPI_SUCCESS != ret) {
8.47 fprintf(stderr,"Error setting up fanin tree \n");
8.48 @@ -312,7 +312,7 @@
8.49
8.50 /* setup the fanout tree - this is used only as part of a hierarchical
8.51 * barrier, so will set this up with rank 0 as the root */
8.52 - ret=mca_common_netpatterns_setup_narray_tree(cs->radix_fanout,
8.53 + ret=netpatterns_setup_narray_tree(cs->radix_fanout,
8.54 my_rank,module->group_size,&(sm_module->fanout_node));
8.55 if(OMPI_SUCCESS != ret) {
8.56 fprintf(stderr,"Error setting up fanout tree \n");
8.57 @@ -329,14 +329,14 @@
8.58 bcast_radix = cs->radix_read_tree;
8.59
8.60 /* initialize fan-out read tree */
8.61 - sm_module->fanout_read_tree=(mca_common_netpatterns_tree_node_t*) malloc(
8.62 - sizeof(mca_common_netpatterns_tree_node_t)*module->group_size);
8.63 + sm_module->fanout_read_tree=(netpatterns_tree_node_t*) malloc(
8.64 + sizeof(netpatterns_tree_node_t)*module->group_size);
8.65 if( NULL == sm_module->fanout_read_tree ) {
8.66 goto Error;
8.67 }
8.68
8.69 for(i = 0; i < module->group_size; i++){
8.70 - ret = mca_common_netpatterns_setup_narray_tree(bcast_radix,
8.71 + ret = netpatterns_setup_narray_tree(bcast_radix,
8.72 i, module->group_size, &(sm_module->fanout_read_tree[i]));
8.73 if(OMPI_SUCCESS != ret) {
8.74 goto Error;
8.75 @@ -359,13 +359,13 @@
8.76 */
8.77
8.78 /* initialize reduction tree */
8.79 - sm_module->reduction_tree=(mca_common_netpatterns_tree_node_t *) malloc(
8.80 - sizeof(mca_common_netpatterns_tree_node_t )*module->group_size);
8.81 + sm_module->reduction_tree=(netpatterns_tree_node_t *) malloc(
8.82 + sizeof(netpatterns_tree_node_t )*module->group_size);
8.83 if( NULL == sm_module->reduction_tree ) {
8.84 goto Error;
8.85 }
8.86
8.87 - ret=mca_common_netpatterns_setup_multinomial_tree(
8.88 + ret=netpatterns_setup_multinomial_tree(
8.89 cs->order_reduction_tree,module->group_size,
8.90 sm_module->reduction_tree);
8.91 if( MPI_SUCCESS != ret ) {
8.92 @@ -389,7 +389,7 @@
8.93 */
8.94 sm_module->scatter_kary_radix=cs->scatter_kary_radix;
8.95 sm_module->scatter_kary_tree=NULL;
8.96 - ret=mca_common_netpatterns_setup_narray_tree_contigous_ranks(
8.97 + ret=netpatterns_setup_narray_tree_contigous_ranks(
8.98 sm_module->scatter_kary_radix,
8.99 sm_module->super.sbgp_partner_module->group_size,
8.100 &(sm_module->scatter_kary_tree));
9.1 --- a/ompi/mca/bcol/basesmuma/bcol_basesmuma_rd_nb_barrier.c Tue Feb 05 18:15:32 2013 +0000
9.2 +++ b/ompi/mca/bcol/basesmuma/bcol_basesmuma_rd_nb_barrier.c Tue Feb 05 21:52:55 2013 +0000
9.3 @@ -17,7 +17,7 @@
9.4 #include "ompi/mca/bcol/bcol.h"
9.5 #include "bcol_basesmuma.h"
9.6 #include "opal/sys/atomic.h"
9.7 -#include "ompi/mca/common/netpatterns/common_netpatterns.h"
9.8 +#include "ompi/patterns/net/netpatterns.h"
9.9
9.10 /*
9.11 * Initialize nonblocking barrier. This is code specific for handling
9.12 @@ -35,7 +35,7 @@
9.13 int ret=OMPI_SUCCESS, idx, leading_dim, loop_cnt, exchange;
9.14 int pair_rank;
9.15 mca_bcol_basesmuma_ctl_struct_t **ctl_structs;
9.16 - mca_common_netpatterns_pair_exchange_node_t *my_exchange_node;
9.17 + netpatterns_pair_exchange_node_t *my_exchange_node;
9.18 int extra_rank, my_rank, pow_2;
9.19 mca_bcol_basesmuma_ctl_struct_t volatile *partner_ctl;
9.20 mca_bcol_basesmuma_ctl_struct_t volatile *my_ctl;
9.21 @@ -205,7 +205,7 @@
9.22 int ret=OMPI_SUCCESS, idx, leading_dim, loop_cnt, exchange;
9.23 int pair_rank, start_index, restart_phase;
9.24 mca_bcol_basesmuma_ctl_struct_t **ctl_structs;
9.25 - mca_common_netpatterns_pair_exchange_node_t *my_exchange_node;
9.26 + netpatterns_pair_exchange_node_t *my_exchange_node;
9.27 int extra_rank, my_rank, pow_2;
9.28 mca_bcol_basesmuma_ctl_struct_t volatile *partner_ctl;
9.29 mca_bcol_basesmuma_ctl_struct_t volatile *my_ctl;
10.1 --- a/ompi/mca/bcol/basesmuma/bcol_basesmuma_rk_barrier.c Tue Feb 05 18:15:32 2013 +0000
10.2 +++ b/ompi/mca/bcol/basesmuma/bcol_basesmuma_rk_barrier.c Tue Feb 05 21:52:55 2013 +0000
10.3 @@ -55,7 +55,7 @@
10.4 int flag_offset = 0;
10.5 volatile int8_t ready_flag;
10.6 mca_bcol_basesmuma_module_t *bcol_module = (mca_bcol_basesmuma_module_t *) const_args->bcol_module;
10.7 - mca_common_netpatterns_k_exchange_node_t *exchange_node = &bcol_module->knomial_allgather_tree;
10.8 + netpatterns_k_exchange_node_t *exchange_node = &bcol_module->knomial_allgather_tree;
10.9 mca_bcol_basesmuma_component_t *cm = &mca_bcol_basesmuma_component;
10.10 uint32_t buffer_index = input_args->buffer_index;
10.11 int *active_requests =
10.12 @@ -244,7 +244,7 @@
10.13 int flag_offset;
10.14 volatile int8_t ready_flag;
10.15 mca_bcol_basesmuma_module_t *bcol_module = (mca_bcol_basesmuma_module_t *) const_args->bcol_module;
10.16 - mca_common_netpatterns_k_exchange_node_t *exchange_node = &bcol_module->knomial_allgather_tree;
10.17 + netpatterns_k_exchange_node_t *exchange_node = &bcol_module->knomial_allgather_tree;
10.18 mca_bcol_basesmuma_component_t *cm = &mca_bcol_basesmuma_component;
10.19 uint32_t buffer_index = input_args->buffer_index;
10.20 int *active_requests =
11.1 --- a/ompi/mca/bcol/basesmuma/bcol_basesmuma_setup.c Tue Feb 05 18:15:32 2013 +0000
11.2 +++ b/ompi/mca/bcol/basesmuma/bcol_basesmuma_setup.c Tue Feb 05 21:52:55 2013 +0000
11.3 @@ -22,7 +22,7 @@
11.4 #include "ompi/mca/mpool/base/base.h"
11.5 #include "ompi/mca/bcol/bcol.h"
11.6 #include "ompi/mca/bcol/base/base.h"
11.7 -#include "ompi/mca/common/commpatterns/common_coll_ops.h"
11.8 +#include "ompi/patterns/comm/coll_ops.h"
11.9
11.10 #include "opal/class/opal_object.h"
11.11 #include "opal/dss/dss.h"
12.1 --- a/ompi/mca/bcol/basesmuma/bcol_basesmuma_smcm.c Tue Feb 05 18:15:32 2013 +0000
12.2 +++ b/ompi/mca/bcol/basesmuma/bcol_basesmuma_smcm.c Tue Feb 05 21:52:55 2013 +0000
12.3 @@ -19,7 +19,7 @@
12.4 #include <errno.h>
12.5
12.6 #include "ompi/proc/proc.h"
12.7 -#include "ompi/mca/common/commpatterns/common_coll_ops.h"
12.8 +#include "ompi/patterns/comm/coll_ops.h"
12.9
12.10 #include "opal/dss/dss.h"
12.11 #include "opal/util/error.h"
13.1 --- a/ompi/mca/bcol/bcol.h Tue Feb 05 18:15:32 2013 +0000
13.2 +++ b/ompi/mca/bcol/bcol.h Tue Feb 05 21:52:55 2013 +0000
13.3 @@ -20,7 +20,7 @@
13.4 #include "ompi/datatype/ompi_datatype.h"
13.5 #include "ompi/op/op.h"
13.6 #include "ompi/include/ompi/constants.h"
13.7 -#include "ompi/mca/common/netpatterns/common_netpatterns_knomial_tree.h"
13.8 +#include "ompi/patterns/net/netpatterns_knomial_tree.h"
13.9
13.10 #include <limits.h>
13.11
14.1 --- a/ompi/mca/bcol/iboffload/bcol_iboffload.h Tue Feb 05 18:15:32 2013 +0000
14.2 +++ b/ompi/mca/bcol/iboffload/bcol_iboffload.h Tue Feb 05 21:52:55 2013 +0000
14.3 @@ -364,16 +364,16 @@
14.4 opal_list_t collfrag_pending;
14.5
14.6 /* recursive-doubling tree node */
14.7 - mca_common_netpatterns_pair_exchange_node_t recursive_doubling_tree;
14.8 + netpatterns_pair_exchange_node_t recursive_doubling_tree;
14.9
14.10 /* N exchange tree */
14.11 - mca_common_netpatterns_pair_exchange_node_t n_exchange_tree;
14.12 + netpatterns_pair_exchange_node_t n_exchange_tree;
14.13
14.14 /* Knomial exchange tree */
14.15 - mca_common_netpatterns_k_exchange_node_t knomial_exchange_tree;
14.16 + netpatterns_k_exchange_node_t knomial_exchange_tree;
14.17
14.18 /* Knomial exchange tree */
14.19 - mca_common_netpatterns_k_exchange_node_t knomial_allgather_tree;
14.20 + netpatterns_k_exchange_node_t knomial_allgather_tree;
14.21
14.22 /* The array will keep pre-calculated task consumption per
14.23 * algorithm
15.1 --- a/ompi/mca/bcol/iboffload/bcol_iboffload_barrier.c Tue Feb 05 18:15:32 2013 +0000
15.2 +++ b/ompi/mca/bcol/iboffload/bcol_iboffload_barrier.c Tue Feb 05 21:52:55 2013 +0000
15.3 @@ -54,7 +54,7 @@
15.4 mca_bcol_iboffload_frag_t *send_fragment = NULL,
15.5 *preposted_recv_frag = NULL;
15.6
15.7 - mca_common_netpatterns_pair_exchange_node_t *my_exchange_node =
15.8 + netpatterns_pair_exchange_node_t *my_exchange_node =
15.9 &iboffload->recursive_doubling_tree;
15.10
15.11 IBOFFLOAD_VERBOSE(10, ("Calling for mca_bcol_iboffload_barrier_intra_recursive_doubling.\n"));
15.12 @@ -364,7 +364,7 @@
15.13 /* Recursive K - ing*/
15.14 static int recursive_knomial_start_connections(struct mca_bcol_iboffload_module_t *iboffload)
15.15 {
15.16 - mca_common_netpatterns_k_exchange_node_t *my_exchange_node =
15.17 + netpatterns_k_exchange_node_t *my_exchange_node =
15.18 &iboffload->knomial_exchange_tree;
15.19 int k, i, n_exchanges = my_exchange_node->n_exchanges,
15.20 **exchanges = my_exchange_node->rank_exchanges,
15.21 @@ -442,7 +442,7 @@
15.22 mca_bcol_iboffload_frag_t *send_fragment = NULL,
15.23 *preposted_recv_frag = NULL;
15.24
15.25 - mca_common_netpatterns_k_exchange_node_t *my_exchange_node =
15.26 + netpatterns_k_exchange_node_t *my_exchange_node =
15.27 &iboffload->knomial_exchange_tree;
15.28 IBOFFLOAD_VERBOSE(10, ("Calling for mca_bcol_iboffload_barrier_intra_recursive_knomial. Node type %d\n", my_exchange_node->node_type));
15.29
15.30 @@ -706,7 +706,7 @@
15.31
15.32 int mca_bcol_iboffload_rec_doubling_start_connections(mca_bcol_iboffload_module_t *iboffload)
15.33 {
15.34 - mca_common_netpatterns_pair_exchange_node_t *my_exchange_node =
15.35 + netpatterns_pair_exchange_node_t *my_exchange_node =
15.36 &iboffload->recursive_doubling_tree;
15.37
15.38 int i, n_exchanges = my_exchange_node->n_exchanges,
16.1 --- a/ompi/mca/bcol/iboffload/bcol_iboffload_bcast.c Tue Feb 05 18:15:32 2013 +0000
16.2 +++ b/ompi/mca/bcol/iboffload/bcol_iboffload_bcast.c Tue Feb 05 21:52:55 2013 +0000
16.3 @@ -203,7 +203,7 @@
16.4 static int mca_bcol_iboffload_small_msg_bcast_exec(mca_bcol_iboffload_module_t *iboffload_module,
16.5 mca_bcol_iboffload_collreq_t *coll_request)
16.6 {
16.7 - mca_common_netpatterns_pair_exchange_node_t *recursive_doubling_tree =
16.8 + netpatterns_pair_exchange_node_t *recursive_doubling_tree =
16.9 &iboffload_module->recursive_doubling_tree;
16.10
16.11 int rc,
16.12 @@ -396,7 +396,7 @@
16.13 static int mca_bcol_iboffload_small_msg_bcast_extra_exec(mca_bcol_iboffload_module_t *iboffload_module,
16.14 mca_bcol_iboffload_collreq_t *coll_request)
16.15 {
16.16 - mca_common_netpatterns_pair_exchange_node_t *recursive_doubling_tree =
16.17 + netpatterns_pair_exchange_node_t *recursive_doubling_tree =
16.18 &iboffload_module->recursive_doubling_tree;
16.19
16.20 int rc,
16.21 @@ -617,7 +617,7 @@
16.22 static int mca_bcol_iboffload_bcast_scatter_allgather_exec(mca_bcol_iboffload_module_t *iboffload_module,
16.23 mca_bcol_iboffload_collreq_t *coll_request)
16.24 {
16.25 - mca_common_netpatterns_pair_exchange_node_t *recursive_doubling_tree =
16.26 + netpatterns_pair_exchange_node_t *recursive_doubling_tree =
16.27 &iboffload_module->recursive_doubling_tree;
16.28
16.29 int rc,
16.30 @@ -857,7 +857,7 @@
16.31 static int mca_bcol_iboffload_bcast_scatter_allgather_extra_exec(mca_bcol_iboffload_module_t *iboffload_module,
16.32 mca_bcol_iboffload_collreq_t *coll_request)
16.33 {
16.34 - mca_common_netpatterns_pair_exchange_node_t *recursive_doubling_tree =
16.35 + netpatterns_pair_exchange_node_t *recursive_doubling_tree =
16.36 &iboffload_module->recursive_doubling_tree;
16.37
16.38 int rc, dst;
17.1 --- a/ompi/mca/bcol/iboffload/bcol_iboffload_bcast.h Tue Feb 05 18:15:32 2013 +0000
17.2 +++ b/ompi/mca/bcol/iboffload/bcol_iboffload_bcast.h Tue Feb 05 21:52:55 2013 +0000
17.3 @@ -364,7 +364,7 @@
17.4
17.5 static inline void bcol_iboffload_setup_binomial_connection(mca_bcol_iboffload_module_t *iboffload)
17.6 {
17.7 - mca_common_netpatterns_pair_exchange_node_t *my_exchange_node =
17.8 + netpatterns_pair_exchange_node_t *my_exchange_node =
17.9 &iboffload->recursive_doubling_tree;
17.10
17.11 int i, n_exchanges = my_exchange_node->n_exchanges,
18.1 --- a/ompi/mca/bcol/iboffload/bcol_iboffload_module.c Tue Feb 05 18:15:32 2013 +0000
18.2 +++ b/ompi/mca/bcol/iboffload/bcol_iboffload_module.c Tue Feb 05 21:52:55 2013 +0000
18.3 @@ -178,8 +178,8 @@
18.4 free(module->endpoints);
18.5 }
18.6
18.7 - mca_common_netpatterns_free_recursive_doubling_tree_node(&module->n_exchange_tree);
18.8 - mca_common_netpatterns_free_recursive_doubling_tree_node(&module->recursive_doubling_tree);
18.9 + netpatterns_free_recursive_doubling_tree_node(&module->n_exchange_tree);
18.10 + netpatterns_free_recursive_doubling_tree_node(&module->recursive_doubling_tree);
18.11
18.12 OBJ_RELEASE(module->device->net_context);
18.13 OBJ_RELEASE(module->device);
18.14 @@ -745,7 +745,7 @@
18.15 {
18.16 int rc;
18.17 mca_bcol_iboffload_module_t *ib_module = (mca_bcol_iboffload_module_t *) super;
18.18 - rc = mca_common_netpatterns_setup_recursive_knomial_allgather_tree_node(
18.19 + rc = netpatterns_setup_recursive_knomial_allgather_tree_node(
18.20 ib_module->super.sbgp_partner_module->group_size,
18.21 ib_module->super.sbgp_partner_module->my_index,
18.22 mca_bcol_iboffload_component.k_nomial_radix,
18.23 @@ -1090,7 +1090,7 @@
18.24 /* Barrier initialization - recuresive doubling */
18.25 #if 1
18.26 if (OMPI_SUCCESS !=
18.27 - mca_common_netpatterns_setup_recursive_doubling_tree_node(
18.28 + netpatterns_setup_recursive_doubling_tree_node(
18.29 iboffload_module->group_size, my_rank,
18.30 &iboffload_module->recursive_doubling_tree)) {
18.31 IBOFFLOAD_ERROR(("Failed to setup recursive doubling tree,"
18.32 @@ -1101,7 +1101,7 @@
18.33
18.34 /* Barrier initialization - N exchange tree */
18.35 if (OMPI_SUCCESS !=
18.36 - mca_common_netpatterns_setup_recursive_doubling_n_tree_node(
18.37 + netpatterns_setup_recursive_doubling_n_tree_node(
18.38 iboffload_module->group_size, my_rank,
18.39 cm->exchange_tree_order,
18.40 &iboffload_module->n_exchange_tree)) {
18.41 @@ -1113,7 +1113,7 @@
18.42
18.43 /* Recursive K-ing initialization - Knomial exchange tree */
18.44 if (OMPI_SUCCESS !=
18.45 - mca_common_netpatterns_setup_recursive_knomial_tree_node(
18.46 + netpatterns_setup_recursive_knomial_tree_node(
18.47 iboffload_module->group_size, my_rank,
18.48 cm->knomial_tree_order,
18.49 &iboffload_module->knomial_exchange_tree)) {
18.50 @@ -1156,7 +1156,7 @@
18.51 }
18.52 /* that should take care of that */
18.53 if (OMPI_SUCCESS !=
18.54 - mca_common_netpatterns_setup_recursive_knomial_allgather_tree_node(
18.55 + netpatterns_setup_recursive_knomial_allgather_tree_node(
18.56 iboffload_module->group_size, sbgp->group_list[my_rank],
18.57 cm->k_nomial_radix, iboffload_module->super.list_n_connected,
18.58 &iboffload_module->knomial_allgather_tree)) {
19.1 --- a/ompi/mca/bcol/ptpcoll/Makefile.am Tue Feb 05 18:15:32 2013 +0000
19.2 +++ b/ompi/mca/bcol/ptpcoll/Makefile.am Tue Feb 05 21:52:55 2013 +0000
19.3 @@ -44,8 +44,7 @@
19.4 mcacomponent_LTLIBRARIES = $(component_install)
19.5 mca_bcol_ptpcoll_la_SOURCES = $(sources)
19.6 mca_bcol_ptpcoll_la_LDFLAGS = -module -avoid-version
19.7 -mca_bcol_ptpcoll_la_LIBADD = \
19.8 - $(top_ompi_builddir)/ompi/mca/common/netpatterns/libmca_common_netpatterns.la
19.9 +mca_bcol_ptpcoll_la_LIBADD =
19.10
19.11 noinst_LTLIBRARIES = $(component_noinst)
19.12 libmca_bcol_ptpcoll_la_SOURCES =$(sources)
20.1 --- a/ompi/mca/bcol/ptpcoll/bcol_ptpcoll.h Tue Feb 05 18:15:32 2013 +0000
20.2 +++ b/ompi/mca/bcol/ptpcoll/bcol_ptpcoll.h Tue Feb 05 21:52:55 2013 +0000
20.3 @@ -20,7 +20,7 @@
20.4 #include "ompi/request/request.h"
20.5 #include "ompi/mca/pml/pml.h"
20.6 #include "ompi/mca/coll/ml/coll_ml_allocation.h"
20.7 -#include "ompi/mca/common/netpatterns/common_netpatterns.h"
20.8 +#include "ompi/patterns/net/netpatterns.h"
20.9
20.10 BEGIN_C_DECLS
20.11
20.12 @@ -297,7 +297,7 @@
20.13 int full_narray_tree_num_leafs;
20.14
20.15 /* Nary tree info */
20.16 - mca_common_netpatterns_tree_node_t *narray_node;
20.17 + netpatterns_tree_node_t *narray_node;
20.18
20.19 /* if the rank in group, it keeps the extra peer.
20.20 if the rank is extra, it keeps the proxy peer.
20.21 @@ -328,13 +328,13 @@
20.22 /* number of extra peers , maximum k - 1*/
20.23 int narray_knomial_proxy_num;
20.24 /* Narray-Knomial node information array */
20.25 - mca_common_netpatterns_narray_knomial_tree_node_t *narray_knomial_node;
20.26 + netpatterns_narray_knomial_tree_node_t *narray_knomial_node;
20.27 /* Knomial exchange tree */
20.28 - mca_common_netpatterns_k_exchange_node_t knomial_exchange_tree;
20.29 + netpatterns_k_exchange_node_t knomial_exchange_tree;
20.30 /* knomial allgather tree --- Do not disable, we need both
20.31 different algorithms define recursive k - ing differently
20.32 */
20.33 - mca_common_netpatterns_k_exchange_node_t knomial_allgather_tree;
20.34 + netpatterns_k_exchange_node_t knomial_allgather_tree;
20.35
20.36 /* Knomial allgather offsets */
20.37 int **allgather_offsets;
21.1 --- a/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_barrier.c Tue Feb 05 18:15:32 2013 +0000
21.2 +++ b/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_barrier.c Tue Feb 05 21:52:55 2013 +0000
21.3 @@ -34,7 +34,7 @@
21.4 mca_bcol_ptpcoll_module_t *ptpcoll_module =
21.5 (mca_bcol_ptpcoll_module_t *) const_args->bcol_module;
21.6
21.7 - mca_common_netpatterns_k_exchange_node_t *my_exchange_node =
21.8 + netpatterns_k_exchange_node_t *my_exchange_node =
21.9 &ptpcoll_module->knomial_exchange_tree;
21.10
21.11 int rc, k, pair_comm_rank, exchange, completed,
21.12 @@ -223,7 +223,7 @@
21.13 mca_bcol_ptpcoll_module_t *ptpcoll_module =
21.14 (mca_bcol_ptpcoll_module_t *) const_args->bcol_module;
21.15
21.16 - mca_common_netpatterns_k_exchange_node_t *my_exchange_node =
21.17 + netpatterns_k_exchange_node_t *my_exchange_node =
21.18 &ptpcoll_module->knomial_exchange_tree;
21.19
21.20 int rc, k, tag, pair_comm_rank, exchange,
21.21 @@ -371,7 +371,7 @@
21.22 mca_bcol_ptpcoll_module_t *ptpcoll_module =
21.23 (mca_bcol_ptpcoll_module_t *) const_args->bcol_module;
21.24
21.25 - mca_common_netpatterns_k_exchange_node_t *my_exchange_node =
21.26 + netpatterns_k_exchange_node_t *my_exchange_node =
21.27 &ptpcoll_module->knomial_exchange_tree;
21.28
21.29 ompi_communicator_t *comm =
21.30 @@ -862,7 +862,7 @@
21.31
21.32 static int mca_bcol_ptpcoll_barrier_setup(mca_bcol_base_module_t *super, int bcoll_type)
21.33 {
21.34 - mca_common_netpatterns_k_exchange_node_t *my_exchange_node;
21.35 + netpatterns_k_exchange_node_t *my_exchange_node;
21.36 mca_bcol_ptpcoll_module_t * ptpcoll_module =
21.37 (mca_bcol_ptpcoll_module_t *) super;
21.38
22.1 --- a/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_bcast.c Tue Feb 05 18:15:32 2013 +0000
22.2 +++ b/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_bcast.c Tue Feb 05 21:52:55 2013 +0000
22.3 @@ -141,7 +141,7 @@
22.4 int count = input_args->count * input_args->dtype->super.size;
22.5 int *active_requests =
22.6 &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].active_requests);
22.7 - mca_common_netpatter_knomial_step_info_t step_info = {0, 0, 0};
22.8 + netpatterns_knomial_step_info_t step_info = {0, 0, 0};
22.9
22.10 PTPCOLL_VERBOSE(3, ("BCAST Anyroot, index_this_type %d, num_of_this_type %d",
22.11 const_args->index_of_this_type_in_collective + 1,
22.12 @@ -529,7 +529,7 @@
22.13 /* No data was received. Waiting for data */
22.14 if (0 == (*active_requests)) {
22.15 int extra_root = -1;
22.16 - mca_common_netpatter_knomial_step_info_t step_info;
22.17 + netpatterns_knomial_step_info_t step_info;
22.18 /* We can not block. So run couple of test for data arrival */
22.19 if (0 == mca_bcol_ptpcoll_test_for_match(recv_request, &rc)) {
22.20 PTPCOLL_VERBOSE(10, ("Test was not matched (active request %d)",
22.21 @@ -624,7 +624,7 @@
22.22 int matched = 0;
22.23 int k_level, logk_level;
22.24 int extra_root = -1;
22.25 - mca_common_netpatter_knomial_step_info_t step_info;
22.26 + netpatterns_knomial_step_info_t step_info;
22.27
22.28 PTPCOLL_VERBOSE(3, ("BCAST Know root, index_this_type %d, num_of_this_type %d",
22.29 const_args->index_of_this_type_in_collective + 1,
22.30 @@ -694,7 +694,7 @@
22.31 }
22.32 }
22.33
22.34 - data_src = mca_common_netpatterns_get_knomial_data_source(
22.35 + data_src = netpatterns_get_knomial_data_source(
22.36 my_group_index, group_root_index, radix, ptpcoll_module->pow_knum,
22.37 &k_level, &logk_level);
22.38
22.39 @@ -1709,8 +1709,8 @@
22.40 int group_size = ptpcoll_module->full_narray_tree_size;
22.41 int completed = 0;
22.42 int virtual_root;
22.43 - mca_common_netpatterns_narray_knomial_tree_node_t *narray_knomial_node = NULL;
22.44 - mca_common_netpatterns_narray_knomial_tree_node_t *narray_node = NULL;
22.45 + netpatterns_narray_knomial_tree_node_t *narray_knomial_node = NULL;
22.46 + netpatterns_narray_knomial_tree_node_t *narray_node = NULL;
22.47
22.48 PTPCOLL_VERBOSE(3, ("BCAST Anyroot, index_this_type %d, num_of_this_type %d",
22.49 const_args->index_of_this_type_in_collective + 1,
22.50 @@ -2032,7 +2032,7 @@
22.51 int matched = true;
22.52 int my_group_index = ptpcoll_module->super.sbgp_partner_module->my_index;
22.53 int relative_group_index = 0;
22.54 - mca_common_netpatterns_tree_node_t *narray_node = NULL;
22.55 + netpatterns_tree_node_t *narray_node = NULL;
22.56
22.57 PTPCOLL_VERBOSE(3, ("Bcast, Narray tree Progress"));
22.58
22.59 @@ -2119,7 +2119,7 @@
22.60 int my_group_index = ptpcoll_module->super.sbgp_partner_module->my_index;
22.61 int group_root_index;
22.62 int relative_group_index = 0;
22.63 - mca_common_netpatterns_tree_node_t *narray_node = NULL;
22.64 + netpatterns_tree_node_t *narray_node = NULL;
22.65
22.66 PTPCOLL_VERBOSE(3, ("Bcast, Narray tree"));
22.67
23.1 --- a/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_bcast.h Tue Feb 05 18:15:32 2013 +0000
23.2 +++ b/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_bcast.h Tue Feb 05 21:52:55 2013 +0000
23.3 @@ -756,9 +756,9 @@
23.4 ompi_communicator_t* comm = ptpcoll_module->super.sbgp_partner_module->group_comm;
23.5 ompi_request_t **requests =
23.6 ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].requests;
23.7 - mca_common_netpatterns_narray_knomial_tree_node_t *narray_node =
23.8 + netpatterns_narray_knomial_tree_node_t *narray_node =
23.9 &ptpcoll_module->narray_knomial_node[relative_group_index];
23.10 - mca_common_netpatterns_k_exchange_node_t *k_node =
23.11 + netpatterns_k_exchange_node_t *k_node =
23.12 &narray_node->k_node;
23.13 mca_bcol_ptpcoll_component_t *cm =
23.14 &mca_bcol_ptpcoll_component;
24.1 --- a/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_module.c Tue Feb 05 18:15:32 2013 +0000
24.2 +++ b/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_module.c Tue Feb 05 21:52:55 2013 +0000
24.3 @@ -264,7 +264,7 @@
24.4 {
24.5 mca_bcol_ptpcoll_module_t *p2p_module = (mca_bcol_ptpcoll_module_t *) super;
24.6 int rc = 0;
24.7 - rc = mca_common_netpatterns_setup_recursive_knomial_allgather_tree_node(
24.8 + rc = netpatterns_setup_recursive_knomial_allgather_tree_node(
24.9 p2p_module->super.sbgp_partner_module->group_size,
24.10 p2p_module->super.sbgp_partner_module->my_index,
24.11 mca_bcol_ptpcoll_component.k_nomial_radix,
24.12 @@ -314,7 +314,7 @@
24.13
24.14 ptpcoll_module->narray_knomial_node = calloc(
24.15 ptpcoll_module->full_narray_tree_size,
24.16 - sizeof(mca_common_netpatterns_narray_knomial_tree_node_t));
24.17 + sizeof(netpatterns_narray_knomial_tree_node_t));
24.18 if(NULL == ptpcoll_module->narray_knomial_node) {
24.19 goto Error;
24.20 }
24.21 @@ -345,7 +345,7 @@
24.22 }
24.23 /* Setting node info */
24.24 for(i = 0; i < ptpcoll_module->full_narray_tree_size; i++) {
24.25 - rc = mca_common_netpatterns_setup_narray_knomial_tree(
24.26 + rc = netpatterns_setup_narray_knomial_tree(
24.27 cm->narray_knomial_radix,
24.28 i,
24.29 ptpcoll_module->full_narray_tree_size,
24.30 @@ -381,13 +381,13 @@
24.31 mca_bcol_ptpcoll_component_t *cm = &mca_bcol_ptpcoll_component;
24.32
24.33 ptpcoll_module->narray_node = calloc(ptpcoll_module->group_size,
24.34 - sizeof(mca_common_netpatterns_tree_node_t));
24.35 + sizeof(netpatterns_tree_node_t));
24.36 if(NULL == ptpcoll_module->narray_node ) {
24.37 goto Error;
24.38 }
24.39
24.40 for(i = 0; i < ptpcoll_module->group_size; i++) {
24.41 - rc = mca_common_netpatterns_setup_narray_tree(
24.42 + rc = netpatterns_setup_narray_tree(
24.43 cm->narray_radix,
24.44 i,
24.45 ptpcoll_module->group_size,
24.46 @@ -509,7 +509,7 @@
24.47 static int load_recursive_knomial_info(mca_bcol_ptpcoll_module_t *ptpcoll_module)
24.48 {
24.49 int rc = OMPI_SUCCESS;
24.50 - rc = mca_common_netpatterns_setup_recursive_knomial_tree_node(
24.51 + rc = netpatterns_setup_recursive_knomial_tree_node(
24.52 ptpcoll_module->group_size,
24.53 ptpcoll_module->super.sbgp_partner_module->my_index,
24.54 mca_bcol_ptpcoll_component.k_nomial_radix,
25.1 --- a/ompi/mca/coll/ml/Makefile.am Tue Feb 05 18:15:32 2013 +0000
25.2 +++ b/ompi/mca/coll/ml/Makefile.am Tue Feb 05 21:52:55 2013 +0000
25.3 @@ -68,18 +68,9 @@
25.4 mcacomponent_LTLIBRARIES = $(component_install)
25.5 mca_coll_ml_la_SOURCES = $(sources)
25.6 mca_coll_ml_la_LDFLAGS = -module -avoid-version
25.7 -mca_coll_ml_la_LIBADD = \
25.8 - $(top_ompi_builddir)/ompi/mca/common/commpatterns/libmca_common_commpatterns.la \
25.9 - $(top_ompi_builddir)/ompi/mca/common/netpatterns/libmca_common_netpatterns.la
25.10 +mca_coll_ml_la_LIBADD =
25.11
25.12
25.13 noinst_LTLIBRARIES = $(component_noinst)
25.14 libmca_coll_ml_la_SOURCES =$(sources)
25.15 libmca_coll_ml_la_LDFLAGS = -module -avoid-version
25.16 -
25.17 -$(top_ompi_builddir)/ompi/mca/common/commpatterns/libmca_common_commpatterns.la: foo.c
25.18 - cd $(top_ompi_builddir)/ompi/mca/common/commpatterns && $(MAKE)
25.19 -$(top_ompi_builddir)/ompi/mca/common/netpatterns/libmca_common_netpatterns.la: foo.c
25.20 - cd $(top_ompi_builddir)/ompi/mca/common/netpatterns && $(MAKE)
25.21 -
25.22 -foo.c:
26.1 --- a/ompi/mca/coll/ml/coll_ml_component.c Tue Feb 05 18:15:32 2013 +0000
26.2 +++ b/ompi/mca/coll/ml/coll_ml_component.c Tue Feb 05 21:52:55 2013 +0000
26.3 @@ -31,7 +31,7 @@
26.4 #include "coll_ml.h"
26.5 #include "coll_ml_inlines.h"
26.6
26.7 -#include "ompi/mca/common/netpatterns/common_netpatterns.h"
26.8 +#include "ompi/patterns/net/netpatterns.h"
26.9 #include "coll_ml_mca.h"
26.10 #include "coll_ml_custom_utils.h"
26.11
26.12 @@ -440,7 +440,7 @@
26.13 OBJ_CONSTRUCT(&(cs->sequential_collectives_mutex), opal_mutex_t);
26.14 OBJ_CONSTRUCT(&(cs->sequential_collectives), opal_list_t);
26.15
26.16 - rc = ompi_common_netpatterns_init();
26.17 + rc = netpatterns_init();
26.18 if (OMPI_SUCCESS != rc) {
26.19 return rc;
26.20 }
27.1 --- a/ompi/mca/coll/ml/coll_ml_custom_utils.c Tue Feb 05 18:15:32 2013 +0000
27.2 +++ b/ompi/mca/coll/ml/coll_ml_custom_utils.c Tue Feb 05 21:52:55 2013 +0000
27.3 @@ -30,7 +30,7 @@
27.4 #include "ompi/mca/coll/base/base.h"
27.5 #include "ompi/mca/coll/ml/coll_ml.h"
27.6 #include "ompi/mca/coll/ml/coll_ml_inlines.h"
27.7 -#include "ompi/mca/common/commpatterns/common_coll_ops.h"
27.8 +#include "ompi/patterns/comm/coll_ops.h"
27.9
27.10 #include "ompi/datatype/ompi_datatype.h"
27.11 #include "ompi/communicator/communicator.h"
28.1 --- a/ompi/mca/coll/ml/coll_ml_mca.c Tue Feb 05 18:15:32 2013 +0000
28.2 +++ b/ompi/mca/coll/ml/coll_ml_mca.c Tue Feb 05 21:52:55 2013 +0000
28.3 @@ -22,7 +22,7 @@
28.4 #include "coll_ml_inlines.h"
28.5 #include "coll_ml_mca.h"
28.6 #include "coll_ml_lmngr.h"
28.7 -#include "ompi/mca/common/netpatterns/common_netpatterns.h"
28.8 +#include "ompi/patterns/net/netpatterns.h"
28.9 #include "opal/mca/installdirs/installdirs.h"
28.10
28.11 /*
29.1 --- a/ompi/mca/coll/ml/coll_ml_module.c Tue Feb 05 18:15:32 2013 +0000
29.2 +++ b/ompi/mca/coll/ml/coll_ml_module.c Tue Feb 05 21:52:55 2013 +0000
29.3 @@ -31,7 +31,7 @@
29.4 #include "ompi/mca/sbgp/base/base.h"
29.5 #include "ompi/mca/bcol/base/base.h"
29.6 #include "ompi/mca/sbgp/sbgp.h"
29.7 -#include "ompi/mca/common/commpatterns/common_coll_ops.h"
29.8 +#include "ompi/patterns/comm/coll_ops.h"
29.9 #include "ompi/mca/coll/ml/coll_ml.h"
29.10
29.11 #include "opal/util/argv.h"
30.1 --- a/ompi/mca/common/commpatterns/Makefile.am Tue Feb 05 18:15:32 2013 +0000
30.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000
30.3 @@ -1,104 +0,0 @@
30.4 -#
30.5 -# Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
30.6 -# Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
30.7 -# $COPYRIGHT$
30.8 -#
30.9 -# Additional copyrights may follow
30.10 -#
30.11 -# $HEADER$
30.12 -#
30.13 -
30.14 -# A word of explanation...
30.15 -#
30.16 -# This library is linked against various MCA components because all
30.17 -# shared-memory based components (e.g., mpool, ptl, etc.) need to
30.18 -# share some common code and data. There's two cases:
30.19 -#
30.20 -# 1. libmca_common_commpatterns.la is a shared library. By linking that shared
30.21 -# library to all components that need it, the OS linker will
30.22 -# automatically load it into the process as necessary, and there will
30.23 -# only be one copy (i.e., all the components will share *one* copy of
30.24 -# the code and data).
30.25 -#
30.26 -# 2. libmca_common_commpatterns.la is a static library. In this case, it will
30.27 -# be rolled up into the top-level libmpi.la. It will also be rolled
30.28 -# into each component, but then the component will also be rolled up
30.29 -# into the upper-level libmpi.la. Linkers universally know how to
30.30 -# "figure this out" so that we end up with only one copy of the code
30.31 -# and data.
30.32 -#
30.33 -# Note that building this common component statically and linking
30.34 -# against other dynamic components is *not* supported!
30.35 -
30.36 -EXTRA_DIST = .windows
30.37 -
30.38 -# Header files
30.39 -
30.40 -headers = \
30.41 - common_coll_ops.h \
30.42 - common_netpatterns.h \
30.43 - ompi_common_netpatterns_macros.h
30.44 -
30.45 -# Source files
30.46 -
30.47 -sources = \
30.48 - common_allreduce.c \
30.49 - common_allgather.c \
30.50 - common_bcast.c
30.51 -
30.52 -# As per above, we'll either have an installable or noinst result.
30.53 -# The installable one should follow the same MCA prefix naming rules
30.54 -# (i.e., libmca_<type>_<name>.la). The noinst one can be named
30.55 -# whatever it wants, although libmca_<type>_<name>_noinst.la is
30.56 -# recommended.
30.57 -
30.58 -# To simplify components that link to this library, we will *always*
30.59 -# have an output libtool library named libmca_<type>_<name>.la -- even
30.60 -# for case 2) described above (i.e., so there's no conditional logic
30.61 -# necessary in component Makefile.am's that link to this library).
30.62 -# Hence, if we're creating a noinst version of this library (i.e.,
30.63 -# case 2), we sym link it to the libmca_<type>_<name>.la name
30.64 -# (libtool will do the Right Things under the covers). See the
30.65 -# all-local and clean-local rules, below, for how this is effected.
30.66 -
30.67 -lib_LTLIBRARIES =
30.68 -noinst_LTLIBRARIES =
30.69 -comp_inst = libmca_common_commpatterns.la
30.70 -comp_noinst = libmca_common_commpatterns_noinst.la
30.71 -
30.72 -if MCA_BUILD_ompi_common_commpatterns_DSO
30.73 -lib_LTLIBRARIES += $(comp_inst)
30.74 -else
30.75 -noinst_LTLIBRARIES += $(comp_noinst)
30.76 -endif
30.77 -
30.78 -libmca_common_commpatterns_la_SOURCES = $(headers) $(sources)
30.79 -libmca_common_commpatterns_noinst_la_SOURCES = $(libmca_common_commpatterns_la_SOURCES)
30.80 -libmca_common_commpatterns_la_LIBADD = \
30.81 - $(top_ompi_builddir)/ompi/mca/common/netpatterns/libmca_common_netpatterns.la
30.82 -
30.83 -# These two rules will sym link the "noinst" libtool library filename
30.84 -# to the installable libtool library filename in the case where we are
30.85 -# compiling this component statically (case 2), described above).
30.86 -
30.87 -all-local:
30.88 - if test -z "$(lib_LTLIBRARIES)"; then \
30.89 - rm -f "$(comp_inst)"; \
30.90 - $(LN_S) "$(comp_noinst)" "$(comp_inst)"; \
30.91 - fi
30.92 -
30.93 -clean-local:
30.94 - if test -z "$(lib_LTLIBRARIES)"; then \
30.95 - rm -f "$(comp_inst)"; \
30.96 - fi
30.97 -# The code below guaranty that the netpatterns will be build before commpatterns
30.98 -FORCE:
30.99 -
30.100 -$(top_ompi_builddir)/ompi/mca/common/netpatterns/libmca_common_netpatterns.la: FORCE
30.101 - (cd $(top_ompi_builddir)/ompi/mca/common/netpatterns/ && $(MAKE) $(AM_MAKEFLAGS) libmca_common_netpatterns.la)
30.102 -
30.103 -install-libmca_common_netpatterns: FORCE
30.104 - (cd $(top_ompi_builddir)/ompi/mca/common/netpatterns/ && $(MAKE) $(AM_MAKEFLAGS) install)
30.105 -
30.106 -install: install-libmca_common_netpatterns install-am
30.107 -
31.1 --- a/ompi/mca/common/commpatterns/common_allgather.c Tue Feb 05 18:15:32 2013 +0000
31.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000
31.3 @@ -1,288 +0,0 @@
31.4 -/*
31.5 - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
31.6 - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
31.7 - * Copyright (c) 2012 Los Alamos National Security, LLC.
31.8 - * All rights reserved.
31.9 - * $COPYRIGHT$
31.10 - *
31.11 - * Additional copyrights may follow
31.12 - *
31.13 - * $HEADER$
31.14 - */
31.15 -/** @file */
31.16 -
31.17 -#include "ompi_config.h"
31.18 -
31.19 -#include "ompi/constants.h"
31.20 -#include "ompi/op/op.h"
31.21 -#include "ompi/datatype/ompi_datatype.h"
31.22 -#include "ompi/communicator/communicator.h"
31.23 -#include "opal/include/opal/sys/atomic.h"
31.24 -#include "common_coll_ops.h"
31.25 -#include "ompi/mca/common/netpatterns/common_netpatterns.h"
31.26 -#include "ompi/mca/pml/pml.h"
31.27 -
31.28 -/**
31.29 - * All-reduce - subgroup in communicator
31.30 - */
31.31 -OMPI_DECLSPEC int comm_allgather_pml(void *src_buf, void *dest_buf, int count,
31.32 - ompi_datatype_t *dtype, int my_rank_in_group,
31.33 - int n_peers, int *ranks_in_comm,ompi_communicator_t *comm)
31.34 -{
31.35 - /* local variables */
31.36 - int rc=OMPI_SUCCESS,msg_cnt;
31.37 - int pair_rank,exchange,extra_rank, n_extra_nodes,n_extra;
31.38 - int proc_block,extra_start,extra_end,iovec_len;
31.39 - int remote_data_start_rank,remote_data_end_rank;
31.40 - int local_data_start_rank;
31.41 - mca_common_netpatterns_pair_exchange_node_t my_exchange_node;
31.42 - size_t message_extent,current_data_extent,current_data_count;
31.43 - size_t dt_size;
31.44 - OPAL_PTRDIFF_TYPE dt_extent;
31.45 - char *src_buf_current;
31.46 - char *dest_buf_current;
31.47 - struct iovec send_iov[2] = {{0,0},{0,0}},
31.48 - recv_iov[2] = {{0,0},{0,0}};
31.49 - ompi_request_t *requests[4];
31.50 -
31.51 - /* get size of data needed - same layout as user data, so that
31.52 - * we can apply the reudction routines directly on these buffers
31.53 - */
31.54 - rc = ompi_datatype_type_size(dtype, &dt_size);
31.55 - if( OMPI_SUCCESS != rc ) {
31.56 - goto Error;
31.57 - }
31.58 -
31.59 - rc = ompi_datatype_type_extent(dtype, &dt_extent);
31.60 - if( OMPI_SUCCESS != rc ) {
31.61 - goto Error;
31.62 - }
31.63 - message_extent = dt_extent*count;
31.64 -
31.65 - /* place my data in the correct destination buffer */
31.66 - rc=ompi_datatype_copy_content_same_ddt(dtype,count,
31.67 - (char *)dest_buf+my_rank_in_group*message_extent,
31.68 - (char *)src_buf);
31.69 - if( OMPI_SUCCESS != rc ) {
31.70 - goto Error;
31.71 - }
31.72 -
31.73 - /* 1 process special case */
31.74 - if(1 == n_peers) {
31.75 - return OMPI_SUCCESS;
31.76 - }
31.77 -
31.78 - /* get my reduction communication pattern */
31.79 - rc = mca_common_netpatterns_setup_recursive_doubling_tree_node(n_peers,
31.80 - my_rank_in_group, &my_exchange_node);
31.81 - if(OMPI_SUCCESS != rc){
31.82 - return rc;
31.83 - }
31.84 -
31.85 - n_extra_nodes=n_peers-my_exchange_node.n_largest_pow_2;
31.86 -
31.87 - /* get the data from the extra sources */
31.88 - if(0 < my_exchange_node.n_extra_sources) {
31.89 -
31.90 - if ( EXCHANGE_NODE == my_exchange_node.node_type ) {
31.91 -
31.92 - /*
31.93 - ** Receive data from extra node
31.94 - */
31.95 -
31.96 - extra_rank=my_exchange_node.rank_extra_source;
31.97 - /* receive the data into the correct location - will use 2
31.98 - * messages in the recursive doubling phase */
31.99 - dest_buf_current=(char *)dest_buf+message_extent*extra_rank;
31.100 - rc=MCA_PML_CALL(recv(dest_buf_current,
31.101 - count,dtype,ranks_in_comm[extra_rank],
31.102 - -OMPI_COMMON_TAG_ALLREDUCE,
31.103 - comm, MPI_STATUSES_IGNORE));
31.104 - if( 0 > rc ) {
31.105 - goto Error;
31.106 - }
31.107 -
31.108 - } else {
31.109 -
31.110 - /*
31.111 - ** Send data to "partner" node
31.112 - */
31.113 - extra_rank=my_exchange_node.rank_extra_source;
31.114 - src_buf_current=(char *)src_buf;
31.115 - rc=MCA_PML_CALL(send(src_buf_current,
31.116 - count,dtype,ranks_in_comm[extra_rank],
31.117 - -OMPI_COMMON_TAG_ALLREDUCE,
31.118 - MCA_PML_BASE_SEND_STANDARD,
31.119 - comm));
31.120 - if( 0 > rc ) {
31.121 - goto Error;
31.122 - }
31.123 - }
31.124 - }
31.125 -
31.126 - current_data_extent=message_extent;
31.127 - current_data_count=count;
31.128 - src_buf_current=(char *)dest_buf+my_rank_in_group*message_extent;
31.129 - proc_block=1;
31.130 - local_data_start_rank=my_rank_in_group;
31.131 - /* loop over data exchanges */
31.132 - for(exchange=0 ; exchange < my_exchange_node.n_exchanges ; exchange++) {
31.133 -
31.134 - /* is the remote data read */
31.135 - pair_rank=my_exchange_node.rank_exchanges[exchange];
31.136 - msg_cnt=0;
31.137 -
31.138 - /*
31.139 - * Power of 2 data segment
31.140 - */
31.141 - /* post non-blocking receive */
31.142 - if(pair_rank > my_rank_in_group ){
31.143 - recv_iov[0].iov_base=src_buf_current+current_data_extent;
31.144 - recv_iov[0].iov_len=current_data_extent;
31.145 - iovec_len=1;
31.146 - remote_data_start_rank=local_data_start_rank+proc_block;
31.147 - remote_data_end_rank=remote_data_start_rank+proc_block-1;
31.148 - } else {
31.149 - recv_iov[0].iov_base=src_buf_current-current_data_extent;
31.150 - recv_iov[0].iov_len=current_data_extent;
31.151 - iovec_len=1;
31.152 - remote_data_start_rank=local_data_start_rank-proc_block;
31.153 - remote_data_end_rank=remote_data_start_rank+proc_block-1;
31.154 - }
31.155 - /* the data from the non power of 2 ranks */
31.156 - if(remote_data_start_rank<n_extra_nodes) {
31.157 - /* figure out how much data is at the remote rank */
31.158 - /* last rank with data */
31.159 - extra_start=remote_data_start_rank;
31.160 - extra_end=remote_data_end_rank;
31.161 - if(extra_end >= n_extra_nodes ) {
31.162 - /* if last rank exceeds the ranks with extra data,
31.163 - * adjust this.
31.164 - */
31.165 - extra_end=n_extra_nodes-1;
31.166 - }
31.167 - /* get the number of ranks whos data is to be grabbed */
31.168 - n_extra=extra_end-extra_start+1;
31.169 -
31.170 - recv_iov[1].iov_base=(char *)dest_buf+
31.171 - (extra_start+my_exchange_node.n_largest_pow_2)*message_extent;
31.172 - recv_iov[1].iov_len=n_extra*count;
31.173 - iovec_len=2;
31.174 - }
31.175 -
31.176 - rc=MCA_PML_CALL(irecv(recv_iov[0].iov_base,
31.177 - current_data_count,dtype,ranks_in_comm[pair_rank],
31.178 - -OMPI_COMMON_TAG_ALLREDUCE,
31.179 - comm,&(requests[msg_cnt])));
31.180 - if( 0 > rc ) {
31.181 - goto Error;
31.182 - }
31.183 - msg_cnt++;
31.184 -
31.185 - if(iovec_len > 1 ) {
31.186 - rc=MCA_PML_CALL(irecv(recv_iov[1].iov_base,
31.187 - recv_iov[1].iov_len,dtype,ranks_in_comm[pair_rank],
31.188 - -OMPI_COMMON_TAG_ALLREDUCE,
31.189 - comm,&(requests[msg_cnt])));
31.190 - if( 0 > rc ) {
31.191 - goto Error;
31.192 - }
31.193 - msg_cnt++;
31.194 - }
31.195 -
31.196 - /* post non-blocking send */
31.197 - send_iov[0].iov_base=src_buf_current;
31.198 - send_iov[0].iov_len=current_data_extent;
31.199 - iovec_len=1;
31.200 - /* the data from the non power of 2 ranks */
31.201 - if(local_data_start_rank<n_extra_nodes) {
31.202 - /* figure out how much data is at the remote rank */
31.203 - /* last rank with data */
31.204 - extra_start=local_data_start_rank;
31.205 - extra_end=extra_start+proc_block-1;
31.206 - if(extra_end >= n_extra_nodes ) {
31.207 - /* if last rank exceeds the ranks with extra data,
31.208 - * adjust this.
31.209 - */
31.210 - extra_end=n_extra_nodes-1;
31.211 - }
31.212 - /* get the number of ranks whos data is to be grabbed */
31.213 - n_extra=extra_end-extra_start+1;
31.214 -
31.215 - send_iov[1].iov_base=(char *)dest_buf+
31.216 - (extra_start+my_exchange_node.n_largest_pow_2)*message_extent;
31.217 - send_iov[1].iov_len=n_extra*count;
31.218 - iovec_len=2;
31.219 - }
31.220 -
31.221 - rc=MCA_PML_CALL(isend(send_iov[0].iov_base,
31.222 - current_data_count,dtype,ranks_in_comm[pair_rank],
31.223 - -OMPI_COMMON_TAG_ALLREDUCE,MCA_PML_BASE_SEND_STANDARD,
31.224 - comm,&(requests[msg_cnt])));
31.225 - if( 0 > rc ) {
31.226 - goto Error;
31.227 - }
31.228 - msg_cnt++;
31.229 - if( iovec_len > 1 ) {
31.230 - rc=MCA_PML_CALL(isend(send_iov[1].iov_base,
31.231 - send_iov[1].iov_len,dtype,ranks_in_comm[pair_rank],
31.232 - -OMPI_COMMON_TAG_ALLREDUCE,MCA_PML_BASE_SEND_STANDARD,
31.233 - comm,&(requests[msg_cnt])));
31.234 - if( 0 > rc ) {
31.235 - goto Error;
31.236 - }
31.237 - msg_cnt++;
31.238 - }
31.239 -
31.240 - /* prepare the source buffer for the next iteration */
31.241 - if(pair_rank < my_rank_in_group ){
31.242 - src_buf_current-=current_data_extent;
31.243 - local_data_start_rank-=proc_block;
31.244 - }
31.245 - proc_block*=2;
31.246 - current_data_extent*=2;
31.247 - current_data_count*=2;
31.248 -
31.249 - /* wait on send and receive completion */
31.250 - ompi_request_wait_all(msg_cnt,requests,MPI_STATUSES_IGNORE);
31.251 - }
31.252 -
31.253 - /* copy data in from the "extra" source, if need be */
31.254 - if(0 < my_exchange_node.n_extra_sources) {
31.255 -
31.256 - if ( EXTRA_NODE == my_exchange_node.node_type ) {
31.257 - /*
31.258 - ** receive the data
31.259 - ** */
31.260 - extra_rank=my_exchange_node.rank_extra_source;
31.261 -
31.262 - rc=MCA_PML_CALL(recv(dest_buf,
31.263 - count*n_peers,dtype,ranks_in_comm[extra_rank],
31.264 - -OMPI_COMMON_TAG_ALLREDUCE,
31.265 - comm,MPI_STATUSES_IGNORE));
31.266 - if(0 > rc ) {
31.267 - goto Error;
31.268 - }
31.269 - } else {
31.270 - /* send the data to the pair-rank outside of the power of 2 set
31.271 - ** of ranks
31.272 - */
31.273 -
31.274 - extra_rank=my_exchange_node.rank_extra_source;
31.275 - rc=MCA_PML_CALL(send(dest_buf,
31.276 - count*n_peers,dtype,ranks_in_comm[extra_rank],
31.277 - -OMPI_COMMON_TAG_ALLREDUCE,
31.278 - MCA_PML_BASE_SEND_STANDARD,
31.279 - comm));
31.280 - if( 0 > rc ) {
31.281 - goto Error;
31.282 - }
31.283 - }
31.284 - }
31.285 -
31.286 - /* return */
31.287 - return OMPI_SUCCESS;
31.288 -
31.289 -Error:
31.290 - return rc;
31.291 -}
32.1 --- a/ompi/mca/common/commpatterns/common_allreduce.c Tue Feb 05 18:15:32 2013 +0000
32.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000
32.3 @@ -1,255 +0,0 @@
32.4 -/*
32.5 - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
32.6 - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
32.7 - * Copyright (c) 2012 Los Alamos National Security, LLC.
32.8 - * All rights reserved.
32.9 - * $COPYRIGHT$
32.10 - *
32.11 - * Additional copyrights may follow
32.12 - *
32.13 - * $HEADER$
32.14 - */
32.15 -/** @file */
32.16 -
32.17 -#include "ompi_config.h"
32.18 -
32.19 -#include "ompi/constants.h"
32.20 -#include "ompi/op/op.h"
32.21 -#include "ompi/datatype/ompi_datatype.h"
32.22 -#include "ompi/communicator/communicator.h"
32.23 -#include "opal/include/opal/sys/atomic.h"
32.24 -#include "ompi/mca/common/commpatterns/common_netpatterns.h"
32.25 -#include "common_coll_ops.h"
32.26 -#include "ompi/mca/common/netpatterns/common_netpatterns.h"
32.27 -#include "ompi/mca/pml/pml.h"
32.28 -
32.29 -/**
32.30 - * All-reduce for contigous primitive types
32.31 - */
32.32 -OMPI_DECLSPEC int comm_allreduce_pml(void *sbuf, void *rbuf, int count,
32.33 - ompi_datatype_t *dtype, int my_rank_in_group,
32.34 - struct ompi_op_t *op, int n_peers,int *ranks_in_comm,
32.35 - ompi_communicator_t *comm)
32.36 -{
32.37 - /* local variables */
32.38 - int rc=OMPI_SUCCESS,n_dts_per_buffer,n_data_segments,stripe_number;
32.39 - int pair_rank,exchange,extra_rank;
32.40 - mca_common_netpatterns_pair_exchange_node_t my_exchange_node;
32.41 - int count_processed,count_this_stripe;
32.42 - size_t dt_size,dt_extent;
32.43 - char scratch_bufers[2][MAX_TMP_BUFFER];
32.44 - int send_buffer=0,recv_buffer=1;
32.45 - char *sbuf_current, *rbuf_current;
32.46 - ompi_request_t *requests[2];
32.47 -
32.48 - /* get size of data needed - same layout as user data, so that
32.49 - * we can apply the reudction routines directly on these buffers
32.50 - */
32.51 - rc = opal_datatype_type_size((opal_datatype_t *)dtype, &dt_size);
32.52 - if( OMPI_SUCCESS != rc ) {
32.53 - goto Error;
32.54 - }
32.55 - rc = ompi_datatype_type_extent(dtype, (OPAL_PTRDIFF_TYPE *)&dt_extent);
32.56 - if( OMPI_SUCCESS != rc ) {
32.57 - goto Error;
32.58 - }
32.59 -
32.60 - /* 1 process special case */
32.61 - if(1 == n_peers) {
32.62 - /* place my data in the correct destination buffer */
32.63 - rc=ompi_datatype_copy_content_same_ddt(dtype,count,
32.64 - (char *)rbuf, (char *)sbuf);
32.65 - if( OMPI_SUCCESS != rc ) {
32.66 - goto Error;
32.67 - }
32.68 - return OMPI_SUCCESS;
32.69 - }
32.70 -
32.71 - /* number of data types copies that the scratch buffer can hold */
32.72 - n_dts_per_buffer=((int) MAX_TMP_BUFFER)/dt_extent;
32.73 - if ( 0 == n_dts_per_buffer ) {
32.74 - rc=OMPI_ERROR;
32.75 - goto Error;
32.76 - }
32.77 -
32.78 - /* compute number of stripes needed to process this collective */
32.79 - n_data_segments=(count+n_dts_per_buffer -1 ) / n_dts_per_buffer ;
32.80 -
32.81 - /* get my reduction communication pattern */
32.82 - rc = mca_common_netpatterns_setup_recursive_doubling_tree_node(n_peers,
32.83 - my_rank_in_group, &my_exchange_node);
32.84 - if(OMPI_SUCCESS != rc){
32.85 - return rc;
32.86 - }
32.87 -
32.88 - count_processed=0;
32.89 -
32.90 - /* get a pointer to the shared-memory working buffer */
32.91 - /* NOTE: starting with a rather synchronous approach */
32.92 - for( stripe_number=0 ; stripe_number < n_data_segments ; stripe_number++ ) {
32.93 -
32.94 - /* get number of elements to process in this stripe */
32.95 - count_this_stripe=n_dts_per_buffer;
32.96 - if( count_processed + count_this_stripe > count )
32.97 - count_this_stripe=count-count_processed;
32.98 -
32.99 - /* copy data from the input buffer into the temp buffer */
32.100 - sbuf_current=(char *)sbuf+count_processed*dt_extent;
32.101 - rc=ompi_datatype_copy_content_same_ddt(dtype,count_this_stripe,
32.102 - scratch_bufers[send_buffer], sbuf_current);
32.103 - if( OMPI_SUCCESS != rc ) {
32.104 - goto Error;
32.105 - }
32.106 -
32.107 - /* copy data in from the "extra" source, if need be */
32.108 - if(0 < my_exchange_node.n_extra_sources) {
32.109 -
32.110 - if ( EXCHANGE_NODE == my_exchange_node.node_type ) {
32.111 -
32.112 - /*
32.113 - ** Receive data from extra node
32.114 - */
32.115 - extra_rank=my_exchange_node.rank_extra_source;
32.116 - rc=MCA_PML_CALL(recv(scratch_bufers[recv_buffer],
32.117 - count_this_stripe,dtype,ranks_in_comm[extra_rank],
32.118 - -OMPI_COMMON_TAG_ALLREDUCE, comm,
32.119 - MPI_STATUSES_IGNORE));
32.120 - if( 0 > rc ) {
32.121 - fprintf(stderr," first recv failed in comm_allreduce_pml \n");
32.122 - fflush(stderr);
32.123 - goto Error;
32.124 - }
32.125 -
32.126 -
32.127 - /* apply collective operation to first half of the data */
32.128 - if( 0 < count_this_stripe ) {
32.129 - ompi_op_reduce(op,
32.130 - (void *)scratch_bufers[send_buffer],
32.131 - (void *)scratch_bufers[recv_buffer],
32.132 - count_this_stripe,dtype);
32.133 - }
32.134 -
32.135 -
32.136 - } else {
32.137 -
32.138 - /*
32.139 - ** Send data to "partner" node
32.140 - */
32.141 - extra_rank=my_exchange_node.rank_extra_source;
32.142 - rc=MCA_PML_CALL(send(scratch_bufers[send_buffer],
32.143 - count_this_stripe,dtype,ranks_in_comm[extra_rank],
32.144 - -OMPI_COMMON_TAG_ALLREDUCE, MCA_PML_BASE_SEND_STANDARD,
32.145 - comm));
32.146 - if( 0 > rc ) {
32.147 - fprintf(stderr," first send failed in comm_allreduce_pml \n");
32.148 - fflush(stderr);
32.149 - goto Error;
32.150 - }
32.151 - }
32.152 -
32.153 - /* change pointer to scratch buffer - this was we can send data
32.154 - ** that we have summed w/o a memory copy, and receive data into the
32.155 - ** other buffer, w/o fear of over writting data that has not yet
32.156 - ** completed being send
32.157 - */
32.158 - recv_buffer^=1;
32.159 - send_buffer^=1;
32.160 - }
32.161 -
32.162 - /* loop over data exchanges */
32.163 - for(exchange=0 ; exchange < my_exchange_node.n_exchanges ; exchange++) {
32.164 -
32.165 - /* is the remote data read */
32.166 - pair_rank=my_exchange_node.rank_exchanges[exchange];
32.167 -
32.168 - /* post non-blocking receive */
32.169 - rc=MCA_PML_CALL(irecv(scratch_bufers[recv_buffer],
32.170 - count_this_stripe,dtype,ranks_in_comm[pair_rank],
32.171 - -OMPI_COMMON_TAG_ALLREDUCE,
32.172 - comm,&(requests[0])));
32.173 - if( 0 > rc ) {
32.174 - fprintf(stderr," irecv failed in comm_allreduce_pml at iterations %d \n",
32.175 - exchange);
32.176 - fflush(stderr);
32.177 - goto Error;
32.178 - }
32.179 -
32.180 - /* post non-blocking send */
32.181 - rc=MCA_PML_CALL(isend(scratch_bufers[send_buffer],
32.182 - count_this_stripe,dtype, ranks_in_comm[pair_rank],
32.183 - -OMPI_COMMON_TAG_ALLREDUCE,MCA_PML_BASE_SEND_STANDARD,
32.184 - comm,&(requests[1])));
32.185 - if( 0 > rc ) {
32.186 - fprintf(stderr," isend failed in comm_allreduce_pml at iterations %d \n",
32.187 - exchange);
32.188 - fflush(stderr);
32.189 - goto Error;
32.190 - }
32.191 - /* wait on send and receive completion */
32.192 - ompi_request_wait_all(2,requests,MPI_STATUSES_IGNORE);
32.193 -
32.194 - /* reduce the data */
32.195 - if( 0 < count_this_stripe ) {
32.196 - ompi_op_reduce(op,
32.197 - (void *)scratch_bufers[send_buffer],
32.198 - (void *)scratch_bufers[recv_buffer],
32.199 - count_this_stripe,dtype);
32.200 - }
32.201 - /* get ready for next step */
32.202 - recv_buffer^=1;
32.203 - send_buffer^=1;
32.204 -
32.205 - }
32.206 -
32.207 - /* copy data in from the "extra" source, if need be */
32.208 - if(0 < my_exchange_node.n_extra_sources) {
32.209 -
32.210 - if ( EXTRA_NODE == my_exchange_node.node_type ) {
32.211 - /*
32.212 - ** receive the data
32.213 - ** */
32.214 - extra_rank=my_exchange_node.rank_extra_source;
32.215 - rc=MCA_PML_CALL(recv(scratch_bufers[recv_buffer],
32.216 - count_this_stripe,dtype,ranks_in_comm[extra_rank],
32.217 - -OMPI_COMMON_TAG_ALLREDUCE, comm,
32.218 - MPI_STATUSES_IGNORE));
32.219 - if( 0 > rc ) {
32.220 - fprintf(stderr," last recv failed in comm_allreduce_pml \n");
32.221 - fflush(stderr);
32.222 - goto Error;
32.223 - }
32.224 -
32.225 - recv_buffer^=1;
32.226 - send_buffer^=1;
32.227 - } else {
32.228 - /* send the data to the pair-rank outside of the power of 2 set
32.229 - ** of ranks
32.230 - */
32.231 -
32.232 - extra_rank=my_exchange_node.rank_extra_source;
32.233 - rc=MCA_PML_CALL(send((char *)scratch_bufers[send_buffer],
32.234 - count_this_stripe,dtype,ranks_in_comm[extra_rank],
32.235 - -OMPI_COMMON_TAG_ALLREDUCE, MCA_PML_BASE_SEND_STANDARD,
32.236 - comm));
32.237 - if( 0 > rc ) {
32.238 - fprintf(stderr," last send failed in comm_allreduce_pml \n");
32.239 - fflush(stderr);
32.240 - goto Error;
32.241 - }
32.242 - }
32.243 - }
32.244 -
32.245 - /* copy data from the temp buffer into the output buffer */
32.246 - rbuf_current = (char *) rbuf + count_processed * dt_size;
32.247 - memcpy(rbuf_current,scratch_bufers[send_buffer], count_this_stripe*dt_size);
32.248 -
32.249 - /* update the count of elements processed */
32.250 - count_processed += count_this_stripe;
32.251 - }
32.252 -
32.253 - /* return */
32.254 - return OMPI_SUCCESS;
32.255 -
32.256 -Error:
32.257 - return rc;
32.258 -}
33.1 --- a/ompi/mca/common/commpatterns/common_bcast.c Tue Feb 05 18:15:32 2013 +0000
33.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000
33.3 @@ -1,97 +0,0 @@
33.4 -/*
33.5 - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
33.6 - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
33.7 - * Copyright (c) 2012 Los Alamos National Security, LLC.
33.8 - * All rights reserved.
33.9 - * $COPYRIGHT$
33.10 - *
33.11 - * Additional copyrights may follow
33.12 - *
33.13 - * $HEADER$
33.14 - */
33.15 -/** @file */
33.16 -
33.17 -#include "ompi_config.h"
33.18 -
33.19 -#include "ompi/constants.h"
33.20 -#include "ompi/op/op.h"
33.21 -#include "ompi/datatype/ompi_datatype.h"
33.22 -#include "ompi/communicator/communicator.h"
33.23 -#include "opal/include/opal/sys/atomic.h"
33.24 -#include "common_coll_ops.h"
33.25 -#include "ompi/mca/common/netpatterns/common_netpatterns.h"
33.26 -#include "ompi/mca/pml/pml.h"
33.27 -
33.28 -/**
33.29 - * Bcast - subgroup in communicator
33.30 - * This is a very simple algorithm - binary tree, transmitting the full
33.31 - * message at each step.
33.32 - */
33.33 -OMPI_DECLSPEC int comm_bcast_pml(void *buffer, int root, int count,
33.34 - ompi_datatype_t *dtype, int my_rank_in_group,
33.35 - int n_peers, int *ranks_in_comm,ompi_communicator_t *comm)
33.36 -{
33.37 - /* local variables */
33.38 - int rc=OMPI_SUCCESS,msg_cnt,i;
33.39 - ompi_request_t *requests[2];
33.40 - int node_rank, peer_rank;
33.41 - mca_common_netpatterns_tree_node_t node_data;
33.42 -
33.43 - /*
33.44 - * shift rank to root==0 tree
33.45 - */
33.46 - node_rank=(my_rank_in_group-root+n_peers)%n_peers;
33.47 -
33.48 - /*
33.49 - * compute my communication pattern - binary tree
33.50 - */
33.51 - rc=mca_common_netpatterns_setup_narray_tree(2, node_rank, n_peers,
33.52 - &node_data);
33.53 - if( OMPI_SUCCESS != rc ) {
33.54 - goto Error;
33.55 - }
33.56 -
33.57 - /* 1 process special case */
33.58 - if(1 == n_peers) {
33.59 - return OMPI_SUCCESS;
33.60 - }
33.61 -
33.62 - /* if I have parents - wait on the data to arrive */
33.63 - if(node_data.n_parents) {
33.64 - /* I will have only 1 parent */
33.65 - peer_rank=node_data.parent_rank;
33.66 - peer_rank=(peer_rank+root)%n_peers;
33.67 - /* translate back to actual rank */
33.68 - rc=MCA_PML_CALL(recv(buffer, count,dtype,peer_rank,
33.69 - -OMPI_COMMON_TAG_BCAST, comm, MPI_STATUSES_IGNORE));
33.70 - if( 0 > rc ) {
33.71 - goto Error;
33.72 - }
33.73 - }
33.74 -
33.75 - /* send the data to my children */
33.76 - msg_cnt=0;
33.77 - for(i=0 ; i < node_data.n_children ; i++ ) {
33.78 - peer_rank=node_data.children_ranks[i];
33.79 - peer_rank=(peer_rank+root)%n_peers;
33.80 - rc=MCA_PML_CALL(isend(buffer,
33.81 - count,dtype,peer_rank,
33.82 - -OMPI_COMMON_TAG_BCAST,MCA_PML_BASE_SEND_STANDARD,
33.83 - comm,&(requests[msg_cnt])));
33.84 - if( 0 > rc ) {
33.85 - goto Error;
33.86 - }
33.87 - msg_cnt++;
33.88 - }
33.89 - /* wait for send completion */
33.90 - if(msg_cnt) {
33.91 - /* wait on send and receive completion */
33.92 - ompi_request_wait_all(msg_cnt,requests,MPI_STATUSES_IGNORE);
33.93 - }
33.94 -
33.95 - /* return */
33.96 - return OMPI_SUCCESS;
33.97 -
33.98 -Error:
33.99 - return rc;
33.100 -}
34.1 --- a/ompi/mca/common/commpatterns/common_coll_ops.h Tue Feb 05 18:15:32 2013 +0000
34.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000
34.3 @@ -1,51 +0,0 @@
34.4 -/*
34.5 - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
34.6 - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
34.7 - * Copyright (c) 2012 Los Alamos National Security, LLC.
34.8 - * All rights reserved.
34.9 - * $COPYRIGHT$
34.10 - *
34.11 - * Additional copyrights may follow
34.12 - *
34.13 - * $HEADER$
34.14 - */
34.15 -
34.16 -#ifndef COMM_COLL_OP_TYPES_H
34.17 -#define COMM_COLL_OP_TYPES_H
34.18 -
34.19 -#include "ompi_config.h"
34.20 -#include "ompi/communicator/communicator.h"
34.21 -#include "ompi/datatype/ompi_datatype.h"
34.22 -#include "ompi/proc/proc.h"
34.23 -
34.24 -BEGIN_C_DECLS
34.25 -
34.26 -#define OMPI_COMMON_TAG_ALLREDUCE 99
34.27 -#define OMPI_COMMON_TAG_BCAST 98
34.28 -
34.29 -
34.30 -
34.31 -
34.32 -OMPI_DECLSPEC int comm_allgather_pml(void *src_buf, void *dest_buf, int count,
34.33 - ompi_datatype_t *dtype, int my_rank_in_group, int n_peers,
34.34 - int *ranks_in_comm,ompi_communicator_t *comm);
34.35 -OMPI_DECLSPEC int comm_allreduce_pml(void *sbuf, void *rbuf, int count,
34.36 - ompi_datatype_t *dtype, int my_rank_in_group,
34.37 - struct ompi_op_t *op, int n_peers,int *ranks_in_comm,
34.38 - ompi_communicator_t *comm);
34.39 -OMPI_DECLSPEC int comm_bcast_pml(void *buffer, int root, int count,
34.40 - ompi_datatype_t *dtype, int my_rank_in_group,
34.41 - int n_peers, int *ranks_in_comm,ompi_communicator_t
34.42 - *comm);
34.43 -
34.44 -/* reduction operations supported */
34.45 -#define OP_SUM 1
34.46 -#define OP_MAX 2
34.47 -#define OP_MIN 3
34.48 -
34.49 -#define TYPE_INT4 1
34.50 -
34.51 -
34.52 -END_C_DECLS
34.53 -
34.54 -#endif /* COMM_COLL_OP_TYPES_H */
35.1 --- a/ompi/mca/common/commpatterns/common_netpatterns.h Tue Feb 05 18:15:32 2013 +0000
35.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000
35.3 @@ -1,22 +0,0 @@
35.4 -/*
35.5 - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
35.6 - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
35.7 - * $COPYRIGHT$
35.8 - *
35.9 - * Additional copyrights may follow
35.10 - *
35.11 - * $HEADER$
35.12 - */
35.13 -
35.14 -#ifndef COMM_NETPATTERNS_H
35.15 -#define COMM_NETPATTERNS_H
35.16 -
35.17 -#include "ompi_config.h"
35.18 -
35.19 -BEGIN_C_DECLS
35.20 -
35.21 -#define MAX_TMP_BUFFER 8192
35.22 -
35.23 -END_C_DECLS
35.24 -
35.25 -#endif /* COMM_NETPATTERNS_H */
36.1 --- a/ompi/mca/common/commpatterns/ompi_common_netpatterns_macros.h Tue Feb 05 18:15:32 2013 +0000
36.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000
36.3 @@ -1,52 +0,0 @@
36.4 -/*
36.5 - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
36.6 - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
36.7 - * $COPYRIGHT$
36.8 - *
36.9 - * Additional copyrights may follow
36.10 - *
36.11 - * $HEADER$
36.12 - */
36.13 -
36.14 -#ifndef OMPI_COMMON_NETPATTERNS_MACROS_H
36.15 -#define OMPI_COMMON_NETPATTERNS_MACROS_H
36.16 -
36.17 -#include "ompi_config.h"
36.18 -
36.19 -BEGIN_C_DECLS
36.20 -
36.21 -/* function to decompose an interger into it's representation in base K */
36.22 -/*
36.23 - * input_value - value to translate (input)
36.24 - * base - base of representation (input)
36.25 - * highest_power - the highest power that may have a non-zero entry (input)
36.26 - * the assumption is that this will be called in the critical path
36.27 - * to compute communication patterns, so will precompute such values
36.28 - * and pass the in.
36.29 - * base_to_power_i - array of base to ith power (input)
36.30 - * cum_base_to_power_i - array of cummulative base to ith power (input)
36.31 - * base_k_rep - representation in base "base". Space is pre-allocated. (out)
36.32 - */
36.33 -static inline __opal_attribute_always_inline__ void
36.34 -common_netpatterns_obtain_rep_base_k (int input_value, int base,
36.35 - int highest_power, int *base_to_power_i,
36.36 - int *base_k_rep
36.37 - )
36.38 -{
36.39 - /* local variables */
36.40 - int lvl, work_value;
36.41 -
36.42 - /* loop over all possible powers */
36.43 - work_value=input_value;
36.44 - for( lvl=highest_power ; lvl >= 0 ; lvl-- ) {
36.45 - /* still need to compute the actual coefficient */
36.46 - base_k_rep[lvl]=work_value/base_to_power_i[lvl];
36.47 - work_value-=(base_k_rep[lvl]*base_to_power_i[lvl]);
36.48 -
36.49 - }
36.50 -
36.51 -}
36.52 -
36.53 -END_C_DECLS
36.54 -
36.55 -#endif /* OMPI_COMMON_NETPATTERNS_MACROS_H */
37.1 --- a/ompi/mca/common/netpatterns/.windows Tue Feb 05 18:15:32 2013 +0000
37.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000
37.3 @@ -1,12 +0,0 @@
37.4 -#
37.5 -# Copyright (c) 2008-2012 High Performance Computing Center Stuttgart,
37.6 -# University of Stuttgart. All rights reserved.
37.7 -# $COPYRIGHT$
37.8 -#
37.9 -# Additional copyrights may follow
37.10 -#
37.11 -# $HEADER$
37.12 -#
37.13 -
37.14 -# Specific to this module
37.15 -exclude_list=common_allreduce.c
37.16 \ No newline at end of file
38.1 --- a/ompi/mca/common/netpatterns/Makefile.am Tue Feb 05 18:15:32 2013 +0000
38.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000
38.3 @@ -1,94 +0,0 @@
38.4 -#
38.5 -# Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
38.6 -# Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
38.7 -# $COPYRIGHT$
38.8 -#
38.9 -# Additional copyrights may follow
38.10 -#
38.11 -# $HEADER$
38.12 -#
38.13 -
38.14 -# A word of explanation...
38.15 -#
38.16 -# This library is linked against various MCA components because all
38.17 -# shared-memory based components (e.g., mpool, ptl, etc.) need to
38.18 -# share some common code and data. There's two cases:
38.19 -#
38.20 -# 1. libmca_common_netpatterns.la is a shared library. By linking that shared
38.21 -# library to all components that need it, the OS linker will
38.22 -# automatically load it into the process as necessary, and there will
38.23 -# only be one copy (i.e., all the components will share *one* copy of
38.24 -# the code and data).
38.25 -#
38.26 -# 2. libmca_common_netpatterns.la is a static library. In this case, it will
38.27 -# be rolled up into the top-level libmpi.la. It will also be rolled
38.28 -# into each component, but then the component will also be rolled up
38.29 -# into the upper-level libmpi.la. Linkers universally know how to
38.30 -# "figure this out" so that we end up with only one copy of the code
38.31 -# and data.
38.32 -#
38.33 -# Note that building this common component statically and linking
38.34 -# against other dynamic components is *not* supported!
38.35 -
38.36 -EXTRA_DIST = .windows
38.37 -
38.38 -# Header files
38.39 -
38.40 -headers = \
38.41 - common_netpatterns.h \
38.42 - common_netpatterns_knomial_tree.h \
38.43 - common_coll_ops.h
38.44 -
38.45 -# Source files
38.46 -
38.47 -sources = \
38.48 - common_netpatterns_base.c \
38.49 - common_netpatterns_multinomial_tree.c \
38.50 - common_netpatterns_nary_tree.c \
38.51 - common_netpatterns_knomial_tree.c
38.52 -
38.53 -# common_allreduce.c # the allredeace is broken
38.54 -
38.55 -# As per above, we'll either have an installable or noinst result.
38.56 -# The installable one should follow the same MCA prefix naming rules
38.57 -# (i.e., libmca_<type>_<name>.la). The noinst one can be named
38.58 -# whatever it wants, although libmca_<type>_<name>_noinst.la is
38.59 -# recommended.
38.60 -
38.61 -# To simplify components that link to this library, we will *always*
38.62 -# have an output libtool library named libmca_<type>_<name>.la -- even
38.63 -# for case 2) described above (i.e., so there's no conditional logic
38.64 -# necessary in component Makefile.am's that link to this library).
38.65 -# Hence, if we're creating a noinst version of this library (i.e.,
38.66 -# case 2), we sym link it to the libmca_<type>_<name>.la name
38.67 -# (libtool will do the Right Things under the covers). See the
38.68 -# all-local and clean-local rules, below, for how this is effected.
38.69 -
38.70 -lib_LTLIBRARIES =
38.71 -noinst_LTLIBRARIES =
38.72 -comp_inst = libmca_common_netpatterns.la
38.73 -comp_noinst = libmca_common_netpatterns_noinst.la
38.74 -
38.75 -if MCA_BUILD_ompi_common_netpatterns_DSO
38.76 -lib_LTLIBRARIES += $(comp_inst)
38.77 -else
38.78 -noinst_LTLIBRARIES += $(comp_noinst)
38.79 -endif
38.80 -
38.81 -libmca_common_netpatterns_la_SOURCES = $(headers) $(sources)
38.82 -libmca_common_netpatterns_noinst_la_SOURCES = $(libmca_common_netpatterns_la_SOURCES)
38.83 -
38.84 -# These two rules will sym link the "noinst" libtool library filename
38.85 -# to the installable libtool library filename in the case where we are
38.86 -# compiling this component statically (case 2), described above).
38.87 -
38.88 -all-local:
38.89 - if test -z "$(lib_LTLIBRARIES)"; then \
38.90 - rm -f "$(comp_inst)"; \
38.91 - $(LN_S) "$(comp_noinst)" "$(comp_inst)"; \
38.92 - fi
38.93 -
38.94 -clean-local:
38.95 - if test -z "$(lib_LTLIBRARIES)"; then \
38.96 - rm -f "$(comp_inst)"; \
38.97 - fi
39.1 --- a/ompi/mca/common/netpatterns/common_allreduce.c Tue Feb 05 18:15:32 2013 +0000
39.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000
39.3 @@ -1,347 +0,0 @@
39.4 -/*
39.5 - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
39.6 - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
39.7 - * Copyright (c) 2012 Los Alamos National Security, LLC.
39.8 - * All rights reserved.
39.9 - * $COPYRIGHT$
39.10 - *
39.11 - * Additional copyrights may follow
39.12 - *
39.13 - * $HEADER$
39.14 - */
39.15 -/** @file */
39.16 -
39.17 -#include "ompi_config.h"
39.18 -
39.19 -#include "ompi/constants.h"
39.20 -#include "coll_sm2.h"
39.21 -#include "ompi/op/op.h"
39.22 -#include "ompi/datatype/ompi_datatype.h"
39.23 -#include "ompi/communicator/communicator.h"
39.24 -#include "ompi/mca/rte/rte.h"
39.25 -
39.26 -void send_completion(nt status, struct ompi_process_name_t* peer, struct iovec* msg,
39.27 - int count, ompi_rml_tag_t tag, void* cbdata)
39.28 -{
39.29 - /* set send completion flag */
39.30 - *(int *)cbdata=1;
39.31 -}
39.32 -
39.33 -
39.34 -void recv_completion(nt status, struct ompi_process_name_t* peer, struct iovec* msg,
39.35 - int count, ompi_rml_tag_t tag, void* cbdata)
39.36 -{
39.37 - /* set receive completion flag */
39.38 - MB();
39.39 - *(int *)cbdata=1;
39.40 -}
39.41 -
39.42 -
39.43 -static void op_reduce(int op_type,(void *)src_dest_buf,(void *) src_buf, int count,
39.44 - int data_type)
39.45 -{
39.46 - /* local variables */
39.47 - int ret;
39.48 -
39.49 - /* op type */
39.50 - switch (op_type) {
39.51 -
39.52 - case OP_SUM:
39.53 -
39.54 -
39.55 - switch (data_type) {
39.56 - case TYPE_INT4:
39.57 - int *int_src_ptr=(int *)src_ptr;
39.58 - int *int_src_dst_ptr=(int *)src_dst_ptr;
39.59 - int cnt;
39.60 - for(cnt=0 ; cnt < count ; ) {
39.61 - (*(int_src_dst_ptr))+=(*(int_src_ptr));
39.62 - break;
39.63 - default:
39.64 - ret=OMPI_ERROR;
39.65 - goto Error;
39.66 - }
39.67 -
39.68 - break;
39.69 -
39.70 - default:
39.71 - ret=OMPI_ERROR;
39.72 - goto Error;
39.73 - }
39.74 -Error:
39.75 - return ret;
39.76 -}
39.77 -
39.78 -/**
39.79 - * All-reduce for contigous primitive types
39.80 - */
39.81 -static
39.82 -comm_allreduce(void *sbuf, void *rbuf, int count, opal_datatype_t *dtype,
39.83 - int op_type, opal_list_t *peers)
39.84 -{
39.85 - /* local variables */
39.86 - int rc=OMPI_SUCCESS,n_dts_per_buffer,n_data_segments,stripe_number;
39.87 - int pair_rank,exchange,extra_rank;
39.88 - int index_read,index_write;
39.89 - mca_common_netpatterns_pair_exchange_node_t my_exchange_node;
39.90 - int my_rank,count_processed,count_this_stripe;
39.91 - size_t n_peers,message_extent,len_data_buffer;
39.92 - size_t dt_size;
39.93 - long long tag, base_tag;
39.94 - sm_work_buffer_t *sm_buffer_desc;
39.95 - opal_list_item_t *item;
39.96 - char scratch_bufers[2][MAX_TMP_BUFFER];
39.97 - int send_buffer=0;recv_buffer=1;
39.98 - char *sbuf_current,*rbuf_current;
39.99 - ompi_proc_t **proc_array;
39.100 - struct iovec send_iov, recv_iov;
39.101 - volatile int *recv_done, *send_done;
39.102 - int recv_completion_flag, send_completion_flag;
39.103 - int data_type;
39.104 -
39.105 - /* get size of data needed - same layout as user data, so that
39.106 - * we can apply the reudction routines directly on these buffers
39.107 - */
39.108 - rc=opal_datatype_type_size(dtype, &dt_size);
39.109 - if( OMPI_SUCCESS != rc ) {
39.110 - goto Error;
39.111 - }
39.112 - message_extent=dt_extent*count;
39.113 -
39.114 - /* lenght of control and data regions */
39.115 - len_data_buffer=sm_module->data_memory_per_proc_per_segment;
39.116 -
39.117 - /* number of data types copies that the scratch buffer can hold */
39.118 - n_dts_per_buffer=((int) MAX_TMP_BUFFER)/dt_size;
39.119 - if ( 0 == n_dts_per_buffer ) {
39.120 - rc=OMPI_ERROR;
39.121 - goto Error;
39.122 - }
39.123 -
39.124 - /* need a read and a write buffer for a pair-wise exchange of data */
39.125 - n_dts_per_buffer/=2;
39.126 - len_data_buffer=n_dts_per_buffer*dt_size;
39.127 -
39.128 - /* compute number of stripes needed to process this collective */
39.129 - n_data_segments=(count+n_dts_per_buffer -1 ) / n_dts_per_buffer ;
39.130 -
39.131 - /* */
39.132 - n_peers=opal_list_get_size(peers);
39.133 -
39.134 - /* get my rank in the list */
39.135 - my_rank=0;
39.136 - for (item = opal_list_get_first(peers) ;
39.137 - item != opal_list_get_end(peers) ;
39.138 - item = opal_list_get_next(peers)) {
39.139 - if(ompi_proc_local()==(ompi_proc_t *)item){
39.140 - /* this is the pointer to my proc strucuture */
39.141 - break;
39.142 - }
39.143 - my_rank++;
39.144 - }
39.145 - proc_array=(ompi_proc_t **)malloc(sizeof(ompi_proc_t *)*n_peers);
39.146 - if( NULL == proc_array) {
39.147 - goto Error;
39.148 - }
39.149 - cnt=0;
39.150 - for (item = opal_list_get_first(peers) ;
39.151 - item != opal_list_get_end(peers) ;
39.152 - item = opal_list_get_next(peers)) {
39.153 - proc_array[cnt]=(ompi_proc_t *)item;
39.154 - cnt++;
39.155 - }
39.156 -
39.157 - /* get my reduction communication pattern */
39.158 - ret=mca_common_netpatterns_setup_recursive_doubling_tree_node(n_peers,my_rank,&my_exchange_node);
39.159 - if(OMPI_SUCCESS != ret){
39.160 - return ret;
39.161 - }
39.162 -
39.163 - /* setup flags for non-blocking communications */
39.164 - recv_done=&recv_completion_flag;
39.165 - send_done=&send_completion_flag;
39.166 -
39.167 - /* set data type */
39.168 - if(&opal_datatype_int4==dtype) {
39.169 - data_type=TYPE_INT4;
39.170 - }
39.171 -
39.172 - count_processed=0;
39.173 -
39.174 - /* get a pointer to the shared-memory working buffer */
39.175 - /* NOTE: starting with a rather synchronous approach */
39.176 - for( stripe_number=0 ; stripe_number < n_data_segments ; stripe_number++ ) {
39.177 -
39.178 - /* get number of elements to process in this stripe */
39.179 - count_this_stripe=n_dts_per_buffer;
39.180 - if( count_processed + count_this_stripe > count )
39.181 - count_this_stripe=count-count_processed;
39.182 -
39.183 - /* copy data from the input buffer into the temp buffer */
39.184 - sbuf_current=(char *)sbuf+count_processed*dt_size;
39.185 - memcopy(scratch_bufers[send_buffer],sbuf_current,count_this_stripe*dt_size);
39.186 -
39.187 - /* copy data in from the "extra" source, if need be */
39.188 - if(0 < my_exchange_node->n_extra_sources) {
39.189 -
39.190 - if ( EXCHANGE_NODE == my_exchange_node->node_type ) {
39.191 -
39.192 - /*
39.193 - ** Receive data from extra node
39.194 - */
39.195 -
39.196 - extra_rank=my_exchange_node.rank_extra_source;
39.197 - recv_iov.iov_base=scratch_bufers[recv_buffer];
39.198 - recv_iov.iov_len=count_this_stripe*dt_size;
39.199 - rc = ompi_rte_recv(&(proc_array[extra_rank]->proc_name), &recv_iov, 1,
39.200 - OMPI_RML_TAG_ALLREDUCE , 0);
39.201 - if(OMPI_SUCCESS != rc ) {
39.202 - goto Error;
39.203 - }
39.204 -
39.205 - /* apply collective operation to first half of the data */
39.206 - if( 0 < count_this_stripe ) {
39.207 - op_reduce(op_type,(void *)scratch_bufers[recv_buffer],
39.208 - (void *)scratch_bufers[send_buffer], n_my_count,TYPE_INT4);
39.209 - }
39.210 -
39.211 -
39.212 - } else {
39.213 -
39.214 - /*
39.215 - ** Send data to "partner" node
39.216 - */
39.217 - extra_rank=my_exchange_node.rank_extra_source;
39.218 - send_iov.iov_base=scratch_bufers[send_buffer];
39.219 - send_iov.iov_len=count_this_stripe*dt_size;
39.220 - rc = ompi_rte_send(&(proc_array[extra_rank]->proc_name), &send_iov, 1,
39.221 - OMPI_RML_TAG_ALLREDUCE , 0);
39.222 - if(OMPI_SUCCESS != rc ) {
39.223 - goto Error;
39.224 - }
39.225 - }
39.226 -
39.227 - /* change pointer to scratch buffer - this was we can send data
39.228 - ** that we have summed w/o a memory copy, and receive data into the
39.229 - ** other buffer, w/o fear of over writting data that has not yet
39.230 - ** completed being send
39.231 - */
39.232 - recv_buffer^=1;
39.233 - send_buffer^=1;
39.234 - }
39.235 -
39.236 - MB();
39.237 - /*
39.238 - * Signal parent that data is ready
39.239 - */
39.240 - tag=base_tag+1;
39.241 - my_ctl_pointer->flag=tag;
39.242 -
39.243 - /* loop over data exchanges */
39.244 - for(exchange=0 ; exchange < my_exchange_node->n_exchanges ; exchange++) {
39.245 -
39.246 - /* debug
39.247 - t4=opal_sys_timer_get_cycles();
39.248 - end debug */
39.249 -
39.250 -
39.251 - my_write_pointer=my_tmp_data_buffer[index_write];
39.252 - my_read_pointer=my_tmp_data_buffer[index_read];
39.253 -
39.254 - /* is the remote data read */
39.255 - pair_rank=my_exchange_node->rank_exchanges[exchange];
39.256 -
39.257 - *recv_done=0;
39.258 - *send_done=0;
39.259 - MB();
39.260 -
39.261 - /* post non-blocking receive */
39.262 - recv_iov.iov_base=scratch_bufers[send_buffer];
39.263 - recv_iov.iov_len=count_this_stripe*dt_size;
39.264 - rc = ompi_rte_recv_nb(&(proc_array[extra_rank]->proc_name), recv_iov, 1,
39.265 - OMPI_RML_TAG_ALLREDUCE , 0, recv_completion, recv_done);
39.266 -
39.267 - /* post non-blocking send */
39.268 - send_iov.iov_base=scratch_bufers[send_buffer];
39.269 - send_iov.iov_len=count_this_stripe*dt_size;
39.270 - rc = ompi_rte_send_nb(&(proc_array[extra_rank]->proc_name), send_iov, 1,
39.271 - OMPI_RML_TAG_ALLREDUCE , 0, send_completion, send_done);
39.272 -
39.273 - /* wait on receive completion */
39.274 - while(!(*recv_done) ) {
39.275 - opal_progress();
39.276 - }
39.277 -
39.278 - /* reduce the data */
39.279 - if( 0 < count_this_stripe ) {
39.280 - op_reduce(op_type,(void *)scratch_bufers[recv_buffer],
39.281 - (void *)scratch_bufers[send_buffer], n_my_count,TYPE_INT4);
39.282 - }
39.283 -
39.284 -
39.285 - /* get ready for next step */
39.286 - index_read=(exchange&1);
39.287 - index_write=((exchange+1)&1);
39.288 -
39.289 - /* wait on send completion */
39.290 - while(!(*send_done) ) {
39.291 - opal_progress();
39.292 - }
39.293 -
39.294 - }
39.295 -
39.296 - /* copy data in from the "extra" source, if need be */
39.297 - if(0 < my_exchange_node->n_extra_sources) {
39.298 -
39.299 - if ( EXTRA_NODE == my_exchange_node->node_type ) {
39.300 - /*
39.301 - ** receive the data
39.302 - ** */
39.303 - extra_rank=my_exchange_node->rank_extra_source;
39.304 -
39.305 - recv_iov.iov_base=scratch_bufers[recv_buffer];
39.306 - recv_iov.iov_len=count_this_stripe*dt_size;
39.307 - rc = ompi_rte_recv(&(proc_array[extra_rank]->proc_name), &recv_iov, 1,
39.308 - OMPI_RML_TAG_ALLREDUCE , 0);
39.309 - if(OMPI_SUCCESS != rc ) {
39.310 - goto Error;
39.311 - }
39.312 -
39.313 - } else {
39.314 - /* send the data to the pair-rank outside of the power of 2 set
39.315 - ** of ranks
39.316 - */
39.317 -
39.318 - extra_rank=my_exchange_node->rank_extra_source;
39.319 - send_iov.iov_base=scratch_bufers[recv_buffer];
39.320 - send_iov.iov_len=count_this_stripe*dt_size;
39.321 - rc = ompi_rte_recv(&(proc_array[extra_rank]->proc_name), &send_iov, 1,
39.322 - OMPI_RML_TAG_ALLREDUCE , 0);
39.323 - if(OMPI_SUCCESS != rc ) {
39.324 - goto Error;
39.325 - }
39.326 - }
39.327 - }
39.328 -
39.329 - /* copy data into the destination buffer */
39.330 - rc=ompi_datatype_copy_content_same_ddt(dtype, count_this_stripe,
39.331 - (char *)((char *)rbuf+dt_extent*count_processed),
39.332 - (char *)my_write_pointer);
39.333 - if( 0 != rc ) {
39.334 - return OMPI_ERROR;
39.335 - }
39.336 -
39.337 - /* copy data from the temp buffer into the output buffer */
39.338 - rbuf_current=(char *)rbuf+count_processed*dt_size;
39.339 - memcopy(scratch_bufers[recv_buffer],rbuf_current,count_this_stripe*dt_size);
39.340 -
39.341 - /* update the count of elements processed */
39.342 - count_processed+=count_this_stripe;
39.343 - }
39.344 -
39.345 - /* return */
39.346 - return rc;
39.347 -
39.348 -Error:
39.349 - return rc;
39.350 -}
40.1 --- a/ompi/mca/common/netpatterns/common_coll_ops.h Tue Feb 05 18:15:32 2013 +0000
40.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000
40.3 @@ -1,29 +0,0 @@
40.4 -/*
40.5 - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
40.6 - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
40.7 - * $COPYRIGHT$
40.8 - *
40.9 - * Additional copyrights may follow
40.10 - *
40.11 - * $HEADER$
40.12 - */
40.13 -
40.14 -#ifndef COMM_OP_TYPES_H
40.15 -#define COMM_OP_TYPES_H
40.16 -
40.17 -#include "ompi_config.h"
40.18 -
40.19 -BEGIN_C_DECLS
40.20 -
40.21 -int comm_allreduce(void *sbuf, void *rbuf, int count, opal_datatype_t *dtype,
40.22 - int op, opal_list_t *peers);
40.23 -
40.24 -/* reduction operations supported */
40.25 -#define OP_SUM 1
40.26 -
40.27 -#define TYPE_INT4 1
40.28 -
40.29 -
40.30 -END_C_DECLS
40.31 -
40.32 -#endif /* COMM_OP_TYPES_H */
41.1 --- a/ompi/mca/common/netpatterns/common_netpatterns.h Tue Feb 05 18:15:32 2013 +0000
41.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000
41.3 @@ -1,150 +0,0 @@
41.4 -/*
41.5 - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
41.6 - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
41.7 - * Copyright (c) 2012 Los Alamos National Security, LLC.
41.8 - * All rights reserved.
41.9 - * $COPYRIGHT$
41.10 - *
41.11 - * Additional copyrights may follow
41.12 - *
41.13 - * $HEADER$
41.14 - */
41.15 -
41.16 -#ifndef COMM_PATTERNS_H
41.17 -#define COMM_PATTERNS_H
41.18 -
41.19 -#include "ompi_config.h"
41.20 -
41.21 -#include "ompi/mca/rte/rte.h"
41.22 -#include "common_netpatterns_knomial_tree.h"
41.23 -
41.24 -BEGIN_C_DECLS
41.25 -
41.26 -int ompi_common_netpatterns_base_err(const char* fmt, ...);
41.27 -int ompi_common_netpatterns_register_mca_params(void);
41.28 -
41.29 -#if OPAL_ENABLE_DEBUG
41.30 -extern int ompi_common_netpatterns_base_verbose; /* disabled by default */
41.31 -OMPI_DECLSPEC extern int ompi_common_netpatterns_base_err(const char*, ...) __opal_attribute_format__(__printf__, 1, 2);
41.32 -#define NETPATTERNS_VERBOSE(args) \
41.33 - do { \
41.34 - if(ompi_common_netpatterns_base_verbose > 0) { \
41.35 - ompi_common_netpatterns_base_err("[%s]%s[%s:%d:%s] ",\
41.36 - ompi_process_info.nodename, \
41.37 - OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), \
41.38 - __FILE__, __LINE__, __func__); \
41.39 - ompi_common_netpatterns_base_err args; \
41.40 - ompi_common_netpatterns_base_err("\n"); \
41.41 - } \
41.42 - } while(0);
41.43 -#else
41.44 -#define NETPATTERNS_VERBOSE(args)
41.45 -#endif
41.46 -
41.47 -#define FIND_BASE(base,myid,level,k) \
41.48 - do { \
41.49 - int temp = 1; \
41.50 - int jj; \
41.51 - int knt2; \
41.52 - \
41.53 - base = 0; \
41.54 - for( jj = 0; jj < level; jj++) {\
41.55 - temp *= k; \
41.56 - } \
41.57 - knt2 = 1; \
41.58 - while(myid >= knt2*temp){ \
41.59 - knt2++; \
41.60 - } \
41.61 - base = knt2*temp - temp; \
41.62 - } while(0) \
41.63 -
41.64 -
41.65 -
41.66 -
41.67 -/* enum for node type */
41.68 -enum {
41.69 - ROOT_NODE,
41.70 - LEAF_NODE,
41.71 - INTERIOR_NODE
41.72 -};
41.73 -
41.74 -
41.75 -/*
41.76 - * N-order tree node description
41.77 - */
41.78 -struct mca_common_netpatterns_tree_node_t {
41.79 - /* my rank within the group */
41.80 - int my_rank;
41.81 - /* my node type - root, leaf, or interior */
41.82 - int my_node_type;
41.83 - /* number of nodes in the tree */
41.84 - int tree_size;
41.85 - /* number of parents (0/1) */
41.86 - int n_parents;
41.87 - /* number of children */
41.88 - int n_children;
41.89 - /* parent rank within the group */
41.90 - int parent_rank;
41.91 - /* chidren ranks within the group */
41.92 - int *children_ranks;
41.93 -};
41.94 -typedef struct mca_common_netpatterns_tree_node_t mca_common_netpatterns_tree_node_t;
41.95 -
41.96 -struct mca_common_netpatterns_k_exchange_node_t;
41.97 -/*
41.98 - * N-order + knominal tree node description
41.99 - */
41.100 -struct mca_common_netpatterns_narray_knomial_tree_node_t {
41.101 - /* my rank within the group */
41.102 - int my_rank;
41.103 - /* my node type - root, leaf, or interior */
41.104 - int my_node_type;
41.105 - /* number of nodes in the tree */
41.106 - int tree_size;
41.107 - /* number of parents (0/1) */
41.108 - int n_parents;
41.109 - /* number of children */
41.110 - int n_children;
41.111 - /* parent rank within the group */
41.112 - int parent_rank;
41.113 - /* chidren ranks within the group */
41.114 - int *children_ranks;
41.115 - /* Total number of ranks on this specific level */
41.116 - int level_size;
41.117 - /* Rank on this node inside of level */
41.118 - int rank_on_level;
41.119 - /* Knomial recursive gather information */
41.120 - struct mca_common_netpatterns_k_exchange_node_t k_node;
41.121 -};
41.122 -typedef struct mca_common_netpatterns_narray_knomial_tree_node_t
41.123 -mca_common_netpatterns_narray_knomial_tree_node_t;
41.124 -
41.125 -
41.126 -/* Init code for common_netpatterns */
41.127 -OMPI_DECLSPEC int ompi_common_netpatterns_init(void);
41.128 -
41.129 -/* setup an n-array tree */
41.130 -OMPI_DECLSPEC int mca_common_netpatterns_setup_narray_tree(int tree_order, int my_rank, int num_nodes,
41.131 - mca_common_netpatterns_tree_node_t *my_node);
41.132 -/* setup an n-array tree with k-nomial levels */
41.133 -OMPI_DECLSPEC int mca_common_netpatterns_setup_narray_knomial_tree( int tree_order, int my_rank, int num_nodes,
41.134 - mca_common_netpatterns_narray_knomial_tree_node_t *my_node);
41.135 -
41.136 -/* setup an multi-nomial tree - for each node in the tree
41.137 - * this returns it's parent, and it's children
41.138 - */
41.139 -OMPI_DECLSPEC int mca_common_netpatterns_setup_multinomial_tree(int tree_order, int num_nodes,
41.140 - mca_common_netpatterns_tree_node_t *tree_nodes);
41.141 -
41.142 -OMPI_DECLSPEC int mca_common_netpatterns_setup_narray_tree_contigous_ranks(int tree_order,
41.143 - int num_nodes, mca_common_netpatterns_tree_node_t **tree_nodes);
41.144 -
41.145 -/* calculate the nearest power of radix that is equal to or greater
41.146 - * than size, with the specified radix. The resulting tree is of
41.147 - * depth n_lvls.
41.148 - */
41.149 -OMPI_DECLSPEC int roundup_to_power_radix( int radix, int size, int *n_lvls );
41.150 -
41.151 -END_C_DECLS
41.152 -
41.153 -#endif /* COMM_PATTERNS_H */
42.1 --- a/ompi/mca/common/netpatterns/common_netpatterns_base.c Tue Feb 05 18:15:32 2013 +0000
42.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000
42.3 @@ -1,53 +0,0 @@
42.4 -/*
42.5 - *
42.6 - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
42.7 - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
42.8 - * $COPYRIGHT$
42.9 - *
42.10 - * Additional copyrights may follow
42.11 - *
42.12 - * $HEADER$
42.13 - */
42.14 -#include "opal/mca/base/mca_base_param.h"
42.15 -#include "ompi/include/ompi/constants.h"
42.16 -#include "common_netpatterns.h"
42.17 -
42.18 -int ompi_common_netpatterns_base_verbose = 0; /* disabled by default */
42.19 -
42.20 -int ompi_common_netpatterns_register_mca_params(void)
42.21 -{
42.22 - mca_base_param_reg_int_name("common",
42.23 - "netpatterns_base_verbose",
42.24 - "Verbosity level of the NETPATTERNS framework",
42.25 - false, false,
42.26 - 0,
42.27 - &ompi_common_netpatterns_base_verbose);
42.28 -
42.29 - return OMPI_SUCCESS;
42.30 -}
42.31 -
42.32 -int ompi_common_netpatterns_base_err(const char* fmt, ...)
42.33 -{
42.34 - va_list list;
42.35 - int ret;
42.36 -
42.37 - va_start(list, fmt);
42.38 - ret = vfprintf(stderr, fmt, list);
42.39 - va_end(list);
42.40 - return ret;
42.41 -}
42.42 -
42.43 -int ompi_common_netpatterns_init(void)
42.44 -{
42.45 -/* There is no component for common_netpatterns so every component that uses it
42.46 - should call ompi_common_netpatterns_init, still we want to run it only once */
42.47 -static int was_called = 0;
42.48 -
42.49 - if (0 == was_called) {
42.50 - was_called = 1;
42.51 -
42.52 - return ompi_common_netpatterns_register_mca_params();
42.53 - }
42.54 -
42.55 - return OMPI_SUCCESS;
42.56 -}
43.1 --- a/ompi/mca/common/netpatterns/common_netpatterns_knomial_tree.c Tue Feb 05 18:15:32 2013 +0000
43.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000
43.3 @@ -1,935 +0,0 @@
43.4 -/*
43.5 - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
43.6 - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
43.7 -* $COPYRIGHT$
43.8 - *
43.9 - * Additional copyrights may follow
43.10 - *
43.11 - * $HEADER$
43.12 - */
43.13 -
43.14 -#include "ompi_config.h"
43.15 -#ifdef HAVE_UNISTD_H
43.16 -#include <unistd.h>
43.17 -#endif
43.18 -#include <sys/types.h>
43.19 -#ifdef HAVE_SYS_MMAN_H
43.20 -#include <sys/mman.h>
43.21 -#endif
43.22 -#include <fcntl.h>
43.23 -#include <stdlib.h>
43.24 -#include <assert.h>
43.25 -
43.26 -#include "ompi/constants.h"
43.27 -
43.28 -#include "ompi/mca/rte/rte.h"
43.29 -
43.30 -#include "common_netpatterns.h"
43.31 -
43.32 -/* setup recursive doubleing tree node */
43.33 -
43.34 -OMPI_DECLSPEC int mca_common_netpatterns_setup_recursive_knomial_allgather_tree_node(
43.35 - int num_nodes, int node_rank, int tree_order, int *hier_ranks,
43.36 - mca_common_netpatterns_k_exchange_node_t *exchange_node)
43.37 -{
43.38 - /* local variables */
43.39 - int i, j, cnt, i_temp;
43.40 - int knt,knt2,kk, ex_node, stray;
43.41 - int n_levels,pow_k;
43.42 - int k_temp1;
43.43 - int k_temp2;
43.44 - int myid, reindex_myid = 0;
43.45 - int base, peer_base,base_temp;
43.46 - int peer;
43.47 - int *prev_data = NULL;
43.48 - int *current_data = NULL;
43.49 - int *group_info = NULL;
43.50 -
43.51 -
43.52 - NETPATTERNS_VERBOSE(
43.53 - ("Enter mca_common_netpatterns_setup_recursive_knomial_tree_node(num_nodes=%d, node_rank=%d, tree_order=%d)",
43.54 - num_nodes, node_rank, tree_order));
43.55 -
43.56 - assert(num_nodes > 1);
43.57 - assert(tree_order > 1);
43.58 - if (tree_order > num_nodes) {
43.59 - tree_order = num_nodes;
43.60 - }
43.61 -
43.62 - /* k-nomial radix */
43.63 - exchange_node->tree_order = tree_order;
43.64 -
43.65 - /* Calculate the number of levels in the tree for
43.66 - * the largest power of tree_order less than or
43.67 - * equal to the group size
43.68 - */
43.69 - n_levels = 0;
43.70 - cnt=1;
43.71 - while ( num_nodes > cnt ) {
43.72 - cnt *= tree_order;
43.73 - n_levels++;
43.74 - }
43.75 - /* this is the actual number of recusive k-ing steps
43.76 - * we will perform, the last step may not be a full
43.77 - * step depending on the outcome of the next conditional
43.78 - */
43.79 - pow_k = n_levels;
43.80 -
43.81 - /* figure out the largest power of tree_order that is less than or equal to
43.82 - * num_nodes */
43.83 - if ( cnt > num_nodes) {
43.84 - cnt /= tree_order;
43.85 - n_levels--;
43.86 - }
43.87 -
43.88 - /*exchange_node->log_tree_order = n_levels;*/
43.89 - exchange_node->log_tree_order = pow_k;
43.90 - exchange_node->n_largest_pow_tree_order = cnt;
43.91 -
43.92 -
43.93 - /* find the number of complete groups of size tree_order, tree_order^2, tree_order^3,...,tree_order^pow_k */
43.94 - /* I don't think we need to cache this info this group_info array */
43.95 - group_info = (int *) calloc(pow_k , sizeof(int));
43.96 - group_info[0] = num_nodes/tree_order;
43.97 - /*fprintf(stderr,"Number of complete groups of power 1 is %d\n",group_info[0]);*/
43.98 - for ( i = 1; i < pow_k; i ++) {
43.99 - group_info[i] = group_info[i-1]/tree_order;
43.100 - /*fprintf(stderr,"Number of complete groups of power %d is %d\n",i+1,group_info[i]);*/
43.101 -
43.102 - }
43.103 -
43.104 - /* find number of incomplete groups and number of ranks belonging to those ranks */
43.105 - knt=0;
43.106 - while (knt <= (pow_k - 1) && group_info[knt] > 0) {
43.107 - knt++;
43.108 - }
43.109 - knt--;
43.110 - /*fprintf(stderr,"Maximal power of k is %d and the number of incomplete groups is %d \n", knt+1 ,tree_order - group_info[knt] );*/
43.111 -
43.112 - /* k_temp is a synonym for cnt which is the largest full power of k group */
43.113 - /* now, start the calculation to find the first stray rank aka "extra" rank */
43.114 - stray = 0;
43.115 - /*fprintf(stderr,"Maximal power of k %d, first stragler rank is %d and the number of straglers is %d\n",cnt,
43.116 - cnt*group_info[knt],
43.117 - num_nodes - cnt*group_info[knt]);*/
43.118 -
43.119 -
43.120 - /* cache this info, it's muy importante */
43.121 - stray = cnt*group_info[knt];
43.122 - exchange_node->k_nomial_stray = stray;
43.123 -
43.124 -
43.125 -
43.126 - /* before we do this, we need to first reindex */
43.127 - /* reindexing phase */
43.128 - /* this is the reindex phase */
43.129 - exchange_node->reindex_map = (int *) malloc(num_nodes*sizeof(int));
43.130 - /* this is the inverse map */
43.131 - exchange_node->inv_reindex_map = (int *) malloc(num_nodes*sizeof(int));
43.132 - /*int reindex_myid;*/
43.133 - /* reindex */
43.134 - if( stray < num_nodes ) {
43.135 - /* find the first proxy rank */
43.136 - peer = stray - cnt;
43.137 - /* fix all ranks prior to this rank */
43.138 - for( i = 0; i < peer; i++){
43.139 - exchange_node->reindex_map[i] = i;
43.140 - }
43.141 - /* now, start the swap */
43.142 - exchange_node->reindex_map[peer] = peer;
43.143 - for( i = (peer+1); i < (peer + (num_nodes - stray)+1); i++) {
43.144 - exchange_node->reindex_map[i] = exchange_node->reindex_map[i-1] + 2;
43.145 - }
43.146 - i_temp = i;
43.147 - for( i = i_temp; i < stray; i++) {
43.148 - exchange_node->reindex_map[i] = exchange_node->reindex_map[i-1] + 1;
43.149 - }
43.150 - /* now, finish it off */
43.151 - exchange_node->reindex_map[stray] = peer + 1;
43.152 - for( i = (stray+1); i < num_nodes; i++) {
43.153 - exchange_node->reindex_map[i] = exchange_node->reindex_map[i-1] + 2;
43.154 - }
43.155 - /* debug print */
43.156 - /*
43.157 - for( i = 0; i < np; i++){
43.158 - fprintf(stderr,"%d ",reindex_map[i]);
43.159 - }
43.160 - fprintf(stderr,"\n");
43.161 - */
43.162 - } else {
43.163 - /* we have no extras, trivial reindexing */
43.164 - for( i = 0; i < num_nodes; i++){
43.165 - exchange_node->reindex_map[i] = i;
43.166 - }
43.167 - }
43.168 - /* finished reindexing */
43.169 -
43.170 - /* Now, I need to get my rank in the new indexing */
43.171 - for( i = 0; i < num_nodes; i++ ){
43.172 - if( node_rank == exchange_node->reindex_map[i] ){
43.173 - exchange_node->reindex_myid = i;
43.174 - break;
43.175 - }
43.176 - }
43.177 - /* Now, let's compute the inverse mapping here */
43.178 - for( i = 0; i < num_nodes; i++){
43.179 - j = 0;
43.180 - while(exchange_node->reindex_map[j] != i ){
43.181 - j++;
43.182 - }
43.183 - exchange_node->inv_reindex_map[i] = j;
43.184 - }
43.185 -
43.186 -
43.187 - /* Now we get the data sizes we should expect at each level */
43.188 - /* now get the size of the data I am to receive from each peer */
43.189 - /*int **payload_info;*/
43.190 - prev_data = (int *) malloc( num_nodes*sizeof(int) );
43.191 - if( NULL == prev_data ) {
43.192 - goto Error;
43.193 - }
43.194 -
43.195 - current_data = (int *) malloc( num_nodes*sizeof(int) );
43.196 - if( NULL == current_data ) {
43.197 - goto Error;
43.198 - }
43.199 -
43.200 -
43.201 - exchange_node->payload_info = (mca_common_netpatterns_payload_t **) malloc(sizeof(mca_common_netpatterns_payload_t *)*pow_k);
43.202 - if( NULL == exchange_node->payload_info) {
43.203 - goto Error;
43.204 - }
43.205 -
43.206 - for(i = 0; i < pow_k; i++){
43.207 - exchange_node->payload_info[i] = (mca_common_netpatterns_payload_t *) malloc(sizeof(mca_common_netpatterns_payload_t)*(tree_order-1));
43.208 - if( NULL == exchange_node->payload_info[i]) {
43.209 - goto Error;
43.210 - }
43.211 -
43.212 - }
43.213 - /* intialize the payload array
43.214 - This is the money struct, just need to initialize this with
43.215 - the subgroup information */
43.216 - /*
43.217 - for(i = 0; i < num_nodes; i++){
43.218 - prev_data[i] = 1;
43.219 - current_data[i] = 1;
43.220 - }
43.221 - */
43.222 -
43.223 - for(i = 0; i < num_nodes; i++){
43.224 - prev_data[i] = hier_ranks[i];
43.225 - current_data[i] = hier_ranks[i];
43.226 - }
43.227 -
43.228 - /* everyone will need to do this loop over all ranks
43.229 - * Phase I calculate the contribution from the extra ranks
43.230 - */
43.231 - for( myid = 0; myid < num_nodes; myid++) {
43.232 - /* get my new rank */
43.233 - for( j = 0; j < num_nodes; j++ ){
43.234 - /* this will be satisfied for one of the indices */
43.235 - if( myid == exchange_node->reindex_map[j] ){
43.236 - reindex_myid = j;
43.237 - break;
43.238 - }
43.239 - }
43.240 -
43.241 - for( j = stray; j < num_nodes; j++) {
43.242 - if(reindex_myid == ( j - cnt )) {
43.243 - /* then this is a proxy rank */
43.244 - prev_data[myid] += prev_data[exchange_node->reindex_map[j]];
43.245 - break;
43.246 - }
43.247 -
43.248 - }
43.249 - }
43.250 -
43.251 - /* Phase II calculate the contribution from each recursive k - ing level
43.252 - *
43.253 - */
43.254 - k_temp1 = tree_order; /* k^1 */
43.255 - k_temp2 = 1; /* k^0 */
43.256 - peer_base = 0;
43.257 - base_temp = 0;
43.258 - for( i = 0; i < pow_k; i++) {
43.259 - /* get my new rank */
43.260 - for( myid = 0; myid < num_nodes; myid++){
43.261 - current_data[myid] = prev_data[myid];
43.262 - /*fprintf(stderr,"my current data at level %d is %d\n",i+1,current_data[myid]);*/
43.263 - for( j = 0; j < num_nodes; j++ ){
43.264 - if( myid == exchange_node->reindex_map[j] ){
43.265 - reindex_myid = j;
43.266 - break;
43.267 - }
43.268 - }
43.269 - if( reindex_myid < stray ) {
43.270 - /* now start the actual algorithm */
43.271 - FIND_BASE(base,reindex_myid,i+1,tree_order);
43.272 - for( j = 0; j < ( tree_order - 1 ); j ++ ) {
43.273 - peer = base + (reindex_myid + k_temp2*(j+1))%k_temp1;
43.274 - if( peer < stray ) {
43.275 - /*fprintf(stderr,"getting %d bytes \n",prev_data[reindex_map[peer]]);*/
43.276 - /* then get the data */
43.277 - if( node_rank == myid ){
43.278 - exchange_node->payload_info[i][j].r_len = prev_data[exchange_node->reindex_map[peer]];
43.279 - /*fprintf(stderr,"exchange_node->payload_info[%d][%d].r_len %d\n",i,j,prev_data[exchange_node->reindex_map[peer]]);*/
43.280 - if( i > 0 ) {
43.281 -
43.282 - /* find my len and offset */
43.283 - FIND_BASE(peer_base,peer,i,tree_order);
43.284 - /* I do not want to mess with this, but it seems that I have no choice */
43.285 - ex_node = exchange_node->reindex_map[peer_base];
43.286 - /* now, find out how far down the line this guy really is */
43.287 - knt2 =0;
43.288 - for(kk = 0; kk < ex_node; kk++){
43.289 - knt2 += hier_ranks[kk];
43.290 - }
43.291 - exchange_node->payload_info[i][j].r_offset = knt2;
43.292 - /*fprintf(stderr,"exchange_node->payload_info[%d][%d].r_offset %d\n",i,j,exchange_node->payload_info[i][j].r_offset);*/
43.293 -
43.294 - FIND_BASE(base_temp,reindex_myid,i,tree_order);
43.295 - ex_node = exchange_node->reindex_map[base_temp];
43.296 - knt2 = 0;
43.297 - for( kk = 0; kk < ex_node; kk++){
43.298 - knt2 += hier_ranks[kk];
43.299 - }
43.300 - exchange_node->payload_info[i][j].s_offset =
43.301 - knt2; /* exchange_node->reindex_map[base_temp]; */
43.302 - /*fprintf(stderr,"exchange_node->payload_info[%d][%d].s_offset %d\n",i,j,exchange_node->payload_info[i][j].s_offset);*/
43.303 - } else {
43.304 - ex_node = exchange_node->reindex_map[peer];
43.305 - knt2 =0;
43.306 - for(kk = 0; kk < ex_node; kk++){
43.307 - knt2 += hier_ranks[kk];
43.308 - }
43.309 - exchange_node->payload_info[i][j].r_offset =
43.310 - knt2; /*exchange_node->reindex_map[peer]; */
43.311 - /*fprintf(stderr,"exchange_node->payload_info[%d][%d].r_offset %d\n",i,j,exchange_node->payload_info[i][j].r_offset);*/
43.312 - knt2 = 0;
43.313 - for(kk = 0; kk < myid; kk++){
43.314 - knt2 += hier_ranks[kk];
43.315 - }
43.316 - exchange_node->payload_info[i][j].s_offset = knt2;
43.317 - /*fprintf(stderr,"exchange_node->payload_info[%d][%d].s_offset %d\n",i,j, exchange_node->payload_info[i][j].s_offset);*/
43.318 - }
43.319 - /* how much I am to receive from this peer on this level */
43.320 - /* how much I am to send to this peer on this level */
43.321 - exchange_node->payload_info[i][j].s_len = prev_data[node_rank];
43.322 - /*fprintf(stderr,"exchange_node->payload_info[%d][%d].s_len %d\n",i,j,prev_data[node_rank]);*/
43.323 - /*fprintf(stderr,"I am rank %d receiveing %d bytes from rank %d at level %d\n",node_rank,
43.324 - prev_data[exchange_node->reindex_map[peer]],
43.325 - exchange_node->reindex_map[peer], i+1);*/
43.326 - /*fprintf(stderr,"I am rank %d sending %d bytes to rank %d at level %d\n",node_rank,prev_data[myid],
43.327 - exchange_node->reindex_map[peer],i+1);*/
43.328 - }
43.329 -
43.330 - current_data[myid] += prev_data[exchange_node->reindex_map[peer]];
43.331 - }
43.332 - }
43.333 - }
43.334 -
43.335 -
43.336 - }
43.337 - k_temp1 *= tree_order;
43.338 - k_temp2 *= tree_order;
43.339 - /* debug print */
43.340 - /* fprintf(stderr,"Level %d current data ",i+1);*/
43.341 - for( j = 0; j < num_nodes; j++){
43.342 - /* fprintf(stderr,"%d ",current_data[j]); */
43.343 - prev_data[j] = current_data[j];
43.344 - }
43.345 - /* fprintf(stderr,"\n");*/
43.346 -
43.347 - }
43.348 -
43.349 -
43.350 - /* this is the natural way to do recursive k-ing */
43.351 - /* should never have more than one extra rank per proxy */
43.352 - if( exchange_node->reindex_myid >= stray ){
43.353 - /*fprintf(stderr,"Rank %d is mapped onto proxy rank %d \n",exchange_node->reindex_myid,exchange_node->reindex_myid - cnt);*/
43.354 - exchange_node->node_type = EXTRA_NODE;
43.355 - } else {
43.356 - exchange_node->node_type = EXCHANGE_NODE;
43.357 - }
43.358 -
43.359 - /* set node characteristics - node that is not within the largest
43.360 - * power of tree_order will just send its data to node that will participate
43.361 - * in the recursive k-ing, and get the result back at the end.
43.362 - * set the initial and final data exchanges - those that are not
43.363 - * part of the recursive k-ing.
43.364 - */
43.365 - if (EXCHANGE_NODE == exchange_node->node_type) {
43.366 - exchange_node->n_extra_sources = 0;
43.367 - for( i = stray; i < num_nodes; i++) {
43.368 - if(exchange_node->reindex_myid == ( i - cnt )) {
43.369 - /* then I am a proxy rank and there is only a
43.370 - * single extra source
43.371 - */
43.372 - exchange_node->n_extra_sources = 1;
43.373 - break;
43.374 - }
43.375 - }
43.376 -
43.377 - if (exchange_node->n_extra_sources > 0) {
43.378 - exchange_node->rank_extra_sources_array = (int *) malloc
43.379 - (exchange_node->n_extra_sources * sizeof(int));
43.380 - if( NULL == exchange_node->rank_extra_sources_array ) {
43.381 - goto Error;
43.382 - }
43.383 - /* you broke above */
43.384 - exchange_node->rank_extra_sources_array[0] = exchange_node->reindex_map[i];
43.385 - } else {
43.386 - exchange_node->rank_extra_sources_array = NULL;
43.387 - }
43.388 - } else {
43.389 - /* I am an extra rank, find my proxy rank */
43.390 - exchange_node->n_extra_sources = 1;
43.391 -
43.392 - exchange_node->rank_extra_sources_array = (int *) malloc
43.393 - (exchange_node->n_extra_sources * sizeof(int));
43.394 - if( NULL == exchange_node->rank_extra_sources_array ) {
43.395 - goto Error;
43.396 - }
43.397 - exchange_node->rank_extra_sources_array[0] = exchange_node->reindex_map[exchange_node->reindex_myid - cnt];
43.398 - }
43.399 -
43.400 -
43.401 - /* set the exchange pattern */
43.402 - if (EXCHANGE_NODE == exchange_node->node_type) {
43.403 - /* yep, that's right PLUS 1 */
43.404 - exchange_node->n_exchanges = n_levels + 1;
43.405 - /* initialize this */
43.406 - exchange_node->n_actual_exchanges = 0;
43.407 - /* Allocate 2 dimension array thak keeps
43.408 - rank exchange information for each step*/
43.409 - exchange_node->rank_exchanges = (int **) malloc
43.410 - (exchange_node->n_exchanges * sizeof(int *));
43.411 - if(NULL == exchange_node->rank_exchanges) {
43.412 - goto Error;
43.413 - }
43.414 - for (i = 0; i < exchange_node->n_exchanges; i++) {
43.415 - exchange_node->rank_exchanges[i] = (int *) malloc
43.416 - ((tree_order - 1) * sizeof(int));
43.417 - if( NULL == exchange_node->rank_exchanges ) {
43.418 - goto Error;
43.419 - }
43.420 - }
43.421 - k_temp1 = tree_order;
43.422 - k_temp2 = 1;
43.423 - /* fill in exchange partners */
43.424 - /* Ok, now we start with the actual algorithm */
43.425 - for( i = 0; i < exchange_node->n_exchanges; i ++) {
43.426 - /*fprintf(stderr,"Starting Level %d\n",i+1);*/
43.427 -
43.428 - FIND_BASE(base,exchange_node->reindex_myid,i+1,tree_order);
43.429 - /*fprintf(stderr,"Myid %d base %d\n",node_rank,base);*/
43.430 - for( j = 0; j < (tree_order-1); j ++ ) {
43.431 - peer = base + (exchange_node->reindex_myid + k_temp2*(j+1))%k_temp1;
43.432 - if ( peer < stray ) {
43.433 - exchange_node->rank_exchanges[i][j] = exchange_node->reindex_map[peer];
43.434 - /* an actual exchange occurs, bump the counter */
43.435 -
43.436 - } else {
43.437 - /* out of range, skip it - do not bump the n_actual_exchanges counter */
43.438 - exchange_node->rank_exchanges[i][j] = -1;
43.439 - }
43.440 -
43.441 - }
43.442 - k_temp1 *= tree_order;
43.443 - k_temp2 *= tree_order;
43.444 - }
43.445 - for(i = 0; i < pow_k; i++){
43.446 - for(j = 0; j < (tree_order-1); j++){
43.447 - if(-1 != exchange_node->rank_exchanges[i][j]){
43.448 - /* then bump the counter */
43.449 - exchange_node->n_actual_exchanges++;
43.450 - }
43.451 - }
43.452 - }
43.453 -
43.454 - } else {
43.455 - /* we are extra ranks and we don't participate in the exchange :( */
43.456 - exchange_node->n_exchanges=0;
43.457 - exchange_node->rank_exchanges=NULL;
43.458 - }
43.459 -
43.460 -
43.461 - /* set the number of tags needed per stripe - this must be the
43.462 - * same across all procs in the communicator.
43.463 - */
43.464 - /* do we need this one */
43.465 - exchange_node->n_tags = tree_order * n_levels + 1;
43.466 -
43.467 - free(prev_data);
43.468 - free(current_data);
43.469 - free(group_info);
43.470 -
43.471 - /* successful return */
43.472 - return OMPI_SUCCESS;
43.473 -
43.474 -Error:
43.475 -
43.476 - if (NULL != exchange_node->rank_extra_sources_array) {
43.477 - free(exchange_node->rank_extra_sources_array);
43.478 - }
43.479 -
43.480 - if (NULL != exchange_node->rank_exchanges) {
43.481 - for (i = 0; i < exchange_node->n_exchanges; i++) {
43.482 - if (NULL != exchange_node->rank_exchanges[i]) {
43.483 - free(exchange_node->rank_exchanges[i]);
43.484 - }
43.485 - }
43.486 - free(exchange_node->rank_exchanges);
43.487 - }
43.488 -
43.489 - if (NULL != prev_data ){
43.490 - free(prev_data);
43.491 - }
43.492 -
43.493 - if(NULL != current_data) {
43.494 - free(current_data);
43.495 - }
43.496 -
43.497 - if(NULL != group_info) {
43.498 - free(group_info);
43.499 - }
43.500 -
43.501 - /* error return */
43.502 - return OMPI_ERROR;
43.503 -}
43.504 -/* Fill exchange_node with the recursive k-nomial exchange pattern of rank node_rank in a group of
43.505 - * num_nodes ranks (radix tree_order, clamped to num_nodes). Returns OMPI_SUCCESS, or OMPI_ERROR if an allocation fails. */
43.506 -OMPI_DECLSPEC int mca_common_netpatterns_setup_recursive_knomial_tree_node(
43.507 - int num_nodes, int node_rank, int tree_order,
43.508 - mca_common_netpatterns_k_exchange_node_t *exchange_node)
43.509 -{
43.510 - /* local variables */
43.511 - int i, j, tmp, cnt;
43.512 - int n_levels;
43.513 - int k_base, kpow_num, peer;
43.514 -
43.515 - NETPATTERNS_VERBOSE(
43.516 - ("Enter mca_common_netpatterns_setup_recursive_knomial_tree_node(num_nodes=%d, node_rank=%d, tree_order=%d)",
43.517 - num_nodes, node_rank, tree_order));
43.518 -
43.519 - assert(num_nodes > 1);
43.520 - assert(tree_order > 1);
43.521 - if (tree_order > num_nodes) {
43.522 - tree_order = num_nodes;
43.523 - }
43.524 -
43.525 - exchange_node->tree_order = tree_order;
43.526 - /* clear the owned pointers up front so the Error path never frees stale caller data */
43.527 - exchange_node->rank_extra_sources_array = NULL;
43.528 - exchange_node->rank_exchanges = NULL;
43.529 - n_levels = 0; /* number of levels in the tree */
43.530 - cnt = 1; /* number of ranks in the given level */
43.531 - while ( num_nodes > cnt ) {
43.532 - cnt *= tree_order;
43.533 - n_levels++;
43.534 - }
43.535 -
43.536 - /* figure out the largest power of tree_order that is less than or equal to
43.537 - * num_nodes */
43.538 - if ( cnt > num_nodes) {
43.539 - cnt /= tree_order;
43.540 - n_levels--;
43.541 - }
43.542 -
43.543 - exchange_node->log_tree_order = n_levels;
43.544 - exchange_node->n_largest_pow_tree_order = cnt;
43.545 -
43.546 - /* set node characteristics - a node that is not within the largest
43.547 - * power of tree_order will just send its data to a node that will participate
43.548 - * in the recursive exchange, and get the result back at the end.
43.549 - */
43.550 - if (node_rank + 1 > cnt) {
43.551 - exchange_node->node_type = EXTRA_NODE;
43.552 - } else {
43.553 - exchange_node->node_type = EXCHANGE_NODE;
43.554 - }
43.555 -
43.556 -
43.557 - /* set the initial and final data exchanges - those that are not
43.558 - * part of the recursive exchange.
43.559 - */
43.560 - if (EXCHANGE_NODE == exchange_node->node_type) {
43.561 - exchange_node->n_extra_sources = 0;
43.562 - for (i = 0, tmp = node_rank * (tree_order - 1) + cnt + i;
43.563 - tmp < num_nodes && i < tree_order - 1;
43.564 - ++i, ++tmp) {
43.565 - ++exchange_node->n_extra_sources;
43.566 - }
43.567 -
43.568 - assert(exchange_node->n_extra_sources < tree_order);
43.569 -
43.570 - if (exchange_node->n_extra_sources > 0) {
43.571 - exchange_node->rank_extra_sources_array = (int *) malloc
43.572 - (exchange_node->n_extra_sources * sizeof(int));
43.573 - if( NULL == exchange_node->rank_extra_sources_array ) {
43.574 - goto Error;
43.575 - }
43.576 - for (i = 0, tmp = node_rank * (tree_order - 1) + cnt;
43.577 - i < tree_order - 1 && tmp < num_nodes; ++i, ++tmp) {
43.578 - NETPATTERNS_VERBOSE(("extra_source#%d = %d", i, tmp));
43.579 - exchange_node->rank_extra_sources_array[i] = tmp;
43.580 - }
43.581 - } else {
43.582 - exchange_node->rank_extra_sources_array = NULL;
43.583 - }
43.584 - } else {
43.585 - exchange_node->n_extra_sources = 1;
43.586 - exchange_node->rank_extra_sources_array = (int *) malloc (sizeof(int));
43.587 - if( NULL == exchange_node->rank_extra_sources_array ) {
43.588 - goto Error;
43.589 - }
43.590 - exchange_node->rank_extra_sources_array[0] = (node_rank - cnt) / (tree_order - 1);
43.591 - NETPATTERNS_VERBOSE(("extra_source#%d = %d", 0,
43.592 - exchange_node->rank_extra_sources_array[0] ));
43.593 - }
43.594 -
43.595 - /* set the exchange pattern */
43.596 - if (EXCHANGE_NODE == exchange_node->node_type) {
43.597 - exchange_node->n_exchanges = n_levels;
43.598 - /* Allocate the per-level array of peer lists; calloc zeroes the row pointers
43.599 - so the Error path only frees rows that were really allocated */
43.600 - exchange_node->rank_exchanges = (int **) calloc
43.601 - (exchange_node->n_exchanges, sizeof(int *));
43.602 - if(NULL == exchange_node->rank_exchanges) {
43.603 - goto Error;
43.604 - }
43.605 - for (i = 0; i < exchange_node->n_exchanges; i++) {
43.606 - exchange_node->rank_exchanges[i] = (int *) malloc
43.607 - ((tree_order - 1) * sizeof(int));
43.608 - if( NULL == exchange_node->rank_exchanges[i] ) {
43.609 - goto Error;
43.610 - }
43.611 - }
43.612 - /* fill in exchange partners */
43.613 - for(i = 0, kpow_num = 1; i < exchange_node->n_exchanges;
43.614 - i++, kpow_num *= tree_order) {
43.615 - k_base = node_rank / (kpow_num * tree_order);
43.616 - for(j = 1; j < tree_order; j++) {
43.617 - peer = node_rank + kpow_num * j;
43.618 - if (k_base != peer/(kpow_num * tree_order)) {
43.619 - /* Wraparound the number */
43.620 - peer = k_base * (kpow_num * tree_order) +
43.621 - peer % (kpow_num * tree_order);
43.622 - }
43.623 - exchange_node->rank_exchanges[i][j - 1] = peer;
43.624 - NETPATTERNS_VERBOSE(("rank_exchanges#(%d,%d)/%d = %d",
43.625 - i, j, tree_order, peer));
43.626 - }
43.627 - }
43.628 - } else {
43.629 - exchange_node->n_exchanges=0;
43.630 - exchange_node->rank_exchanges=NULL;
43.631 - }
43.632 -
43.633 - /* set the number of tags needed per stripe - this must be the
43.634 - * same across all procs in the communicator.
43.635 - */
43.636 - /* do we need this one */
43.637 - exchange_node->n_tags = tree_order * n_levels + 1;
43.638 -
43.639 - /* successful return */
43.640 - return OMPI_SUCCESS;
43.641 -
43.642 -Error:
43.643 - /* Release everything allocated so far. free(NULL) is a no-op and rows that
43.644 - * were never allocated are NULL (calloc), so no per-pointer guards are needed.
43.645 - * The pointers are cleared so the caller is not left with dangling values. */
43.646 - free(exchange_node->rank_extra_sources_array);
43.647 - exchange_node->rank_extra_sources_array = NULL;
43.648 -
43.649 - if (NULL != exchange_node->rank_exchanges) {
43.650 - for (i = 0; i < exchange_node->n_exchanges; i++) {
43.651 - free(exchange_node->rank_exchanges[i]);
43.652 - }
43.653 - free(exchange_node->rank_exchanges);
43.654 - exchange_node->rank_exchanges = NULL;
43.655 - }
43.656 -
43.657 - /* error return */
43.658 - return OMPI_ERROR;
43.659 -}
43.660 -
43.661 -#if 1
43.662 -OMPI_DECLSPEC int mca_common_netpatterns_setup_recursive_doubling_n_tree_node(int num_nodes, int node_rank, int tree_order,
43.663 - mca_common_netpatterns_pair_exchange_node_t *exchange_node)
43.664 -{
43.665 - /* Fills exchange_node with the radix-tree_order recursive-doubling pattern of node_rank; OMPI_ERROR on allocation failure */
43.666 - int i, tmp, cnt;
43.667 - int n_levels;
43.668 - int shift, mask;
43.669 -
43.670 - NETPATTERNS_VERBOSE(("Enter mca_common_netpatterns_setup_recursive_doubling_n_tree_node(num_nodes=%d, node_rank=%d, tree_order=%d)", num_nodes, node_rank, tree_order));
43.671 -
43.672 - assert(num_nodes > 1);
43.673 - while (tree_order > num_nodes) {
43.674 - tree_order /= 2;
43.675 - }
43.676 -
43.677 - exchange_node->tree_order = tree_order;
43.678 - /* Only power-of-two radices >= 2 are supported; a radix below 2 would make the level loop below spin forever */
43.679 - assert(tree_order > 1 && 0 == (tree_order & (tree_order - 1)));
43.680 -
43.681 - /* figure out number of levels in the tree */
43.682 - n_levels = 0;
43.683 - /* cnt - number of ranks in given level */
43.684 - cnt=1;
43.685 - while ( num_nodes > cnt ) {
43.686 - cnt *= tree_order;
43.687 - n_levels++;
43.688 - }
43.689 -
43.690 - /* figure out the largest power of tree_order that is less than or equal to
43.691 - * num_nodes */
43.692 - if ( cnt > num_nodes) {
43.693 - cnt /= tree_order;
43.694 - n_levels--;
43.695 - }
43.696 - exchange_node->log_tree_order = n_levels;
43.697 - if (2 == tree_order) {
43.698 - exchange_node->log_2 = exchange_node->log_tree_order;
43.699 - }
43.700 -
43.701 - tmp=1;
43.702 - for (i=0 ; i < n_levels ; i++ ) {
43.703 - tmp *= tree_order;
43.704 - }
43.705 - /* Ishai: I see no reason for calculating tmp. Add an assert before deleting it */
43.706 - assert(tmp == cnt);
43.707 -
43.708 - exchange_node->n_largest_pow_tree_order = tmp;
43.709 - if (2 == tree_order) {
43.710 - exchange_node->n_largest_pow_2 = exchange_node->n_largest_pow_tree_order;
43.711 - }
43.712 -
43.713 - /* set node characteristics - a node that is not within the largest
43.714 - * power of tree_order will just send its data to a node that will participate
43.715 - * in the recursive doubling, and get the result back at the end.
43.716 - */
43.717 - if ( node_rank + 1 > cnt ) {
43.718 - exchange_node->node_type = EXTRA_NODE;
43.719 - } else {
43.720 - exchange_node->node_type = EXCHANGE_NODE;
43.721 - }
43.722 -
43.723 - /* set the initial and final data exchanges - those that are not
43.724 - * part of the recursive doubling.
43.725 - */
43.726 - if ( EXCHANGE_NODE == exchange_node->node_type ) {
43.727 - exchange_node->n_extra_sources = 0;
43.728 - for (tmp = node_rank + cnt; tmp < num_nodes; tmp += cnt) {
43.729 - ++exchange_node->n_extra_sources;
43.730 - }
43.731 - if (exchange_node->n_extra_sources > 0) {
43.732 - exchange_node->rank_extra_sources_array = (int *) malloc
43.733 - (exchange_node->n_extra_sources * sizeof(int));
43.734 - if( NULL == exchange_node->rank_extra_sources_array ) {
43.735 - goto Error;
43.736 - }
43.737 - for (i = 0, tmp = node_rank + cnt; tmp < num_nodes; ++i, tmp += cnt) {
43.738 - NETPATTERNS_VERBOSE(("extra_source#%d = %d", i, tmp));
43.739 - exchange_node->rank_extra_sources_array[i] = tmp;
43.740 - }
43.741 - } else {
43.742 - exchange_node->rank_extra_sources_array = NULL;
43.743 - }
43.744 - } else {
43.745 - exchange_node->n_extra_sources = 1;
43.746 - exchange_node->rank_extra_sources_array = (int *) malloc (sizeof(int));
43.747 - if( NULL == exchange_node->rank_extra_sources_array ) {
43.748 - goto Error;
43.749 - }
43.750 - exchange_node->rank_extra_sources_array[0] = node_rank & (cnt - 1);
43.751 - NETPATTERNS_VERBOSE(("extra_source#%d = %d", 0, node_rank & (cnt - 1)));
43.752 - }
43.753 -
43.754 - /* Ishai: To be compatible with the old structure - should be removed later */
43.755 - if (1 == exchange_node->n_extra_sources) {
43.756 - exchange_node->rank_extra_source = exchange_node->rank_extra_sources_array[0];
43.757 - } else {
43.758 - exchange_node->rank_extra_source = -1;
43.759 - }
43.760 -
43.761 - /* set the exchange pattern */
43.762 - if ( EXCHANGE_NODE == exchange_node->node_type ) {
43.763 - exchange_node->n_exchanges = n_levels * (tree_order - 1);
43.764 - exchange_node->rank_exchanges = (int *) malloc
43.765 - (exchange_node->n_exchanges * sizeof(int));
43.766 - if( NULL == exchange_node->rank_exchanges ) {
43.767 - goto Error;
43.768 - }
43.769 -
43.770 - /* fill in exchange partners: tree_order - 1 peers per level, found by XOR-ing the rank with mask * shift */
43.771 - for ( i = 0, shift = 1 ; i < exchange_node->n_exchanges ; shift *= tree_order ) {
43.772 - for ( mask = 1 ; mask < tree_order ; ++mask, ++i ) {
43.773 - exchange_node->rank_exchanges[i] = node_rank ^ (mask * shift);
43.774 - NETPATTERNS_VERBOSE(("rank_exchanges#%d/%d = %d", i, tree_order, node_rank ^ (mask * shift)));
43.775 - }
43.776 - }
43.777 -
43.778 - } else {
43.779 -
43.780 - exchange_node->n_exchanges=0;
43.781 - exchange_node->rank_exchanges=NULL;
43.782 -
43.783 - }
43.784 -
43.785 - /* set the number of tags needed per stripe - this must be the
43.786 - * same across all procs in the communicator.
43.787 - */
43.788 - /* Ishai: Need to find out what is n_tags */
43.789 - exchange_node->n_tags = tree_order * n_levels + 1;
43.790 -
43.791 - /* successful return */
43.792 - return OMPI_SUCCESS;
43.793 -
43.794 -Error:
43.795 - /* free(NULL) is a no-op; clear both pointers so a later release of this node cannot free them again */
43.796 - free(exchange_node->rank_extra_sources_array);
43.797 - exchange_node->rank_extra_sources_array = NULL;
43.798 - exchange_node->rank_exchanges = NULL;
43.799 - /* error return */
43.800 - return OMPI_ERROR;
43.801 -}
43.802 -
43.803 -OMPI_DECLSPEC void mca_common_netpatterns_free_recursive_doubling_tree_node(
43.804 - mca_common_netpatterns_pair_exchange_node_t *exchange_node)
43.805 -{
43.806 - NETPATTERNS_VERBOSE(("About to release rank_extra_sources_array and rank_exchanges"));
43.807 - /* Release both heap arrays of the node. free(NULL) is a no-op, so no guards; clearing the pointers makes a repeated call harmless */
43.808 - free(exchange_node->rank_extra_sources_array);
43.809 - exchange_node->rank_extra_sources_array = NULL;
43.810 -
43.811 - free(exchange_node->rank_exchanges);
43.812 - exchange_node->rank_exchanges = NULL;
43.813 -
43.814 -}
43.815 -#endif
43.816 -/* Radix-2 convenience wrapper: forwards to the n-tree setup with tree_order fixed at 2 and returns its result. */
43.817 -OMPI_DECLSPEC int mca_common_netpatterns_setup_recursive_doubling_tree_node(int num_nodes, int node_rank,
43.818 - mca_common_netpatterns_pair_exchange_node_t *exchange_node)
43.819 -{
43.820 - return mca_common_netpatterns_setup_recursive_doubling_n_tree_node(num_nodes, node_rank, 2, exchange_node);
43.821 -}
43.822 -
43.823 -#if 0
43.824 -/*OMPI_DECLSPEC int old_mca_common_netpatterns_setup_recursive_doubling_tree_node(int num_nodes, int node_rank,*/
43.825 -OMPI_DECLSPEC int mca_common_netpatterns_setup_recursive_doubling_n_tree_node(int num_nodes, int node_rank,int tree_order,
43.826 - mca_common_netpatterns_pair_exchange_node_t *exchange_node)
43.827 -{
43.828 - /* Older variant of the n-tree setup: fills the single-extra-source fields of exchange_node. */
43.829 - /*int tree_order;*/
43.830 - int i,tmp,cnt,result,n_extra_nodes;
43.831 - int n_exchanges;
43.832 -
43.833 - /* figure out number of levels in the tree */
43.834 - /* NOTE(review): this definition sits inside an #if 0 region, so it is not compiled */
43.835 - n_exchanges=0;
43.836 - result=num_nodes;
43.837 -/* tree_order=2;*/
43.838 - /* cnt - number of ranks in given level */
43.839 - cnt=1;
43.840 - while( num_nodes > cnt ) {
43.841 - cnt*=tree_order;
43.842 - n_exchanges++;
43.843 - };
43.844 -
43.845 - /* figure out the largest power of tree_order that is less than or equal to
43.846 - * num_nodes */
43.847 - if( cnt > num_nodes) {
43.848 - cnt/=tree_order;
43.849 - n_exchanges--;
43.850 - }
43.851 - exchange_node->log_2=n_exchanges;
43.852 - /* NOTE(review): tmp is built from powers of 2 while cnt above uses tree_order; they agree only when tree_order == 2 */
43.853 - tmp=1;
43.854 - for(i=0 ; i < n_exchanges ; i++ ) {
43.855 - tmp*=2;
43.856 - }
43.857 - exchange_node->n_largest_pow_2=tmp;
43.858 -
43.859 - /* set node characteristics - a node that is not within the largest
43.860 - * power of 2 will just send its data to a node that will participate
43.861 - * in the recursive doubling, and get the result back at the end.
43.862 - */
43.863 - if( node_rank+1 > cnt ) {
43.864 - exchange_node->node_type=EXTRA_NODE;
43.865 - } else {
43.866 - exchange_node->node_type=EXCHANGE_NODE;
43.867 - }
43.868 -
43.869 - /* set the initial and final data exchanges - those that are not
43.870 - * part of the recursive doubling.
43.871 - */
43.872 - n_extra_nodes=num_nodes-cnt;
43.873 -
43.874 - if ( EXCHANGE_NODE == exchange_node->node_type ) {
43.875 -
43.876 - if( node_rank < n_extra_nodes ) {
43.877 - exchange_node->n_extra_sources=1;
43.878 - exchange_node->rank_extra_source=cnt+node_rank;
43.879 - } else {
43.880 - exchange_node->n_extra_sources=0;
43.881 - exchange_node->rank_extra_source=-1;
43.882 - }
43.883 -
43.884 - } else {
43.885 - exchange_node->n_extra_sources=1;
43.886 - exchange_node->rank_extra_source=node_rank-cnt;
43.887 - }
43.888 -
43.889 - /* set the exchange pattern */
43.890 - if( EXCHANGE_NODE == exchange_node->node_type ) {
43.891 -
43.892 - exchange_node->n_exchanges=n_exchanges;
43.893 - exchange_node->rank_exchanges=(int *) malloc
43.894 - (n_exchanges*sizeof(int));
43.895 - if( NULL == exchange_node->rank_exchanges ) {
43.896 - goto Error;
43.897 - }
43.898 -
43.899 - /* fill in exchange partners: at step i the partner differs from node_rank by 2^i, the sign chosen by bit i of node_rank */
43.900 - result=1;
43.901 - tmp=node_rank;
43.902 - for( i=0 ; i < n_exchanges ; i++ ) {
43.903 - if(tmp & 1 ) {
43.904 - exchange_node->rank_exchanges[i]=
43.905 - node_rank-result;
43.906 - } else {
43.907 - exchange_node->rank_exchanges[i]=
43.908 - node_rank+result;
43.909 - }
43.910 - result*=2;
43.911 - tmp/=2;
43.912 - }
43.913 -
43.914 - } else {
43.915 -
43.916 - exchange_node->n_exchanges=0;
43.917 - exchange_node->rank_exchanges=NULL;
43.918 -
43.919 - }
43.920 -
43.921 - /* set the number of tags needed per stripe - this must be the
43.922 - * same across all procs in the communicator.
43.923 - */
43.924 - exchange_node->n_tags=2*n_exchanges+1;
43.925 -
43.926 - /* Ishai: this variant never allocates the array; NULL it so the release routine's free() is safe for callers of this function */
43.927 - exchange_node->rank_extra_sources_array = NULL;
43.928 -
43.929 - /* successful return */
43.930 - return OMPI_SUCCESS;
43.931 -
43.932 -Error:
43.933 -
43.934 - /* error return (only reached when the rank_exchanges allocation fails) */
43.935 - return OMPI_ERROR;
43.936 -}
43.937 -#endif
43.938 -
44.1 --- a/ompi/mca/common/netpatterns/common_netpatterns_knomial_tree.h Tue Feb 05 18:15:32 2013 +0000
44.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000
44.3 @@ -1,254 +0,0 @@
44.4 -/*
44.5 - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
44.6 - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
44.7 - * Copyright (c) 2012 Los Alamos National Security, LLC.
44.8 - * All rights reserved.
44.9 - * $COPYRIGHT$
44.10 - *
44.11 - * Additional copyrights may follow
44.12 - *
44.13 - * $HEADER$
44.14 - */
44.15 -
44.16 -#ifndef COMM_PATTERNS_KNOMIAL_TREE_H
44.17 -#define COMM_PATTERNS_KNOMIAL_TREE_H
44.18 -
44.19 -#include "ompi_config.h"
44.20 -
44.21 -BEGIN_C_DECLS
44.22 -
44.23 -
44.24 -/*
44.25 - * Pair-wise data exchange
44.26 - */
44.27 -
44.28 -/* node_type values: EXCHANGE_NODE takes part in the exchange steps; EXTRA_NODE only trades data with one exchange node */
44.29 -enum {
44.30 - EXCHANGE_NODE,
44.31 - EXTRA_NODE
44.32 -};
44.33 -
44.34 -struct mca_common_netpatterns_pair_exchange_node_t {
44.35 - /* Per-rank description of a pair-wise (recursive doubling) exchange pattern */
44.36 - /* Order of a node in the tree - usually 2 */
44.37 - int tree_order;
44.38 -
44.39 - /* number of nodes this node will exchange data with */
44.40 - int n_exchanges;
44.41 -
44.42 - /* ranks of nodes involved in data exchange (heap array of n_exchanges entries, NULL for an extra node) */
44.43 - int *rank_exchanges;
44.44 -
44.45 - /* number of extra sources of data - outside largest power of 2 in
44.46 - * this group */
44.47 - int n_extra_sources;
44.48 -
44.49 - /* rank of the extra source; the scalar mirrors entry 0 of the array when n_extra_sources == 1, else -1 */
44.50 - /* deprecated */ int rank_extra_source;
44.51 - int *rank_extra_sources_array;
44.52 -
44.53 - /* number of tags needed per stripe */
44.54 - int n_tags;
44.55 -
44.56 - /* log 2 of largest full power of 2 for this node set; log_tree_order is the same for a general tree_order */
44.57 - /* deprecated */ int log_2;
44.58 - int log_tree_order;
44.59 -
44.60 - /* largest power of 2 that fits in this group; n_largest_pow_tree_order is the same for a general tree_order */
44.61 - /* deprecated */ int n_largest_pow_2;
44.62 - int n_largest_pow_tree_order;
44.63 -
44.64 - /* node type: EXCHANGE_NODE or EXTRA_NODE */
44.65 - int node_type;
44.66 -
44.67 -};
44.68 -typedef struct mca_common_netpatterns_pair_exchange_node_t mca_common_netpatterns_pair_exchange_node_t;
44.69 -
44.70 -struct mca_common_netpatterns_payload_t {
44.71 - int s_len;
44.72 - int r_len;
44.73 - int s_offset;
44.74 - int r_offset;
44.75 -};
44.76 -typedef struct mca_common_netpatterns_payload_t mca_common_netpatterns_payload_t;
44.77 -
44.78 -struct mca_common_netpatterns_k_exchange_node_t {
44.79 - /* Order of a node in the tree - usually 2 */
44.80 - int tree_order;
44.81 - /* number of nodes this node will exchange data with */
44.82 - int n_exchanges;
44.83 - /* total number of exchanges that I actually participate in */
44.84 - int n_actual_exchanges;
44.85 - /* ranks of nodes involved in data exchnge */
44.86 - int **rank_exchanges;
44.87 - /* number of extra sources of data - outside largest power of 2 in
44.88 - * this group */
44.89 - int n_extra_sources;
44.90 - /* rank/s of the extra source */
44.91 - int *rank_extra_sources_array;
44.92 - /* number of tags needed per stripe */
44.93 - int n_tags;
44.94 - /* log k of largest full power of k for this node set */
44.95 - int log_tree_order;
44.96 - /* largest power of k that fits in this group */
44.97 - int n_largest_pow_tree_order;
44.98 - /* node type */
44.99 - int node_type;
44.100 - /* start of extra ranks k_nomial */
44.101 - int k_nomial_stray;
44.102 - /* reindex map */
44.103 - int *reindex_map;
44.104 - /* inverse of reindex map, i.e. given a reindexed id find out its actual rank */
44.105 - int *inv_reindex_map;
44.106 - /* reindexed node_rank */
44.107 - int reindex_myid;
44.108 - /* 2-d array that hold payload info for each level of recursive k-ing */
44.109 - mca_common_netpatterns_payload_t **payload_info;
44.110 -};
44.111 -typedef struct mca_common_netpatterns_k_exchange_node_t
44.112 - mca_common_netpatterns_k_exchange_node_t;
44.113 -
44.114 -OMPI_DECLSPEC int mca_common_netpatterns_setup_recursive_doubling_n_tree_node(int num_nodes, int node_rank, int tree_order,
44.115 - mca_common_netpatterns_pair_exchange_node_t *exchange_node);
44.116 -
44.117 -OMPI_DECLSPEC void mca_common_netpatterns_free_recursive_doubling_tree_node(
44.118 - mca_common_netpatterns_pair_exchange_node_t *exchange_node);
44.119 -
44.120 -OMPI_DECLSPEC int mca_common_netpatterns_setup_recursive_doubling_tree_node(int num_nodes, int node_rank,
44.121 - mca_common_netpatterns_pair_exchange_node_t *exchange_node);
44.122 -
44.123 -OMPI_DECLSPEC int mca_common_netpatterns_setup_recursive_knomial_tree_node(
44.124 - int num_nodes, int node_rank, int tree_order,
44.125 - mca_common_netpatterns_k_exchange_node_t *exchange_node);
44.126 -
44.127 -OMPI_DECLSPEC int mca_common_netpatterns_setup_recursive_knomial_allgather_tree_node(
44.128 - int num_nodes, int node_rank, int tree_order, int *hier_ranks,
44.129 - mca_common_netpatterns_k_exchange_node_t *exchange_node);
44.130 -
44.131 -
44.132 -/* Input: k_exchange_node structure
44.133 - Output: index in rank_exchanges array that points
44.134 - to the "start_point" for outgoing send.
44.135 -
44.136 - Please see below example of usage:
44.137 - for (i = start_point ; i > 0; i--)
44.138 - for (k = 0; k < tree_radix; k++)
44.139 - send messages to exchange_node->rank_exchanges[i][k];
44.140 -*/
44.141 -
44.142 -static inline __opal_attribute_always_inline__
44.143 -int mca_common_netpatterns_get_knomial_level(
44.144 - int my_rank, int src_rank,
44.145 - int radix, int size,
44.146 - int *k_level)
44.147 -{
44.148 - int distance,
44.149 - pow_k;
44.150 - int logk_level = 0;
44.151 -
44.152 - /* Calculate disctance from source of data */
44.153 - distance = src_rank - my_rank;
44.154 -
44.155 - /* Wrap around */
44.156 - if (0 > distance) {
44.157 - distance += size;
44.158 - }
44.159 -
44.160 - pow_k = 1;
44.161 - while(distance / (pow_k * radix)) {
44.162 - pow_k *= radix;
44.163 - ++logk_level;
44.164 - }
44.165 - --logk_level;
44.166 -
44.167 - *k_level = pow_k;
44.168 - return logk_level;
44.169 -}
44.170 -
44.171 -/* Input: my_rank, root, radix, size
44.172 - * Output: source of the data, offset in power of K
44.173 - */
44.174 -static inline __opal_attribute_always_inline__
44.175 -int mca_common_netpatterns_get_knomial_data_source(
44.176 - int my_rank, int root, int radix, int size,
44.177 - int *k_level, int *logk_level)
44.178 -{
44.179 - int level = radix;
44.180 - int step = 0;
44.181 -
44.182 - /* Calculate source of the data */
44.183 - while((0 == (root - my_rank) % level)
44.184 - && (level <= size)) {
44.185 - level *= radix;
44.186 - ++step;
44.187 - }
44.188 -
44.189 - *k_level = level/radix;
44.190 - *logk_level = step;
44.191 - return my_rank - (my_rank % level - root % level);
44.192 -}
44.193 -
44.194 -/* Input: my_rank, radix,
44.195 - * k_level - that you get from mca_common_netpatterns_get_knomial_data_source
44.196 - * k_step - some integer
44.197 - * Output: peer - next children in the tree
44.198 - * Usage:
44.199 - * src = mca_common_netpatterns_get_knomial_data_source(
44.200 - * my_rank, root, radix, size,
44.201 - * &k_level, &logk_level)
44.202 - * recv_from(src......);
44.203 - *
44.204 - * MCA_COMMON_NETPATTERNS_GET_NEXT_KNOMIAL_INIT(step_info, k_level, my_rank);
44.205 - * while(MCA_COMMON_NETPATTERNS_GET_NEXT_KNOMIAL_PEER_CHECK_LEVEL(step_info)) {
44.206 - * MCA_COMMON_NETPATTERNS_GET_NEXT_KNOMIAL_PEER(my_rank, radix, step_info, peer);
44.207 - * send_to(peer....);
44.208 - * }
44.209 - * for more example please grep in ptpcoll bcol bcast files
44.210 - */
44.211 -
44.212 -typedef struct mca_common_netpatter_knomial_step_info_t {
44.213 - int k_step;
44.214 - int k_level;
44.215 - int k_tmp_peer;
44.216 -} mca_common_netpatter_knomial_step_info_t;
44.217 -
44.218 -#define MCA_COMMON_NETPATTERNS_GET_NEXT_KNOMIAL_UPDATE_LEVEL_FOR_BCAST(step_info, radix)\
44.219 -do { \
44.220 - if (1 != step_info.k_step) { \
44.221 - step_info.k_level /= radix; \
44.222 - } \
44.223 -} while (0) \
44.224 -
44.225 -#define MCA_COMMON_NETPATTERNS_GET_NEXT_KNOMIAL_INIT(step_info, in_k_level, in_peer)\
44.226 -do { \
44.227 - step_info.k_step = 1; \
44.228 - step_info.k_level = in_k_level; \
44.229 - step_info.k_tmp_peer = in_peer; \
44.230 -} while (0)
44.231 -
44.232 -#define MCA_COMMON_NETPATTERNS_GET_NEXT_KNOMIAL_PEER_CHECK_LEVEL(step_info) \
44.233 - (step_info.k_level > 1)
44.234 -
44.235 -#define MCA_COMMON_NETPATTERNS_GET_NEXT_KNOMIAL_PEER(my_rank, radix, step_info, peer) \
44.236 -do { \
44.237 - int rank_radix_base = my_rank/step_info.k_level; \
44.238 - \
44.239 - peer = step_info.k_tmp_peer + step_info.k_level/radix; \
44.240 - if (rank_radix_base != peer/step_info.k_level) { \
44.241 - /* Wraparound the number */ \
44.242 - peer -= step_info.k_level; \
44.243 - assert(peer >=0); \
44.244 - } \
44.245 - ++step_info.k_step; \
44.246 - if (radix == step_info.k_step) { \
44.247 - step_info.k_level /= radix; \
44.248 - step_info.k_step = 1; \
44.249 - step_info.k_tmp_peer = my_rank; \
44.250 - } else { \
44.251 - step_info.k_tmp_peer = peer; \
44.252 - } \
44.253 - \
44.254 -} while (0)
44.255 -
44.256 -END_C_DECLS
44.257 -#endif
45.1 --- a/ompi/mca/common/netpatterns/common_netpatterns_multinomial_tree.c Tue Feb 05 18:15:32 2013 +0000
45.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000
45.3 @@ -1,190 +0,0 @@
45.4 -/*
45.5 - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
45.6 - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
45.7 - * $COPYRIGHT$
45.8 - *
45.9 - * Additional copyrights may follow
45.10 - *
45.11 - * $HEADER$
45.12 - */
45.13 -
45.14 -#include "ompi_config.h"
45.15 -#ifdef HAVE_UNISTD_H
45.16 -#include <unistd.h>
45.17 -#endif
45.18 -#include <sys/types.h>
45.19 -#ifdef HAVE_SYS_MMAN_H
45.20 -#include <sys/mman.h>
45.21 -#endif
45.22 -#include <fcntl.h>
45.23 -#include <stdlib.h>
45.24 -
45.25 -#include "ompi/constants.h"
45.26 -#include "common_netpatterns.h"
45.27 -
45.28 -
45.29 -/* setup an multi-nomial tree - for each node in the tree
45.30 - * this returns it's parent, and it's children */
45.31 -
45.32 -OMPI_DECLSPEC int mca_common_netpatterns_setup_multinomial_tree(int tree_order, int num_nodes,
45.33 - mca_common_netpatterns_tree_node_t *tree_nodes)
45.34 -{
45.35 - /* local variables */
45.36 - int i,result;
45.37 - int cnt, n_nodes_in_this_level,node_index;
45.38 - int n_cum_nodes,current_level,node,n_nodes_prev_level,rank,parent_rank;
45.39 - int n_nodes_in_last_level,n_full_stripes,n_in_partial_stipe,n_children;
45.40 - int n_lvls_in_tree;
45.41 -
45.42 - /* sanity check */
45.43 - if( 1 >= tree_order ) {
45.44 - goto Error;
45.45 - }
45.46 -
45.47 -
45.48 - /* figure out number of levels in the tree */
45.49 -
45.50 - n_lvls_in_tree=0;
45.51 - result=num_nodes;
45.52 - /* cnt - number of ranks in given level */
45.53 - cnt=1;
45.54 - /* cummulative count of ranks */
45.55 - while( 0 < result ) {
45.56 - result-=cnt;
45.57 - cnt*=tree_order;
45.58 - n_lvls_in_tree++;
45.59 - };
45.60 -
45.61 - /* loop over tree levels */
45.62 - n_nodes_in_this_level=1;
45.63 - node_index=-1;
45.64 - n_cum_nodes=0;
45.65 - for( current_level = 0 ; current_level < n_lvls_in_tree ; current_level++) {
45.66 -
45.67 - /* loop over nodes in current level */
45.68 - for ( node=0 ; node < n_nodes_in_this_level ; node++ ) {
45.69 - /* get node index */
45.70 - node_index++;
45.71 -
45.72 - /* break if reach group size */
45.73 - if( node_index == num_nodes) {
45.74 - break;
45.75 - }
45.76 -
45.77 - tree_nodes[node_index].my_rank=node_index;
45.78 - tree_nodes[node_index].children_ranks=NULL;
45.79 -
45.80 - /*
45.81 - * Parents
45.82 - */
45.83 - if( 0 == current_level ) {
45.84 - tree_nodes[node_index].n_parents=0;
45.85 - /* get parent index */
45.86 - tree_nodes[node_index].parent_rank=-1;
45.87 - } else {
45.88 - tree_nodes[node_index].n_parents=1;
45.89 - /* get parent index */
45.90 - n_nodes_prev_level=n_nodes_in_this_level/tree_order;
45.91 - if( current_level == n_lvls_in_tree -1 ) {
45.92 - /* load balance the lowest level */
45.93 - parent_rank=node-
45.94 - (node/n_nodes_prev_level)*n_nodes_prev_level;
45.95 - parent_rank=n_cum_nodes-n_nodes_prev_level+
45.96 - parent_rank;
45.97 - tree_nodes[node_index].parent_rank=parent_rank;
45.98 - } else {
45.99 - tree_nodes[node_index].parent_rank=
45.100 - (n_cum_nodes-n_nodes_prev_level)+node/tree_order;
45.101 - }
45.102 - }
45.103 -
45.104 - /*
45.105 - * Children
45.106 - */
45.107 -
45.108 - /* get number of children */
45.109 - if( (n_lvls_in_tree-1) == current_level ) {
45.110 - /* leaves have no nodes */
45.111 - tree_nodes[node_index].n_children=0;
45.112 - tree_nodes[node_index].children_ranks=NULL;
45.113 - } else {
45.114 - /* take into account last level being incomplete */
45.115 - if( (n_lvls_in_tree-2) == current_level ) {
45.116 - /* last level is load balanced */
45.117 - n_nodes_in_last_level=num_nodes-
45.118 - (n_cum_nodes+n_nodes_in_this_level);
45.119 - n_full_stripes=n_nodes_in_last_level/n_nodes_in_this_level;
45.120 - n_in_partial_stipe=n_nodes_in_last_level-
45.121 - n_full_stripes*n_nodes_in_this_level;
45.122 - n_children=n_full_stripes;
45.123 - if( n_full_stripes < tree_order ) {
45.124 - if( node <= n_in_partial_stipe-1 ) {
45.125 - n_children++;
45.126 - }
45.127 - }
45.128 - tree_nodes[node_index].n_children=n_children;
45.129 - if( 0 < n_children ) {
45.130 - tree_nodes[node_index].children_ranks=(int *)
45.131 - malloc(sizeof(int)*n_children);
45.132 - if( NULL == tree_nodes[node_index].children_ranks) {
45.133 - goto Error;
45.134 - }
45.135 - } else {
45.136 - tree_nodes[node_index].children_ranks=NULL;
45.137 - }
45.138 - /* fill in list */
45.139 - for( rank=0 ; rank < n_children ; rank++ ) {
45.140 - tree_nodes[node_index].children_ranks[rank]=
45.141 - node+rank*n_nodes_in_this_level;
45.142 - tree_nodes[node_index].children_ranks[rank]+=
45.143 - (n_cum_nodes+n_nodes_in_this_level);
45.144 - }
45.145 - } else {
45.146 - n_children=tree_order;
45.147 - tree_nodes[node_index].n_children=tree_order;
45.148 - tree_nodes[node_index].children_ranks=(int *)
45.149 - malloc(sizeof(int)*n_children);
45.150 - if( NULL == tree_nodes[node_index].children_ranks) {
45.151 - goto Error;
45.152 - }
45.153 - for( rank=0 ; rank < n_children ; rank++ ) {
45.154 - tree_nodes[node_index].children_ranks[rank]=
45.155 - rank+tree_order*node;
45.156 - tree_nodes[node_index].children_ranks[rank]+=
45.157 - (n_cum_nodes+n_nodes_in_this_level);
45.158 - }
45.159 - }
45.160 - }
45.161 -
45.162 - } /* end node loop */
45.163 -
45.164 - /* update helper counters */
45.165 - n_cum_nodes+=n_nodes_in_this_level;
45.166 - n_nodes_in_this_level*=tree_order;
45.167 - }
45.168 -
45.169 - /* set node type */
45.170 - for(i=0 ; i < num_nodes ; i++ ) {
45.171 - if( 0 == tree_nodes[i].n_parents ) {
45.172 - tree_nodes[i].my_node_type=ROOT_NODE;
45.173 - } else if ( 0 == tree_nodes[i].n_children ) {
45.174 - tree_nodes[i].my_node_type=LEAF_NODE;
45.175 - } else {
45.176 - tree_nodes[i].my_node_type=INTERIOR_NODE;
45.177 - }
45.178 - }
45.179 -
45.180 - /* successful return */
45.181 - return OMPI_SUCCESS;
45.182 -
45.183 -Error:
45.184 - /* free allocated memory */
45.185 - for( i=0 ; i < num_nodes ; i++ ) {
45.186 - if( NULL != tree_nodes[i].children_ranks ) {
45.187 - free(tree_nodes[i].children_ranks);
45.188 - }
45.189 - }
45.190 -
45.191 - /* error return */
45.192 - return OMPI_ERROR;
45.193 -}
46.1 --- a/ompi/mca/common/netpatterns/common_netpatterns_nary_tree.c Tue Feb 05 18:15:32 2013 +0000
46.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000
46.3 @@ -1,442 +0,0 @@
46.4 -/*
46.5 - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
46.6 - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
46.7 - * $COPYRIGHT$
46.8 - *
46.9 - * Additional copyrights may follow
46.10 - *
46.11 - * $HEADER$
46.12 - */
46.13 -
46.14 -#include "ompi_config.h"
46.15 -#ifdef HAVE_UNISTD_H
46.16 -#include <unistd.h>
46.17 -#endif
46.18 -#include <sys/types.h>
46.19 -#ifdef HAVE_SYS_MMAN_H
46.20 -#include <sys/mman.h>
46.21 -#endif
46.22 -#include <fcntl.h>
46.23 -#include <errno.h>
46.24 -#include <stdlib.h>
46.25 -
46.26 -#include "ompi/constants.h"
46.27 -#include "common_netpatterns.h"
46.28 -
46.29 -/*
46.30 - * Create mmaped shared file
46.31 - */
46.32 -
46.33 -/* setup an n-array tree */
46.34 -
46.35 -int mca_common_netpatterns_setup_narray_tree(int tree_order, int my_rank, int num_nodes,
46.36 - mca_common_netpatterns_tree_node_t *my_node)
46.37 -{
46.38 - /* local variables */
46.39 - int n_levels, result;
46.40 - int my_level_in_tree, cnt;
46.41 - int lvl,cum_cnt, my_rank_in_my_level,n_lvls_in_tree;
46.42 - int start_index,end_index;
46.43 -
46.44 - /* sanity check */
46.45 - if( 1 >= tree_order ) {
46.46 - goto Error;
46.47 - }
46.48 -
46.49 - my_node->my_rank=my_rank;
46.50 - my_node->tree_size=num_nodes;
46.51 -
46.52 - /* figure out number of levels in tree */
46.53 - n_levels=0;
46.54 - result=num_nodes-1;
46.55 - while (0 < result ) {
46.56 - result/=tree_order;
46.57 - n_levels++;
46.58 - };
46.59 -
46.60 - /* figure out who my children and parents are */
46.61 - my_level_in_tree=-1;
46.62 - result=my_rank;
46.63 - /* cnt - number of ranks in given level */
46.64 - cnt=1;
46.65 - /* cummulative count of ranks */
46.66 - while( 0 <= result ) {
46.67 - result-=cnt;
46.68 - cnt*=tree_order;
46.69 - my_level_in_tree++;
46.70 - };
46.71 - /* int my_level_in_tree, n_children, n_parents; */
46.72 -
46.73 - if( 0 == my_rank ) {
46.74 - my_node->n_parents=0;
46.75 - my_node->parent_rank=-1;
46.76 - my_rank_in_my_level=0;
46.77 - } else {
46.78 - my_node->n_parents=1;
46.79 - cnt=1;
46.80 - cum_cnt=0;
46.81 - for (lvl = 0 ; lvl < my_level_in_tree ; lvl ++ ) {
46.82 - /* cummulative count up to this level */
46.83 - cum_cnt+=cnt;
46.84 - /* number of ranks in this level */
46.85 - cnt*=tree_order;
46.86 - }
46.87 - my_rank_in_my_level=my_rank-cum_cnt;
46.88 - /* tree_order consecutive ranks have the same parent */
46.89 - my_node->parent_rank=cum_cnt-cnt/tree_order+my_rank_in_my_level/tree_order;
46.90 - }
46.91 -
46.92 - /* figure out number of levels in the tree */
46.93 - n_lvls_in_tree=0;
46.94 - result=num_nodes;
46.95 - /* cnt - number of ranks in given level */
46.96 - cnt=1;
46.97 - /* cummulative count of ranks */
46.98 - while( 0 < result ) {
46.99 - result-=cnt;
46.100 - cnt*=tree_order;
46.101 - n_lvls_in_tree++;
46.102 - };
46.103 -
46.104 - my_node->children_ranks=(int *)NULL;
46.105 -
46.106 - /* get list of children */
46.107 - if( my_level_in_tree == (n_lvls_in_tree -1 ) ) {
46.108 - /* last level has no children */
46.109 - my_node->n_children=0;
46.110 - } else {
46.111 - cum_cnt=0;
46.112 - cnt=1;
46.113 - for( lvl=0 ; lvl <= my_level_in_tree ; lvl++ ) {
46.114 - cum_cnt+=cnt;
46.115 - cnt*=tree_order;
46.116 - }
46.117 - start_index=cum_cnt+my_rank_in_my_level*tree_order;
46.118 - end_index=start_index+tree_order-1;
46.119 -
46.120 - /* don't go out of bounds at the end of the list */
46.121 - if( end_index >= num_nodes ) {
46.122 - end_index = num_nodes-1;
46.123 - }
46.124 -
46.125 - if( start_index <= (num_nodes-1) ) {
46.126 - my_node->n_children=end_index-start_index+1;
46.127 - } else {
46.128 - my_node->n_children=0;
46.129 - }
46.130 -
46.131 - my_node->children_ranks=NULL;
46.132 - if( 0 < my_node->n_children ) {
46.133 - my_node->children_ranks=
46.134 - (int *)malloc( sizeof(int)*my_node->n_children);
46.135 - if( NULL == my_node->children_ranks) {
46.136 - goto Error;
46.137 - }
46.138 - for (lvl= start_index ; lvl <= end_index ; lvl++ ) {
46.139 - my_node->children_ranks[lvl-start_index]=lvl;
46.140 - }
46.141 - }
46.142 - }
46.143 - /* set node type */
46.144 - if( 0 == my_node->n_parents ) {
46.145 - my_node->my_node_type=ROOT_NODE;
46.146 - } else if ( 0 == my_node->n_children ) {
46.147 - my_node->my_node_type=LEAF_NODE;
46.148 - } else {
46.149 - my_node->my_node_type=INTERIOR_NODE;
46.150 - }
46.151 -
46.152 -
46.153 - /* successful return */
46.154 - return OMPI_SUCCESS;
46.155 -
46.156 -Error:
46.157 -
46.158 - /* error return */
46.159 - return OMPI_ERROR;
46.160 -}
46.161 -
46.162 -int mca_common_netpatterns_setup_narray_knomial_tree(
46.163 - int tree_order, int my_rank, int num_nodes,
46.164 - mca_common_netpatterns_narray_knomial_tree_node_t *my_node)
46.165 -{
46.166 - /* local variables */
46.167 - int n_levels, result;
46.168 - int my_level_in_tree, cnt ;
46.169 - int lvl,cum_cnt, my_rank_in_my_level,n_lvls_in_tree;
46.170 - int start_index,end_index;
46.171 - int rc;
46.172 -
46.173 - /* sanity check */
46.174 - if( 1 >= tree_order ) {
46.175 - goto Error;
46.176 - }
46.177 -
46.178 - my_node->my_rank=my_rank;
46.179 - my_node->tree_size=num_nodes;
46.180 -
46.181 - /* figure out number of levels in tree */
46.182 - n_levels=0;
46.183 - result=num_nodes-1;
46.184 - while (0 < result ) {
46.185 - result/=tree_order;
46.186 - n_levels++;
46.187 - };
46.188 -
46.189 - /* figure out who my children and parents are */
46.190 - my_level_in_tree=-1;
46.191 - result=my_rank;
46.192 - /* cnt - number of ranks in given level */
46.193 - cnt=1;
46.194 - /* cummulative count of ranks */
46.195 - while( 0 <= result ) {
46.196 - result-=cnt;
46.197 - cnt*=tree_order;
46.198 - my_level_in_tree++;
46.199 - };
46.200 - /* int my_level_in_tree, n_children, n_parents; */
46.201 -
46.202 - if( 0 == my_rank ) {
46.203 - my_node->n_parents=0;
46.204 - my_node->parent_rank=-1;
46.205 - my_rank_in_my_level=0;
46.206 - } else {
46.207 - my_node->n_parents=1;
46.208 - cnt=1;
46.209 - cum_cnt=0;
46.210 - for (lvl = 0 ; lvl < my_level_in_tree ; lvl ++ ) {
46.211 - /* cummulative count up to this level */
46.212 - cum_cnt+=cnt;
46.213 - /* number of ranks in this level */
46.214 - cnt*=tree_order;
46.215 - }
46.216 -
46.217 - my_node->rank_on_level =
46.218 - my_rank_in_my_level =
46.219 - my_rank-cum_cnt;
46.220 - my_node->level_size = cnt;
46.221 -
46.222 - rc = mca_common_netpatterns_setup_recursive_knomial_tree_node(
46.223 - my_node->level_size, my_node->rank_on_level,
46.224 - tree_order, &my_node->k_node);
46.225 - if (OMPI_SUCCESS != rc) {
46.226 - goto Error;
46.227 - }
46.228 -
46.229 - /* tree_order consecutive ranks have the same parent */
46.230 - my_node->parent_rank=cum_cnt-cnt/tree_order+my_rank_in_my_level/tree_order;
46.231 - }
46.232 -
46.233 - /* figure out number of levels in the tree */
46.234 - n_lvls_in_tree=0;
46.235 - result=num_nodes;
46.236 - /* cnt - number of ranks in given level */
46.237 - cnt=1;
46.238 - /* cummulative count of ranks */
46.239 - while( 0 < result ) {
46.240 - result-=cnt;
46.241 - cnt*=tree_order;
46.242 - n_lvls_in_tree++;
46.243 - };
46.244 -
46.245 - if(result < 0) {
46.246 - /* reset the size on group */
46.247 - num_nodes = cnt / tree_order;
46.248 - }
46.249 -
46.250 - my_node->children_ranks=(int *)NULL;
46.251 -
46.252 - /* get list of children */
46.253 - if( my_level_in_tree == (n_lvls_in_tree -1 ) ) {
46.254 - /* last level has no children */
46.255 - my_node->n_children=0;
46.256 - } else {
46.257 - cum_cnt=0;
46.258 - cnt=1;
46.259 - for( lvl=0 ; lvl <= my_level_in_tree ; lvl++ ) {
46.260 - cum_cnt+=cnt;
46.261 - cnt*=tree_order;
46.262 - }
46.263 - start_index=cum_cnt+my_rank_in_my_level*tree_order;
46.264 - end_index=start_index+tree_order-1;
46.265 -
46.266 - /* don't go out of bounds at the end of the list */
46.267 - if( end_index >= num_nodes ) {
46.268 - end_index = num_nodes-1;
46.269 - }
46.270 -
46.271 - if( start_index <= (num_nodes-1) ) {
46.272 - my_node->n_children=end_index-start_index+1;
46.273 - } else {
46.274 - my_node->n_children=0;
46.275 - }
46.276 -
46.277 - my_node->children_ranks=NULL;
46.278 - if( 0 < my_node->n_children ) {
46.279 - my_node->children_ranks=
46.280 - (int *)malloc( sizeof(int)*my_node->n_children);
46.281 - if( NULL == my_node->children_ranks) {
46.282 - goto Error;
46.283 - }
46.284 - for (lvl= start_index ; lvl <= end_index ; lvl++ ) {
46.285 - my_node->children_ranks[lvl-start_index]=lvl;
46.286 - }
46.287 - }
46.288 - }
46.289 - /* set node type */
46.290 - if( 0 == my_node->n_parents ) {
46.291 - my_node->my_node_type=ROOT_NODE;
46.292 - } else if ( 0 == my_node->n_children ) {
46.293 - my_node->my_node_type=LEAF_NODE;
46.294 - } else {
46.295 - my_node->my_node_type=INTERIOR_NODE;
46.296 - }
46.297 -
46.298 -
46.299 - /* successful return */
46.300 - return OMPI_SUCCESS;
46.301 -
46.302 -Error:
46.303 -
46.304 - /* error return */
46.305 - return OMPI_ERROR;
46.306 -}
46.307 -
46.308 -/* calculate the nearest power of radix that is equal to or greater
46.309 - * than size, with the specified radix. The resulting tree is of
46.310 - * depth n_lvls.
46.311 - */
46.312 -OMPI_DECLSPEC int roundup_to_power_radix ( int radix, int size, int *n_lvls )
46.313 -{
46.314 - int n_levels=0, return_value=1;
46.315 - int result;
46.316 - if( 1 > size ) {
46.317 - return 0;
46.318 - }
46.319 -
46.320 - result=size-1;
46.321 - while (0 < result ) {
46.322 - result/=radix;
46.323 - n_levels++;
46.324 - return_value*=radix;
46.325 - };
46.326 - *n_lvls=n_levels;
46.327 - return return_value;
46.328 -}
46.329 -
46.330 -static int fill_in_node_data(int tree_order, int num_nodes, int my_node,
46.331 - mca_common_netpatterns_tree_node_t *nodes_data)
46.332 -{
46.333 - /* local variables */
46.334 - int rc, num_ranks_per_child, num_children, n_extra;
46.335 - int child, rank, n_to_offset, n_ranks_to_child;
46.336 -
46.337 - /* figure out who are my children */
46.338 - num_ranks_per_child=num_nodes/tree_order;
46.339 - if( num_ranks_per_child ) {
46.340 - num_children=tree_order;
46.341 - n_extra=num_nodes-num_ranks_per_child*tree_order;
46.342 - } else {
46.343 - num_children=num_nodes;
46.344 - /* each child has the same number of descendents - 1 */
46.345 - n_extra=0;
46.346 - /* when there is a child, there is at least one
46.347 - * descendent */
46.348 - num_ranks_per_child=1;
46.349 - }
46.350 -
46.351 - nodes_data[my_node].n_children=num_children;
46.352 - if( num_children ) {
46.353 - nodes_data[my_node].children_ranks=(int *)
46.354 - malloc(sizeof(int)*num_children);
46.355 - if(!nodes_data[my_node].children_ranks) {
46.356 -
46.357 - if ( NULL == nodes_data[my_node].children_ranks )
46.358 - {
46.359 - fprintf(stderr, "Cannot allocate memory for children_ranks.\n");
46.360 - rc = OMPI_ERR_OUT_OF_RESOURCE;
46.361 - goto error;
46.362 - }
46.363 - }
46.364 - }
46.365 -
46.366 - rank = my_node;
46.367 - for( child=0 ; child < num_children ; child ++ ) {
46.368 -
46.369 - /* set parent information */
46.370 - nodes_data[rank].n_parents=1;
46.371 - nodes_data[rank].parent_rank=my_node;
46.372 - if( n_extra ) {
46.373 - n_to_offset=child;
46.374 - if( n_to_offset > n_extra){
46.375 - n_to_offset=n_extra;
46.376 - }
46.377 - } else {
46.378 - n_to_offset=0;
46.379 - }
46.380 -
46.381 - rank=my_node+1+child*num_ranks_per_child;
46.382 - rank+=n_to_offset;
46.383 -
46.384 - /* set parent information */
46.385 - nodes_data[rank].n_parents=1;
46.386 - nodes_data[rank].parent_rank=my_node;
46.387 -
46.388 - n_ranks_to_child=num_ranks_per_child;
46.389 - if(n_extra && (child < n_extra) ) {
46.390 - n_ranks_to_child++;
46.391 - }
46.392 -
46.393 - /* set child information */
46.394 - nodes_data[my_node].children_ranks[child]=rank;
46.395 -
46.396 - /* remove the child from the list of ranks */
46.397 - n_ranks_to_child--;
46.398 - rc=fill_in_node_data(tree_order, n_ranks_to_child, rank, nodes_data);
46.399 - if( OMPI_SUCCESS != rc ) {
46.400 - goto error;
46.401 - }
46.402 -
46.403 - }
46.404 -
46.405 - /* return */
46.406 - return OMPI_SUCCESS;
46.407 -
46.408 - /* Error */
46.409 -error:
46.410 - return rc;
46.411 -
46.412 -}
46.413 -
46.414 -/*
46.415 - * This routine sets up the array describing the communication tree for
46.416 - * a k-ary tree where the children form a contiguous range of ranks at
46.417 - * each level. The assumption here is that rank 0 is always the root -
46.418 - * ranks may be rotated based on who the actual root is, to obtain the
46.419 - * appropriate communication pattern for such roots.
46.420 - */
46.421 -OMPI_DECLSPEC int mca_common_netpatterns_setup_narray_tree_contigous_ranks(
46.422 - int tree_order, int num_nodes,
46.423 - mca_common_netpatterns_tree_node_t **tree_nodes)
46.424 -{
46.425 - /* local variables */
46.426 - int num_descendent_ranks=num_nodes-1;
46.427 - int rc=OMPI_SUCCESS;
46.428 -
46.429 - *tree_nodes=(mca_common_netpatterns_tree_node_t *)malloc(
46.430 - sizeof(mca_common_netpatterns_tree_node_t)*
46.431 - num_nodes);
46.432 - if(!(*tree_nodes) ) {
46.433 - fprintf(stderr, "Cannot allocate memory for tree_nodes.\n");
46.434 - rc = OMPI_ERR_OUT_OF_RESOURCE;
46.435 - return rc;
46.436 - }
46.437 -
46.438 - (*tree_nodes)[0].n_parents=0;
46.439 - rc=fill_in_node_data(tree_order,
46.440 - num_descendent_ranks, 0, *tree_nodes);
46.441 -
46.442 - /* successful return */
46.443 - return rc;
46.444 -
46.445 -}
47.1 --- a/ompi/mca/sbgp/basesmsocket/Makefile.am Tue Feb 05 18:15:32 2013 +0000
47.2 +++ b/ompi/mca/sbgp/basesmsocket/Makefile.am Tue Feb 05 21:52:55 2013 +0000
47.3 @@ -35,8 +35,7 @@
47.4 mcacomponent_LTLIBRARIES = $(component_install)
47.5 mca_sbgp_basesmsocket_la_SOURCES = $(sources)
47.6 mca_sbgp_basesmsocket_la_LDFLAGS = -module -avoid-version
47.7 -mca_sbgp_basesmsocket_la_LIBADD = \
47.8 - $(top_ompi_builddir)/ompi/mca/common/commpatterns/libmca_common_commpatterns.la
47.9 +mca_sbgp_basesmsocket_la_LIBADD =
47.10
47.11 noinst_LTLIBRARIES = $(component_noinst)
47.12 libmca_sbgp_basesmsocket_la_SOURCES =$(sources)
48.1 --- a/ompi/mca/sbgp/basesmsocket/sbgp_basesmsocket_component.c Tue Feb 05 18:15:32 2013 +0000
48.2 +++ b/ompi/mca/sbgp/basesmsocket/sbgp_basesmsocket_component.c Tue Feb 05 21:52:55 2013 +0000
48.3 @@ -35,7 +35,7 @@
48.4 #include "ompi/communicator/communicator.h"
48.5 #include "sbgp_basesmsocket.h"
48.6
48.7 -#include "ompi/mca/common/commpatterns/common_coll_ops.h"
48.8 +#include "ompi/patterns/comm/coll_ops.h"
48.9
48.10
48.11 /*
49.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
49.2 +++ b/ompi/patterns/comm/Makefile.am Tue Feb 05 21:52:55 2013 +0000
49.3 @@ -0,0 +1,16 @@
49.4 +# Copyright (c) 2013 Oak Ridge National Laboratory. All rights reserved.
49.5 +# $COPYRIGHT$
49.6 +#
49.7 +# Additional copyrights may follow
49.8 +#
49.9 +# $HEADER$
49.10 +#
49.11 +
49.12 +headers += \
49.13 + patterns/comm/coll_ops.h \
49.14 + patterns/comm/commpatterns.h
49.15 +
49.16 +libmpi_la_SOURCES += \
49.17 + patterns/comm/allreduce.c \
49.18 + patterns/comm/allgather.c \
49.19 + patterns/comm/bcast.c
50.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
50.2 +++ b/ompi/patterns/comm/allgather.c Tue Feb 05 21:52:55 2013 +0000
50.3 @@ -0,0 +1,288 @@
50.4 +/*
50.5 + * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
50.6 + * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
50.7 + * Copyright (c) 2012 Los Alamos National Security, LLC.
50.8 + * All rights reserved.
50.9 + * $COPYRIGHT$
50.10 + *
50.11 + * Additional copyrights may follow
50.12 + *
50.13 + * $HEADER$
50.14 + */
50.15 +/** @file */
50.16 +
50.17 +#include "ompi_config.h"
50.18 +
50.19 +#include "ompi/constants.h"
50.20 +#include "ompi/op/op.h"
50.21 +#include "ompi/datatype/ompi_datatype.h"
50.22 +#include "ompi/communicator/communicator.h"
50.23 +#include "opal/include/opal/sys/atomic.h"
50.24 +#include "ompi/mca/pml/pml.h"
50.25 +#include "ompi/patterns/net/netpatterns.h"
50.26 +#include "coll_ops.h"
50.27 +
50.28 +/**
50.29 + * All-gather - subgroup in communicator
50.30 + */
50.31 +OMPI_DECLSPEC int comm_allgather_pml(void *src_buf, void *dest_buf, int count,
50.32 + ompi_datatype_t *dtype, int my_rank_in_group,
50.33 + int n_peers, int *ranks_in_comm,ompi_communicator_t *comm)
50.34 +{
50.35 + /* local variables */
50.36 + int rc=OMPI_SUCCESS,msg_cnt;
50.37 + int pair_rank,exchange,extra_rank, n_extra_nodes,n_extra;
50.38 + int proc_block,extra_start,extra_end,iovec_len;
50.39 + int remote_data_start_rank,remote_data_end_rank;
50.40 + int local_data_start_rank;
50.41 + netpatterns_pair_exchange_node_t my_exchange_node;
50.42 + size_t message_extent,current_data_extent,current_data_count;
50.43 + size_t dt_size;
50.44 + OPAL_PTRDIFF_TYPE dt_extent;
50.45 + char *src_buf_current;
50.46 + char *dest_buf_current;
50.47 + struct iovec send_iov[2] = {{0,0},{0,0}},
50.48 + recv_iov[2] = {{0,0},{0,0}};
50.49 + ompi_request_t *requests[4];
50.50 +
50.51 + /* get size of data needed - same layout as user data, so that
50.52 + * we can apply the reduction routines directly on these buffers
50.53 + */
50.54 + rc = ompi_datatype_type_size(dtype, &dt_size);
50.55 + if( OMPI_SUCCESS != rc ) {
50.56 + goto Error;
50.57 + }
50.58 +
50.59 + rc = ompi_datatype_type_extent(dtype, &dt_extent);
50.60 + if( OMPI_SUCCESS != rc ) {
50.61 + goto Error;
50.62 + }
50.63 + message_extent = dt_extent*count;
50.64 +
50.65 + /* place my data in the correct destination buffer */
50.66 + rc=ompi_datatype_copy_content_same_ddt(dtype,count,
50.67 + (char *)dest_buf+my_rank_in_group*message_extent,
50.68 + (char *)src_buf);
50.69 + if( OMPI_SUCCESS != rc ) {
50.70 + goto Error;
50.71 + }
50.72 +
50.73 + /* 1 process special case */
50.74 + if(1 == n_peers) {
50.75 + return OMPI_SUCCESS;
50.76 + }
50.77 +
50.78 + /* get my reduction communication pattern */
50.79 + rc = netpatterns_setup_recursive_doubling_tree_node(n_peers,
50.80 + my_rank_in_group, &my_exchange_node);
50.81 + if(OMPI_SUCCESS != rc){
50.82 + return rc;
50.83 + }
50.84 +
50.85 + n_extra_nodes=n_peers-my_exchange_node.n_largest_pow_2;
50.86 +
50.87 + /* get the data from the extra sources */
50.88 + if(0 < my_exchange_node.n_extra_sources) {
50.89 +
50.90 + if ( EXCHANGE_NODE == my_exchange_node.node_type ) {
50.91 +
50.92 + /*
50.93 + ** Receive data from extra node
50.94 + */
50.95 +
50.96 + extra_rank=my_exchange_node.rank_extra_source;
50.97 + /* receive the data into the correct location - will use 2
50.98 + * messages in the recursive doubling phase */
50.99 + dest_buf_current=(char *)dest_buf+message_extent*extra_rank;
50.100 + rc=MCA_PML_CALL(recv(dest_buf_current,
50.101 + count,dtype,ranks_in_comm[extra_rank],
50.102 + -OMPI_COMMON_TAG_ALLREDUCE,
50.103 + comm, MPI_STATUSES_IGNORE));
50.104 + if( 0 > rc ) {
50.105 + goto Error;
50.106 + }
50.107 +
50.108 + } else {
50.109 +
50.110 + /*
50.111 + ** Send data to "partner" node
50.112 + */
50.113 + extra_rank=my_exchange_node.rank_extra_source;
50.114 + src_buf_current=(char *)src_buf;
50.115 + rc=MCA_PML_CALL(send(src_buf_current,
50.116 + count,dtype,ranks_in_comm[extra_rank],
50.117 + -OMPI_COMMON_TAG_ALLREDUCE,
50.118 + MCA_PML_BASE_SEND_STANDARD,
50.119 + comm));
50.120 + if( 0 > rc ) {
50.121 + goto Error;
50.122 + }
50.123 + }
50.124 + }
50.125 +
50.126 + current_data_extent=message_extent;
50.127 + current_data_count=count;
50.128 + src_buf_current=(char *)dest_buf+my_rank_in_group*message_extent;
50.129 + proc_block=1;
50.130 + local_data_start_rank=my_rank_in_group;
50.131 + /* loop over data exchanges */
50.132 + for(exchange=0 ; exchange < my_exchange_node.n_exchanges ; exchange++) {
50.133 +
50.134 + /* is the remote data read */
50.135 + pair_rank=my_exchange_node.rank_exchanges[exchange];
50.136 + msg_cnt=0;
50.137 +
50.138 + /*
50.139 + * Power of 2 data segment
50.140 + */
50.141 + /* post non-blocking receive */
50.142 + if(pair_rank > my_rank_in_group ){
50.143 + recv_iov[0].iov_base=src_buf_current+current_data_extent;
50.144 + recv_iov[0].iov_len=current_data_extent;
50.145 + iovec_len=1;
50.146 + remote_data_start_rank=local_data_start_rank+proc_block;
50.147 + remote_data_end_rank=remote_data_start_rank+proc_block-1;
50.148 + } else {
50.149 + recv_iov[0].iov_base=src_buf_current-current_data_extent;
50.150 + recv_iov[0].iov_len=current_data_extent;
50.151 + iovec_len=1;
50.152 + remote_data_start_rank=local_data_start_rank-proc_block;
50.153 + remote_data_end_rank=remote_data_start_rank+proc_block-1;
50.154 + }
50.155 + /* the data from the non power of 2 ranks */
50.156 + if(remote_data_start_rank<n_extra_nodes) {
50.157 + /* figure out how much data is at the remote rank */
50.158 + /* last rank with data */
50.159 + extra_start=remote_data_start_rank;
50.160 + extra_end=remote_data_end_rank;
50.161 + if(extra_end >= n_extra_nodes ) {
50.162 + /* if last rank exceeds the ranks with extra data,
50.163 + * adjust this.
50.164 + */
50.165 + extra_end=n_extra_nodes-1;
50.166 + }
50.167 + /* get the number of ranks whose data is to be grabbed */
50.168 + n_extra=extra_end-extra_start+1;
50.169 +
50.170 + recv_iov[1].iov_base=(char *)dest_buf+
50.171 + (extra_start+my_exchange_node.n_largest_pow_2)*message_extent;
50.172 + recv_iov[1].iov_len=n_extra*count;
50.173 + iovec_len=2;
50.174 + }
50.175 +
50.176 + rc=MCA_PML_CALL(irecv(recv_iov[0].iov_base,
50.177 + current_data_count,dtype,ranks_in_comm[pair_rank],
50.178 + -OMPI_COMMON_TAG_ALLREDUCE,
50.179 + comm,&(requests[msg_cnt])));
50.180 + if( 0 > rc ) {
50.181 + goto Error;
50.182 + }
50.183 + msg_cnt++;
50.184 +
50.185 + if(iovec_len > 1 ) {
50.186 + rc=MCA_PML_CALL(irecv(recv_iov[1].iov_base,
50.187 + recv_iov[1].iov_len,dtype,ranks_in_comm[pair_rank],
50.188 + -OMPI_COMMON_TAG_ALLREDUCE,
50.189 + comm,&(requests[msg_cnt])));
50.190 + if( 0 > rc ) {
50.191 + goto Error;
50.192 + }
50.193 + msg_cnt++;
50.194 + }
50.195 +
50.196 + /* post non-blocking send */
50.197 + send_iov[0].iov_base=src_buf_current;
50.198 + send_iov[0].iov_len=current_data_extent;
50.199 + iovec_len=1;
50.200 + /* the data from the non power of 2 ranks */
50.201 + if(local_data_start_rank<n_extra_nodes) {
50.202 + /* figure out how much extra-rank data is held locally */
50.203 + /* last rank with data */
50.204 + extra_start=local_data_start_rank;
50.205 + extra_end=extra_start+proc_block-1;
50.206 + if(extra_end >= n_extra_nodes ) {
50.207 + /* if last rank exceeds the ranks with extra data,
50.208 + * adjust this.
50.209 + */
50.210 + extra_end=n_extra_nodes-1;
50.211 + }
50.212 + /* get the number of ranks whose data is to be grabbed */
50.213 + n_extra=extra_end-extra_start+1;
50.214 +
50.215 + send_iov[1].iov_base=(char *)dest_buf+
50.216 + (extra_start+my_exchange_node.n_largest_pow_2)*message_extent;
50.217 + send_iov[1].iov_len=n_extra*count;
50.218 + iovec_len=2;
50.219 + }
50.220 +
50.221 + rc=MCA_PML_CALL(isend(send_iov[0].iov_base,
50.222 + current_data_count,dtype,ranks_in_comm[pair_rank],
50.223 + -OMPI_COMMON_TAG_ALLREDUCE,MCA_PML_BASE_SEND_STANDARD,
50.224 + comm,&(requests[msg_cnt])));
50.225 + if( 0 > rc ) {
50.226 + goto Error;
50.227 + }
50.228 + msg_cnt++;
50.229 + if( iovec_len > 1 ) {
50.230 + rc=MCA_PML_CALL(isend(send_iov[1].iov_base,
50.231 + send_iov[1].iov_len,dtype,ranks_in_comm[pair_rank],
50.232 + -OMPI_COMMON_TAG_ALLREDUCE,MCA_PML_BASE_SEND_STANDARD,
50.233 + comm,&(requests[msg_cnt])));
50.234 + if( 0 > rc ) {
50.235 + goto Error;
50.236 + }
50.237 + msg_cnt++;
50.238 + }
50.239 +
50.240 + /* prepare the source buffer for the next iteration */
50.241 + if(pair_rank < my_rank_in_group ){
50.242 + src_buf_current-=current_data_extent;
50.243 + local_data_start_rank-=proc_block;
50.244 + }
50.245 + proc_block*=2;
50.246 + current_data_extent*=2;
50.247 + current_data_count*=2;
50.248 +
50.249 + /* wait on send and receive completion */
50.250 + ompi_request_wait_all(msg_cnt,requests,MPI_STATUSES_IGNORE);
50.251 + }
50.252 +
50.253 + /* copy data in from the "extra" source, if need be */
50.254 + if(0 < my_exchange_node.n_extra_sources) {
50.255 +
50.256 + if ( EXTRA_NODE == my_exchange_node.node_type ) {
50.257 + /*
50.258 + ** receive the data
50.259 + ** */
50.260 + extra_rank=my_exchange_node.rank_extra_source;
50.261 +
50.262 + rc=MCA_PML_CALL(recv(dest_buf,
50.263 + count*n_peers,dtype,ranks_in_comm[extra_rank],
50.264 + -OMPI_COMMON_TAG_ALLREDUCE,
50.265 + comm,MPI_STATUSES_IGNORE));
50.266 + if(0 > rc ) {
50.267 + goto Error;
50.268 + }
50.269 + } else {
50.270 + /* send the data to the pair-rank outside of the power of 2 set
50.271 + ** of ranks
50.272 + */
50.273 +
50.274 + extra_rank=my_exchange_node.rank_extra_source;
50.275 + rc=MCA_PML_CALL(send(dest_buf,
50.276 + count*n_peers,dtype,ranks_in_comm[extra_rank],
50.277 + -OMPI_COMMON_TAG_ALLREDUCE,
50.278 + MCA_PML_BASE_SEND_STANDARD,
50.279 + comm));
50.280 + if( 0 > rc ) {
50.281 + goto Error;
50.282 + }
50.283 + }
50.284 + }
50.285 +
50.286 + /* return */
50.287 + return OMPI_SUCCESS;
50.288 +
50.289 +Error:
50.290 + return rc;
50.291 +}
51.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
51.2 +++ b/ompi/patterns/comm/allreduce.c Tue Feb 05 21:52:55 2013 +0000
51.3 @@ -0,0 +1,255 @@
51.4 +/*
51.5 + * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
51.6 + * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
51.7 + * Copyright (c) 2012 Los Alamos National Security, LLC.
51.8 + * All rights reserved.
51.9 + * $COPYRIGHT$
51.10 + *
51.11 + * Additional copyrights may follow
51.12 + *
51.13 + * $HEADER$
51.14 + */
51.15 +/** @file */
51.16 +
51.17 +#include "ompi_config.h"
51.18 +
51.19 +#include "ompi/constants.h"
51.20 +#include "ompi/op/op.h"
51.21 +#include "ompi/datatype/ompi_datatype.h"
51.22 +#include "ompi/communicator/communicator.h"
51.23 +#include "opal/include/opal/sys/atomic.h"
51.24 +#include "ompi/mca/pml/pml.h"
51.25 +#include "ompi/patterns/net/netpatterns.h"
51.26 +#include "coll_ops.h"
51.27 +#include "commpatterns.h"
51.28 +
51.29 +/**
51.30 + * All-reduce for contiguous primitive types
51.31 + */
51.32 +OMPI_DECLSPEC int comm_allreduce_pml(void *sbuf, void *rbuf, int count,
51.33 + ompi_datatype_t *dtype, int my_rank_in_group,
51.34 + struct ompi_op_t *op, int n_peers,int *ranks_in_comm,
51.35 + ompi_communicator_t *comm)
51.36 +{
51.37 + /* local variables */
51.38 + int rc=OMPI_SUCCESS,n_dts_per_buffer,n_data_segments,stripe_number;
51.39 + int pair_rank,exchange,extra_rank;
51.40 + netpatterns_pair_exchange_node_t my_exchange_node;
51.41 + int count_processed,count_this_stripe;
51.42 + size_t dt_size,dt_extent;
51.43 + char scratch_bufers[2][MAX_TMP_BUFFER];
51.44 + int send_buffer=0,recv_buffer=1;
51.45 + char *sbuf_current, *rbuf_current;
51.46 + ompi_request_t *requests[2];
51.47 +
51.48 + /* get size of data needed - same layout as user data, so that
51.49 + * we can apply the reduction routines directly on these buffers
51.50 + */
51.51 + rc = opal_datatype_type_size((opal_datatype_t *)dtype, &dt_size);
51.52 + if( OMPI_SUCCESS != rc ) {
51.53 + goto Error;
51.54 + }
51.55 + rc = ompi_datatype_type_extent(dtype, (OPAL_PTRDIFF_TYPE *)&dt_extent);
51.56 + if( OMPI_SUCCESS != rc ) {
51.57 + goto Error;
51.58 + }
51.59 +
51.60 + /* 1 process special case */
51.61 + if(1 == n_peers) {
51.62 + /* place my data in the correct destination buffer */
51.63 + rc=ompi_datatype_copy_content_same_ddt(dtype,count,
51.64 + (char *)rbuf, (char *)sbuf);
51.65 + if( OMPI_SUCCESS != rc ) {
51.66 + goto Error;
51.67 + }
51.68 + return OMPI_SUCCESS;
51.69 + }
51.70 +
51.71 + /* number of data types copies that the scratch buffer can hold */
51.72 + n_dts_per_buffer=((int) MAX_TMP_BUFFER)/dt_extent;
51.73 + if ( 0 == n_dts_per_buffer ) {
51.74 + rc=OMPI_ERROR;
51.75 + goto Error;
51.76 + }
51.77 +
51.78 + /* compute number of stripes needed to process this collective */
51.79 + n_data_segments=(count+n_dts_per_buffer -1 ) / n_dts_per_buffer ;
51.80 +
51.81 + /* get my reduction communication pattern */
51.82 + rc = netpatterns_setup_recursive_doubling_tree_node(n_peers,
51.83 + my_rank_in_group, &my_exchange_node);
51.84 + if(OMPI_SUCCESS != rc){
51.85 + return rc;
51.86 + }
51.87 +
51.88 + count_processed=0;
51.89 +
51.90 + /* process the input in stripes that fit in the scratch buffers */
51.91 + /* NOTE: starting with a rather synchronous approach */
51.92 + for( stripe_number=0 ; stripe_number < n_data_segments ; stripe_number++ ) {
51.93 +
51.94 + /* get number of elements to process in this stripe */
51.95 + count_this_stripe=n_dts_per_buffer;
51.96 + if( count_processed + count_this_stripe > count )
51.97 + count_this_stripe=count-count_processed;
51.98 +
51.99 + /* copy data from the input buffer into the temp buffer */
51.100 + sbuf_current=(char *)sbuf+count_processed*dt_extent;
51.101 + rc=ompi_datatype_copy_content_same_ddt(dtype,count_this_stripe,
51.102 + scratch_bufers[send_buffer], sbuf_current);
51.103 + if( OMPI_SUCCESS != rc ) {
51.104 + goto Error;
51.105 + }
51.106 +
51.107 + /* copy data in from the "extra" source, if need be */
51.108 + if(0 < my_exchange_node.n_extra_sources) {
51.109 +
51.110 + if ( EXCHANGE_NODE == my_exchange_node.node_type ) {
51.111 +
51.112 + /*
51.113 + ** Receive data from extra node
51.114 + */
51.115 + extra_rank=my_exchange_node.rank_extra_source;
51.116 + rc=MCA_PML_CALL(recv(scratch_bufers[recv_buffer],
51.117 + count_this_stripe,dtype,ranks_in_comm[extra_rank],
51.118 + -OMPI_COMMON_TAG_ALLREDUCE, comm,
51.119 + MPI_STATUSES_IGNORE));
51.120 + if( 0 > rc ) {
51.121 + fprintf(stderr," first recv failed in comm_allreduce_pml \n");
51.122 + fflush(stderr);
51.123 + goto Error;
51.124 + }
51.125 +
51.126 +
51.127 + /* apply collective operation to first half of the data */
51.128 + if( 0 < count_this_stripe ) {
51.129 + ompi_op_reduce(op,
51.130 + (void *)scratch_bufers[send_buffer],
51.131 + (void *)scratch_bufers[recv_buffer],
51.132 + count_this_stripe,dtype);
51.133 + }
51.134 +
51.135 +
51.136 + } else {
51.137 +
51.138 + /*
51.139 + ** Send data to "partner" node
51.140 + */
51.141 + extra_rank=my_exchange_node.rank_extra_source;
51.142 + rc=MCA_PML_CALL(send(scratch_bufers[send_buffer],
51.143 + count_this_stripe,dtype,ranks_in_comm[extra_rank],
51.144 + -OMPI_COMMON_TAG_ALLREDUCE, MCA_PML_BASE_SEND_STANDARD,
51.145 + comm));
51.146 + if( 0 > rc ) {
51.147 + fprintf(stderr," first send failed in comm_allreduce_pml \n");
51.148 + fflush(stderr);
51.149 + goto Error;
51.150 + }
51.151 + }
51.152 +
51.153 + /* change pointer to scratch buffer - this way we can send data
51.154 + ** that we have summed w/o a memory copy, and receive data into the
51.155 + ** other buffer, w/o fear of overwriting data that has not yet
51.156 + ** completed being sent
51.157 + */
51.158 + recv_buffer^=1;
51.159 + send_buffer^=1;
51.160 + }
51.161 +
51.162 + /* loop over data exchanges */
51.163 + for(exchange=0 ; exchange < my_exchange_node.n_exchanges ; exchange++) {
51.164 +
51.165 + /* is the remote data read */
51.166 + pair_rank=my_exchange_node.rank_exchanges[exchange];
51.167 +
51.168 + /* post non-blocking receive */
51.169 + rc=MCA_PML_CALL(irecv(scratch_bufers[recv_buffer],
51.170 + count_this_stripe,dtype,ranks_in_comm[pair_rank],
51.171 + -OMPI_COMMON_TAG_ALLREDUCE,
51.172 + comm,&(requests[0])));
51.173 + if( 0 > rc ) {
51.174 + fprintf(stderr," irecv failed in comm_allreduce_pml at iterations %d \n",
51.175 + exchange);
51.176 + fflush(stderr);
51.177 + goto Error;
51.178 + }
51.179 +
51.180 + /* post non-blocking send */
51.181 + rc=MCA_PML_CALL(isend(scratch_bufers[send_buffer],
51.182 + count_this_stripe,dtype, ranks_in_comm[pair_rank],
51.183 + -OMPI_COMMON_TAG_ALLREDUCE,MCA_PML_BASE_SEND_STANDARD,
51.184 + comm,&(requests[1])));
51.185 + if( 0 > rc ) {
51.186 + fprintf(stderr," isend failed in comm_allreduce_pml at iterations %d \n",
51.187 + exchange);
51.188 + fflush(stderr);
51.189 + goto Error;
51.190 + }
51.191 + /* wait on send and receive completion */
51.192 + ompi_request_wait_all(2,requests,MPI_STATUSES_IGNORE);
51.193 +
51.194 + /* reduce the data */
51.195 + if( 0 < count_this_stripe ) {
51.196 + ompi_op_reduce(op,
51.197 + (void *)scratch_bufers[send_buffer],
51.198 + (void *)scratch_bufers[recv_buffer],
51.199 + count_this_stripe,dtype);
51.200 + }
51.201 + /* get ready for next step */
51.202 + recv_buffer^=1;
51.203 + send_buffer^=1;
51.204 +
51.205 + }
51.206 +
51.207 + /* copy data in from the "extra" source, if need be */
51.208 + if(0 < my_exchange_node.n_extra_sources) {
51.209 +
51.210 + if ( EXTRA_NODE == my_exchange_node.node_type ) {
51.211 + /*
51.212 + ** receive the data
51.213 + ** */
51.214 + extra_rank=my_exchange_node.rank_extra_source;
51.215 + rc=MCA_PML_CALL(recv(scratch_bufers[recv_buffer],
51.216 + count_this_stripe,dtype,ranks_in_comm[extra_rank],
51.217 + -OMPI_COMMON_TAG_ALLREDUCE, comm,
51.218 + MPI_STATUSES_IGNORE));
51.219 + if( 0 > rc ) {
51.220 + fprintf(stderr," last recv failed in comm_allreduce_pml \n");
51.221 + fflush(stderr);
51.222 + goto Error;
51.223 + }
51.224 +
51.225 + recv_buffer^=1;
51.226 + send_buffer^=1;
51.227 + } else {
51.228 + /* send the data to the pair-rank outside of the power of 2 set
51.229 + ** of ranks
51.230 + */
51.231 +
51.232 + extra_rank=my_exchange_node.rank_extra_source;
51.233 + rc=MCA_PML_CALL(send((char *)scratch_bufers[send_buffer],
51.234 + count_this_stripe,dtype,ranks_in_comm[extra_rank],
51.235 + -OMPI_COMMON_TAG_ALLREDUCE, MCA_PML_BASE_SEND_STANDARD,
51.236 + comm));
51.237 + if( 0 > rc ) {
51.238 + fprintf(stderr," last send failed in comm_allreduce_pml \n");
51.239 + fflush(stderr);
51.240 + goto Error;
51.241 + }
51.242 + }
51.243 + }
51.244 +
51.245 + /* copy data from the temp buffer into the output buffer */
51.246 + rbuf_current = (char *) rbuf + count_processed * dt_size;
51.247 + memcpy(rbuf_current,scratch_bufers[send_buffer], count_this_stripe*dt_size);
51.248 +
51.249 + /* update the count of elements processed */
51.250 + count_processed += count_this_stripe;
51.251 + }
51.252 +
51.253 + /* return */
51.254 + return OMPI_SUCCESS;
51.255 +
51.256 +Error:
51.257 + return rc;
51.258 +}
52.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
52.2 +++ b/ompi/patterns/comm/bcast.c Tue Feb 05 21:52:55 2013 +0000
52.3 @@ -0,0 +1,97 @@
52.4 +/*
52.5 + * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
52.6 + * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
52.7 + * Copyright (c) 2012 Los Alamos National Security, LLC.
52.8 + * All rights reserved.
52.9 + * $COPYRIGHT$
52.10 + *
52.11 + * Additional copyrights may follow
52.12 + *
52.13 + * $HEADER$
52.14 + */
52.15 +/** @file */
52.16 +
52.17 +#include "ompi_config.h"
52.18 +
52.19 +#include "ompi/constants.h"
52.20 +#include "ompi/op/op.h"
52.21 +#include "ompi/datatype/ompi_datatype.h"
52.22 +#include "ompi/communicator/communicator.h"
52.23 +#include "opal/include/opal/sys/atomic.h"
52.24 +#include "ompi/mca/pml/pml.h"
52.25 +#include "ompi/patterns/net/netpatterns.h"
52.26 +#include "coll_ops.h"
52.27 +
52.28 +/**
52.29 + * Bcast over a subgroup of comm: binary tree rooted at group rank root, with
52.30 + * the full message sent at each step. ranks_in_comm[i] is the rank in comm of
52.31 + * group rank i. Returns OMPI_SUCCESS, or the error code of the failing call.
52.32 + */
52.33 +OMPI_DECLSPEC int comm_bcast_pml(void *buffer, int root, int count,
52.34 + ompi_datatype_t *dtype, int my_rank_in_group,
52.35 + int n_peers, int *ranks_in_comm,ompi_communicator_t *comm)
52.36 +{
52.37 + /* local variables */
52.38 + int rc=OMPI_SUCCESS,msg_cnt,i;
52.39 + ompi_request_t *requests[2];
52.40 + int node_rank, peer_rank;
52.41 + netpatterns_tree_node_t node_data;
52.42 +
52.43 + /*
52.44 + * shift rank to root==0 tree
52.45 + */
52.46 + node_rank=(my_rank_in_group-root+n_peers)%n_peers;
52.47 +
52.48 + /*
52.49 + * compute my communication pattern - binary tree
52.50 + */
52.51 + rc=netpatterns_setup_narray_tree(2, node_rank, n_peers,
52.52 + &node_data);
52.53 + if( OMPI_SUCCESS != rc ) {
52.54 + goto Error;
52.55 + }
52.56 +
52.57 + /* 1 process special case */
52.58 + if(1 == n_peers) {
52.59 + return OMPI_SUCCESS;
52.60 + }
52.61 +
52.62 + /* if I have parents - wait on the data to arrive */
52.63 + if(node_data.n_parents) {
52.64 + /* I will have only 1 parent */
52.65 + peer_rank=node_data.parent_rank;
52.66 + peer_rank=(peer_rank+root)%n_peers;
52.67 + /* peer_rank is now the un-shifted group rank; map it to its rank in comm */
52.68 + rc=MCA_PML_CALL(recv(buffer, count,dtype,ranks_in_comm[peer_rank],
52.69 + -OMPI_COMMON_TAG_BCAST, comm, MPI_STATUSES_IGNORE));
52.70 + if( 0 > rc ) {
52.71 + goto Error;
52.72 + }
52.73 + }
52.74 +
52.75 + /* send the data to my children */
52.76 + msg_cnt=0;
52.77 + for(i=0 ; i < node_data.n_children ; i++ ) {
52.78 + peer_rank=node_data.children_ranks[i];
52.79 + peer_rank=(peer_rank+root)%n_peers;
52.80 + rc=MCA_PML_CALL(isend(buffer,
52.81 + count,dtype,ranks_in_comm[peer_rank],
52.82 + -OMPI_COMMON_TAG_BCAST,MCA_PML_BASE_SEND_STANDARD,
52.83 + comm,&(requests[msg_cnt])));
52.84 + if( 0 > rc ) {
52.85 + goto Error;
52.86 + }
52.87 + msg_cnt++;
52.88 + }
52.89 + /* wait for send completion */
52.90 + if(msg_cnt) {
52.91 + /* requests[] has 2 slots; presumably an order-2 tree yields at most 2 children */
52.92 + ompi_request_wait_all(msg_cnt,requests,MPI_STATUSES_IGNORE);
52.93 + }
52.94 +
52.95 + /* return */
52.96 + return OMPI_SUCCESS;
52.97 +
52.98 +Error:
52.99 + return rc;
52.100 +}
53.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
53.2 +++ b/ompi/patterns/comm/coll_ops.h Tue Feb 05 21:52:55 2013 +0000
53.3 @@ -0,0 +1,51 @@
53.4 +/*
53.5 + * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
53.6 + * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
53.7 + * Copyright (c) 2012 Los Alamos National Security, LLC.
53.8 + * All rights reserved.
53.9 + * $COPYRIGHT$
53.10 + *
53.11 + * Additional copyrights may follow
53.12 + *
53.13 + * $HEADER$
53.14 + */
53.15 +
53.16 +#ifndef COMM_COLL_OP_TYPES_H
53.17 +#define COMM_COLL_OP_TYPES_H
53.18 +
53.19 +#include "ompi_config.h"
53.20 +#include "ompi/communicator/communicator.h"
53.21 +#include "ompi/datatype/ompi_datatype.h"
53.22 +#include "ompi/proc/proc.h"
53.23 +
53.24 +BEGIN_C_DECLS
53.25 +
53.26 +#define OMPI_COMMON_TAG_ALLREDUCE 99
53.27 +#define OMPI_COMMON_TAG_BCAST 98
53.28 +
53.29 +
53.30 +
53.31 +
53.32 +OMPI_DECLSPEC int comm_allgather_pml(void *src_buf, void *dest_buf, int count,
53.33 + ompi_datatype_t *dtype, int my_rank_in_group, int n_peers,
53.34 + int *ranks_in_comm,ompi_communicator_t *comm);
53.35 +OMPI_DECLSPEC int comm_allreduce_pml(void *sbuf, void *rbuf, int count,
53.36 + ompi_datatype_t *dtype, int my_rank_in_group,
53.37 + struct ompi_op_t *op, int n_peers,int *ranks_in_comm,
53.38 + ompi_communicator_t *comm);
53.39 +OMPI_DECLSPEC int comm_bcast_pml(void *buffer, int root, int count,
53.40 + ompi_datatype_t *dtype, int my_rank_in_group,
53.41 + int n_peers, int *ranks_in_comm,ompi_communicator_t
53.42 + *comm);
53.43 +
53.44 +/* reduction operations supported */
53.45 +#define OP_SUM 1
53.46 +#define OP_MAX 2
53.47 +#define OP_MIN 3
53.48 +
53.49 +#define TYPE_INT4 1
53.50 +
53.51 +
53.52 +END_C_DECLS
53.53 +
53.54 +#endif /* COMM_COLL_OP_TYPES_H */
54.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
54.2 +++ b/ompi/patterns/comm/commpatterns.h Tue Feb 05 21:52:55 2013 +0000
54.3 @@ -0,0 +1,22 @@
54.4 +/*
54.5 + * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
54.6 + * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
54.7 + * $COPYRIGHT$
54.8 + *
54.9 + * Additional copyrights may follow
54.10 + *
54.11 + * $HEADER$
54.12 + */
54.13 +
54.14 +#ifndef COMM_NETPATTERNS_H
54.15 +#define COMM_NETPATTERNS_H
54.16 +
54.17 +#include "ompi_config.h"
54.18 +
54.19 +BEGIN_C_DECLS
54.20 +
54.21 +#define MAX_TMP_BUFFER 8192
54.22 +
54.23 +END_C_DECLS
54.24 +
54.25 +#endif /* COMM_NETPATTERNS_H */
55.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
55.2 +++ b/ompi/patterns/net/Makefile.am Tue Feb 05 21:52:55 2013 +0000
55.3 @@ -0,0 +1,18 @@
55.4 +# Copyright (c) 2013 Oak Ridge National Laboratory. All rights reserved.
55.5 +# $COPYRIGHT$
55.6 +#
55.7 +# Additional copyrights may follow
55.8 +#
55.9 +# $HEADER$
55.10 +#
55.11 +
55.12 +headers += \
55.13 + patterns/net/netpatterns.h \
55.14 + patterns/net/netpatterns_knomial_tree.h \
55.15 + patterns/net/coll_ops.h
55.16 +
55.17 +libmpi_la_SOURCES += \
55.18 + patterns/net/netpatterns_base.c \
55.19 + patterns/net/netpatterns_multinomial_tree.c \
55.20 + patterns/net/netpatterns_nary_tree.c \
55.21 + patterns/net/netpatterns_knomial_tree.c
56.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
56.2 +++ b/ompi/patterns/net/allreduce.c Tue Feb 05 21:52:55 2013 +0000
56.3 @@ -0,0 +1,347 @@
56.4 +/*
56.5 + * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
56.6 + * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
56.7 + * Copyright (c) 2012 Los Alamos National Security, LLC.
56.8 + * All rights reserved.
56.9 + * $COPYRIGHT$
56.10 + *
56.11 + * Additional copyrights may follow
56.12 + *
56.13 + * $HEADER$
56.14 + */
56.15 +/** @file */
56.16 +
56.17 +#include "ompi_config.h"
56.18 +
56.19 +#include "ompi/constants.h"
56.20 +#include "coll_sm2.h"
56.21 +#include "ompi/op/op.h"
56.22 +#include "ompi/datatype/ompi_datatype.h"
56.23 +#include "ompi/communicator/communicator.h"
56.24 +#include "ompi/mca/rte/rte.h"
56.25 +
56.26 +void send_completion(int status, struct ompi_process_name_t* peer, struct iovec* msg,
56.27 + int count, ompi_rml_tag_t tag, void* cbdata)
56.28 +{
56.29 + /* send callback: cbdata is treated as a pointer to an int completion flag, which is set to 1 */
56.30 + *(int *)cbdata=1;
56.31 +}
56.32 +
56.33 +
56.34 +void recv_completion(int status, struct ompi_process_name_t* peer, struct iovec* msg,
56.35 + int count, ompi_rml_tag_t tag, void* cbdata)
56.36 +{
56.37 + /* receive callback: MB() is issued first, then the int flag that cbdata points at is set to 1 */
56.38 + MB();
56.39 + *(int *)cbdata=1;
56.40 +}
56.41 +
56.42 +
56.43 +static int op_reduce(int op_type, void *src_dest_buf, void *src_buf, int count,
56.44 + int data_type)
56.45 +{
56.46 + /* Reduce count elements of src_buf into src_dest_buf in place. Returns
56.47 + * OMPI_SUCCESS, or OMPI_ERROR for an op_type/data_type pair not handled here. */
56.48 + int ret=OMPI_SUCCESS;
56.49 +
56.50 + switch (op_type) {
56.51 + case OP_SUM:
56.52 + /* only TYPE_INT4 is implemented: src_dest_buf[i] += src_buf[i] */
56.53 + switch (data_type) {
56.54 + case TYPE_INT4: {
56.55 + int *int_src_ptr=(int *)src_buf;
56.56 + int *int_src_dst_ptr=(int *)src_dest_buf;
56.57 + int cnt;
56.58 + for(cnt=0 ; cnt < count ; cnt++ ) {
56.59 + int_src_dst_ptr[cnt]+=int_src_ptr[cnt];
56.60 + }
56.61 + break;
56.62 + }
56.63 + default:
56.64 + ret=OMPI_ERROR;
56.65 + goto Error;
56.66 + }
56.67 + break;
56.68 +
56.69 + default:
56.70 + ret=OMPI_ERROR;
56.71 + goto Error;
56.72 + }
56.73 +
56.74 +Error:
56.75 + return ret;
56.76 +}
56.77 +
56.78 +/**
56.79 + * All-reduce for contigous primitive types
56.80 + */
56.81 +static
56.82 +comm_allreduce(void *sbuf, void *rbuf, int count, opal_datatype_t *dtype,
56.83 + int op_type, opal_list_t *peers)
56.84 +{
56.85 + /* local variables */
56.86 + int rc=OMPI_SUCCESS,n_dts_per_buffer,n_data_segments,stripe_number;
56.87 + int pair_rank,exchange,extra_rank;
56.88 + int index_read,index_write;
56.89 + netpatterns_pair_exchange_node_t my_exchange_node;
56.90 + int my_rank,count_processed,count_this_stripe;
56.91 + size_t n_peers,message_extent,len_data_buffer;
56.92 + size_t dt_size;
56.93 + long long tag, base_tag;
56.94 + sm_work_buffer_t *sm_buffer_desc;
56.95 + opal_list_item_t *item;
56.96 + char scratch_bufers[2][MAX_TMP_BUFFER];
56.97 + int send_buffer=0;recv_buffer=1;
56.98 + char *sbuf_current,*rbuf_current;
56.99 + ompi_proc_t **proc_array;
56.100 + struct iovec send_iov, recv_iov;
56.101 + volatile int *recv_done, *send_done;
56.102 + int recv_completion_flag, send_completion_flag;
56.103 + int data_type;
56.104 +
56.105 + /* get size of data needed - same layout as user data, so that
56.106 + * we can apply the reudction routines directly on these buffers
56.107 + */
56.108 + rc=opal_datatype_type_size(dtype, &dt_size);
56.109 + if( OMPI_SUCCESS != rc ) {
56.110 + goto Error;
56.111 + }
56.112 + message_extent=dt_extent*count;
56.113 +
56.114 + /* lenght of control and data regions */
56.115 + len_data_buffer=sm_module->data_memory_per_proc_per_segment;
56.116 +
56.117 + /* number of data types copies that the scratch buffer can hold */
56.118 + n_dts_per_buffer=((int) MAX_TMP_BUFFER)/dt_size;
56.119 + if ( 0 == n_dts_per_buffer ) {
56.120 + rc=OMPI_ERROR;
56.121 + goto Error;
56.122 + }
56.123 +
56.124 + /* need a read and a write buffer for a pair-wise exchange of data */
56.125 + n_dts_per_buffer/=2;
56.126 + len_data_buffer=n_dts_per_buffer*dt_size;
56.127 +
56.128 + /* compute number of stripes needed to process this collective */
56.129 + n_data_segments=(count+n_dts_per_buffer -1 ) / n_dts_per_buffer ;
56.130 +
56.131 + /* */
56.132 + n_peers=opal_list_get_size(peers);
56.133 +
56.134 + /* get my rank in the list */
56.135 + my_rank=0;
56.136 + for (item = opal_list_get_first(peers) ;
56.137 + item != opal_list_get_end(peers) ;
56.138 + item = opal_list_get_next(peers)) {
56.139 + if(ompi_proc_local()==(ompi_proc_t *)item){
56.140 + /* this is the pointer to my proc strucuture */
56.141 + break;
56.142 + }
56.143 + my_rank++;
56.144 + }
56.145 + proc_array=(ompi_proc_t **)malloc(sizeof(ompi_proc_t *)*n_peers);
56.146 + if( NULL == proc_array) {
56.147 + goto Error;
56.148 + }
56.149 + cnt=0;
56.150 + for (item = opal_list_get_first(peers) ;
56.151 + item != opal_list_get_end(peers) ;
56.152 + item = opal_list_get_next(peers)) {
56.153 + proc_array[cnt]=(ompi_proc_t *)item;
56.154 + cnt++;
56.155 + }
56.156 +
56.157 + /* get my reduction communication pattern */
56.158 + ret=netpatterns_setup_recursive_doubling_tree_node(n_peers,my_rank,&my_exchange_node);
56.159 + if(OMPI_SUCCESS != ret){
56.160 + return ret;
56.161 + }
56.162 +
56.163 + /* setup flags for non-blocking communications */
56.164 + recv_done=&recv_completion_flag;
56.165 + send_done=&send_completion_flag;
56.166 +
56.167 + /* set data type */
56.168 + if(&opal_datatype_int4==dtype) {
56.169 + data_type=TYPE_INT4;
56.170 + }
56.171 +
56.172 + count_processed=0;
56.173 +
56.174 + /* get a pointer to the shared-memory working buffer */
56.175 + /* NOTE: starting with a rather synchronous approach */
56.176 + for( stripe_number=0 ; stripe_number < n_data_segments ; stripe_number++ ) {
56.177 +
56.178 + /* get number of elements to process in this stripe */
56.179 + count_this_stripe=n_dts_per_buffer;
56.180 + if( count_processed + count_this_stripe > count )
56.181 + count_this_stripe=count-count_processed;
56.182 +
56.183 + /* copy data from the input buffer into the temp buffer */
56.184 + sbuf_current=(char *)sbuf+count_processed*dt_size;
56.185 + memcopy(scratch_bufers[send_buffer],sbuf_current,count_this_stripe*dt_size);
56.186 +
56.187 + /* copy data in from the "extra" source, if need be */
56.188 + if(0 < my_exchange_node->n_extra_sources) {
56.189 +
56.190 + if ( EXCHANGE_NODE == my_exchange_node->node_type ) {
56.191 +
56.192 + /*
56.193 + ** Receive data from extra node
56.194 + */
56.195 +
56.196 + extra_rank=my_exchange_node.rank_extra_source;
56.197 + recv_iov.iov_base=scratch_bufers[recv_buffer];
56.198 + recv_iov.iov_len=count_this_stripe*dt_size;
56.199 + rc = ompi_rte_recv(&(proc_array[extra_rank]->proc_name), &recv_iov, 1,
56.200 + OMPI_RML_TAG_ALLREDUCE , 0);
56.201 + if(OMPI_SUCCESS != rc ) {
56.202 + goto Error;
56.203 + }
56.204 +
56.205 + /* apply collective operation to first half of the data */
56.206 + if( 0 < count_this_stripe ) {
56.207 + op_reduce(op_type,(void *)scratch_bufers[recv_buffer],
56.208 + (void *)scratch_bufers[send_buffer], n_my_count,TYPE_INT4);
56.209 + }
56.210 +
56.211 +
56.212 + } else {
56.213 +
56.214 + /*
56.215 + ** Send data to "partner" node
56.216 + */
56.217 + extra_rank=my_exchange_node.rank_extra_source;
56.218 + send_iov.iov_base=scratch_bufers[send_buffer];
56.219 + send_iov.iov_len=count_this_stripe*dt_size;
56.220 + rc = ompi_rte_send(&(proc_array[extra_rank]->proc_name), &send_iov, 1,
56.221 + OMPI_RML_TAG_ALLREDUCE , 0);
56.222 + if(OMPI_SUCCESS != rc ) {
56.223 + goto Error;
56.224 + }
56.225 + }
56.226 +
56.227 + /* change pointer to scratch buffer - this was we can send data
56.228 + ** that we have summed w/o a memory copy, and receive data into the
56.229 + ** other buffer, w/o fear of over writting data that has not yet
56.230 + ** completed being send
56.231 + */
56.232 + recv_buffer^=1;
56.233 + send_buffer^=1;
56.234 + }
56.235 +
56.236 + MB();
56.237 + /*
56.238 + * Signal parent that data is ready
56.239 + */
56.240 + tag=base_tag+1;
56.241 + my_ctl_pointer->flag=tag;
56.242 +
56.243 + /* loop over data exchanges */
56.244 + for(exchange=0 ; exchange < my_exchange_node->n_exchanges ; exchange++) {
56.245 +
56.246 + /* debug
56.247 + t4=opal_sys_timer_get_cycles();
56.248 + end debug */
56.249 +
56.250 +
56.251 + my_write_pointer=my_tmp_data_buffer[index_write];
56.252 + my_read_pointer=my_tmp_data_buffer[index_read];
56.253 +
56.254 + /* is the remote data read */
56.255 + pair_rank=my_exchange_node->rank_exchanges[exchange];
56.256 +
56.257 + *recv_done=0;
56.258 + *send_done=0;
56.259 + MB();
56.260 +
56.261 + /* post non-blocking receive */
56.262 + recv_iov.iov_base=scratch_bufers[send_buffer];
56.263 + recv_iov.iov_len=count_this_stripe*dt_size;
56.264 + rc = ompi_rte_recv_nb(&(proc_array[extra_rank]->proc_name), recv_iov, 1,
56.265 + OMPI_RML_TAG_ALLREDUCE , 0, recv_completion, recv_done);
56.266 +
56.267 + /* post non-blocking send */
56.268 + send_iov.iov_base=scratch_bufers[send_buffer];
56.269 + send_iov.iov_len=count_this_stripe*dt_size;
56.270 + rc = ompi_rte_send_nb(&(proc_array[extra_rank]->proc_name), send_iov, 1,
56.271 + OMPI_RML_TAG_ALLREDUCE , 0, send_completion, send_done);
56.272 +
56.273 + /* wait on receive completion */
56.274 + while(!(*recv_done) ) {
56.275 + opal_progress();
56.276 + }
56.277 +
56.278 + /* reduce the data */
56.279 + if( 0 < count_this_stripe ) {
56.280 + op_reduce(op_type,(void *)scratch_bufers[recv_buffer],
56.281 + (void *)scratch_bufers[send_buffer], n_my_count,TYPE_INT4);
56.282 + }
56.283 +
56.284 +
56.285 + /* get ready for next step */
56.286 + index_read=(exchange&1);
56.287 + index_write=((exchange+1)&1);
56.288 +
56.289 + /* wait on send completion */
56.290 + while(!(*send_done) ) {
56.291 + opal_progress();
56.292 + }
56.293 +
56.294 + }
56.295 +
56.296 + /* copy data in from the "extra" source, if need be */
56.297 + if(0 < my_exchange_node->n_extra_sources) {
56.298 +
56.299 + if ( EXTRA_NODE == my_exchange_node->node_type ) {
56.300 + /*
56.301 + ** receive the data
56.302 + ** */
56.303 + extra_rank=my_exchange_node->rank_extra_source;
56.304 +
56.305 + recv_iov.iov_base=scratch_bufers[recv_buffer];
56.306 + recv_iov.iov_len=count_this_stripe*dt_size;
56.307 + rc = ompi_rte_recv(&(proc_array[extra_rank]->proc_name), &recv_iov, 1,
56.308 + OMPI_RML_TAG_ALLREDUCE , 0);
56.309 + if(OMPI_SUCCESS != rc ) {
56.310 + goto Error;
56.311 + }
56.312 +
56.313 + } else {
56.314 + /* send the data to the pair-rank outside of the power of 2 set
56.315 + ** of ranks
56.316 + */
56.317 +
56.318 + extra_rank=my_exchange_node->rank_extra_source;
56.319 + send_iov.iov_base=scratch_bufers[recv_buffer];
56.320 + send_iov.iov_len=count_this_stripe*dt_size;
56.321 + rc = ompi_rte_recv(&(proc_array[extra_rank]->proc_name), &send_iov, 1,
56.322 + OMPI_RML_TAG_ALLREDUCE , 0);
56.323 + if(OMPI_SUCCESS != rc ) {
56.324 + goto Error;
56.325 + }
56.326 + }
56.327 + }
56.328 +
56.329 + /* copy data into the destination buffer */
56.330 + rc=ompi_datatype_copy_content_same_ddt(dtype, count_this_stripe,
56.331 + (char *)((char *)rbuf+dt_extent*count_processed),
56.332 + (char *)my_write_pointer);
56.333 + if( 0 != rc ) {
56.334 + return OMPI_ERROR;
56.335 + }
56.336 +
56.337 + /* copy data from the temp buffer into the output buffer */
56.338 + rbuf_current=(char *)rbuf+count_processed*dt_size;
56.339 + memcopy(scratch_bufers[recv_buffer],rbuf_current,count_this_stripe*dt_size);
56.340 +
56.341 + /* update the count of elements processed */
56.342 + count_processed+=count_this_stripe;
56.343 + }
56.344 +
56.345 + /* return */
56.346 + return rc;
56.347 +
56.348 +Error:
56.349 + return rc;
56.350 +}
57.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
57.2 +++ b/ompi/patterns/net/coll_ops.h Tue Feb 05 21:52:55 2013 +0000
57.3 @@ -0,0 +1,29 @@
57.4 +/*
57.5 + * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
57.6 + * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
57.7 + * $COPYRIGHT$
57.8 + *
57.9 + * Additional copyrights may follow
57.10 + *
57.11 + * $HEADER$
57.12 + */
57.13 +
57.14 +#ifndef COMM_OP_TYPES_H
57.15 +#define COMM_OP_TYPES_H
57.16 +
57.17 +#include "ompi_config.h"
57.18 +
57.19 +BEGIN_C_DECLS
57.20 +/* Allreduce entry point. NOTE(review): only ompi_config.h is included here - confirm that it (or the includer) declares opal_datatype_t and opal_list_t. */
57.21 +int comm_allreduce(void *sbuf, void *rbuf, int count, opal_datatype_t *dtype,
57.22 + int op, opal_list_t *peers);
57.23 +
57.24 +/* reduction operations supported (presumably the values accepted for 'op' - TODO confirm) */
57.25 +#define OP_SUM 1
57.26 +/* element data type identifiers (presumably a 4-byte integer - TODO confirm) */
57.27 +#define TYPE_INT4 1
57.28 +
57.29 +
57.30 +END_C_DECLS
57.31 +
57.32 +#endif /* COMM_OP_TYPES_H */
58.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
58.2 +++ b/ompi/patterns/net/netpatterns.h Tue Feb 05 21:52:55 2013 +0000
58.3 @@ -0,0 +1,150 @@
58.4 +/*
58.5 + * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
58.6 + * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
58.7 + * Copyright (c) 2012 Los Alamos National Security, LLC.
58.8 + * All rights reserved.
58.9 + * $COPYRIGHT$
58.10 + *
58.11 + * Additional copyrights may follow
58.12 + *
58.13 + * $HEADER$
58.14 + */
58.15 +
58.16 +#ifndef COMM_PATTERNS_H
58.17 +#define COMM_PATTERNS_H
58.18 +
58.19 +#include "ompi_config.h"
58.20 +
58.21 +#include "ompi/mca/rte/rte.h"
58.22 +#include "netpatterns_knomial_tree.h"
58.23 +
58.24 +BEGIN_C_DECLS
58.25 +
58.26 +int netpatterns_base_err(const char* fmt, ...);
58.27 +int netpatterns_register_mca_params(void);
58.28 +/* NETPATTERNS_VERBOSE(args): debug-build trace; args is a parenthesized printf-style argument list, e.g. NETPATTERNS_VERBOSE(("x=%d", x)); - prints only when netpatterns_base_verbose > 0 and expands to nothing otherwise */
58.29 +#if OPAL_ENABLE_DEBUG
58.30 +extern int netpatterns_base_verbose; /* disabled by default */
58.31 +OMPI_DECLSPEC extern int netpatterns_base_err(const char*, ...) __opal_attribute_format__(__printf__, 1, 2);
58.32 +#define NETPATTERNS_VERBOSE(args) \
58.33 +    do { \
58.34 +        if(netpatterns_base_verbose > 0) { \
58.35 +            netpatterns_base_err("[%s]%s[%s:%d:%s] ",\
58.36 +                    ompi_process_info.nodename, \
58.37 +                    OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), \
58.38 +                    __FILE__, __LINE__, __func__); \
58.39 +            netpatterns_base_err args; \
58.40 +            netpatterns_base_err("\n"); \
58.41 +        } \
58.42 +    } while(0) /* no trailing ';' - the caller supplies it, so the macro is safe inside if/else */
58.43 +#else
58.44 +#define NETPATTERNS_VERBOSE(args)
58.45 +#endif
58.46 +/* FIND_BASE(base,myid,level,k): for non-negative myid, store in base the largest multiple of k^level that is <= myid (base must be an int lvalue) */
58.47 +#define FIND_BASE(base,myid,level,k) \
58.48 +    do { \
58.49 +        int temp = 1; \
58.50 +        int jj; \
58.51 +        int knt2; \
58.52 +        \
58.53 +        (base) = 0; \
58.54 +        for( jj = 0; jj < (level); jj++) {\
58.55 +            temp *= (k); \
58.56 +        } \
58.57 +        knt2 = 1; \
58.58 +        while((myid) >= knt2*temp){ \
58.59 +            knt2++; \
58.60 +        } \
58.61 +        (base) = knt2*temp - temp; \
58.62 +    } while(0)
58.63 +
58.64 +
58.65 +
58.66 +
58.67 +/* enum for node type - root, leaf, or interior (presumably the values stored in the my_node_type fields below - TODO confirm) */
58.68 +enum {
58.69 + ROOT_NODE,
58.70 + LEAF_NODE,
58.71 + INTERIOR_NODE
58.72 +};
58.73 +
58.74 +
58.75 +/*
58.76 + * N-order tree node description: one rank's parent and children within the tree
58.77 + */
58.78 +struct netpatterns_tree_node_t {
58.79 + /* my rank within the group */
58.80 + int my_rank;
58.81 + /* my node type - root, leaf, or interior */
58.82 + int my_node_type;
58.83 + /* number of nodes in the tree */
58.84 + int tree_size;
58.85 + /* number of parents (0/1) */
58.86 + int n_parents;
58.87 + /* number of children */
58.88 + int n_children;
58.89 + /* parent rank within the group */
58.90 + int parent_rank;
58.91 + /* children ranks within the group (presumably n_children entries - TODO confirm) */
58.92 + int *children_ranks;
58.93 +};
58.94 +typedef struct netpatterns_tree_node_t netpatterns_tree_node_t;
58.95 +
58.96 +struct netpatterns_k_exchange_node_t;
58.97 +/*
58.98 + * N-order + k-nomial tree node description
58.99 + */
58.100 +struct netpatterns_narray_knomial_tree_node_t {
58.101 + /* my rank within the group */
58.102 + int my_rank;
58.103 + /* my node type - root, leaf, or interior */
58.104 + int my_node_type;
58.105 + /* number of nodes in the tree */
58.106 + int tree_size;
58.107 + /* number of parents (0/1) */
58.108 + int n_parents;
58.109 + /* number of children */
58.110 + int n_children;
58.111 + /* parent rank within the group */
58.112 + int parent_rank;
58.113 + /* children ranks within the group (presumably n_children entries - TODO confirm) */
58.114 + int *children_ranks;
58.115 + /* Total number of ranks on this specific level */
58.116 + int level_size;
58.117 + /* Rank on this node inside of level */
58.118 + int rank_on_level;
58.119 + /* Knomial recursive gather information; embedded by value, so the full struct definition must already be visible (presumably from netpatterns_knomial_tree.h, included above - confirm) */
58.120 + struct netpatterns_k_exchange_node_t k_node;
58.121 +};
58.122 +typedef struct netpatterns_narray_knomial_tree_node_t
58.123 +netpatterns_narray_knomial_tree_node_t;
58.124 +
58.125 +
58.126 +/* Init code for netpatterns: registers the MCA parameters on the first call; later calls do nothing and return OMPI_SUCCESS */
58.127 +OMPI_DECLSPEC int netpatterns_init(void);
58.128 +
58.129 +/* set up an n-ary ("narray") tree */
58.130 +OMPI_DECLSPEC int netpatterns_setup_narray_tree(int tree_order, int my_rank, int num_nodes,
58.131 + netpatterns_tree_node_t *my_node);
58.132 +/* set up an n-ary ("narray") tree with k-nomial levels */
58.133 +OMPI_DECLSPEC int netpatterns_setup_narray_knomial_tree( int tree_order, int my_rank, int num_nodes,
58.134 + netpatterns_narray_knomial_tree_node_t *my_node);
58.135 +
58.136 +/* set up a multi-nomial tree - for each node in the tree
58.137 + * this returns its parent and its children
58.138 + */
58.139 +OMPI_DECLSPEC int netpatterns_setup_multinomial_tree(int tree_order, int num_nodes,
58.140 + netpatterns_tree_node_t *tree_nodes);
58.141 +/* NOTE(review): undocumented in the original; by its name this builds an n-ary tree over contiguous ranks and hands the nodes back through tree_nodes - confirm */
58.142 +OMPI_DECLSPEC int netpatterns_setup_narray_tree_contigous_ranks(int tree_order,
58.143 + int num_nodes, netpatterns_tree_node_t **tree_nodes);
58.144 +
58.145 +/* calculate the nearest power of radix that is equal to or greater
58.146 + * than size, with the specified radix. The resulting tree is of
58.147 + * depth n_lvls.
58.148 + */
58.149 +OMPI_DECLSPEC int roundup_to_power_radix( int radix, int size, int *n_lvls );
58.150 +
58.151 +END_C_DECLS
58.152 +
58.153 +#endif /* COMM_PATTERNS_H */
59.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
59.2 +++ b/ompi/patterns/net/netpatterns_base.c Tue Feb 05 21:52:55 2013 +0000
59.3 @@ -0,0 +1,53 @@
59.4 +/*
59.5 + *
59.6 + * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
59.7 + * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
59.8 + * $COPYRIGHT$
59.9 + *
59.10 + * Additional copyrights may follow
59.11 + *
59.12 + * $HEADER$
59.13 + */
59.14 +#include "opal/mca/base/mca_base_param.h"
59.15 +#include "ompi/include/ompi/constants.h"
59.16 +#include "netpatterns.h"
59.17 +
59.18 +int netpatterns_base_verbose = 0; /* disabled by default; tested by NETPATTERNS_VERBOSE in debug builds */
59.19 +/* Register the integer MCA parameter "netpatterns_base_verbose" (type name "common", default 0) with netpatterns_base_verbose as its storage; the registration result is not checked and OMPI_SUCCESS is always returned */
59.20 +int netpatterns_register_mca_params(void)
59.21 +{
59.22 + mca_base_param_reg_int_name("common",
59.23 + "netpatterns_base_verbose",
59.24 + "Verbosity level of the NETPATTERNS framework",
59.25 + false, false,
59.26 + 0,
59.27 + &netpatterns_base_verbose);
59.28 +
59.29 + return OMPI_SUCCESS;
59.30 +}
59.31 +/* printf-style message to stderr; the return value is whatever vfprintf() reports */
59.32 +int netpatterns_base_err(const char* fmt, ...)
59.33 +{
59.34 +    int n_written;
59.35 +    va_list ap;
59.36 +
59.37 +    va_start(ap, fmt);
59.38 +    n_written = vfprintf(stderr, fmt, ap);
59.39 +    va_end(ap);
59.40 +    return n_written;
59.41 +}
59.42 +
59.43 +int netpatterns_init(void)
59.44 +{
59.45 +    /* netpatterns has no component of its own, so every component that uses it
59.46 +     * calls this entry point; the flag makes only the first call do the work */
59.47 +    static int already_initialized = 0;
59.48 +
59.49 +    if (0 != already_initialized) {
59.50 +        /* an earlier call already registered the parameters */
59.51 +        return OMPI_SUCCESS;
59.52 +    }
59.53 +
59.54 +    already_initialized = 1;
59.55 +    return netpatterns_register_mca_params();
59.56 +}
60.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
60.2 +++ b/ompi/patterns/net/netpatterns_knomial_tree.c Tue Feb 05 21:52:55 2013 +0000
60.3 @@ -0,0 +1,935 @@
60.4 +/*
60.5 + * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
60.6 + * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
60.7 +* $COPYRIGHT$
60.8 + *
60.9 + * Additional copyrights may follow
60.10 + *
60.11 + * $HEADER$
60.12 + */
60.13 +
60.14 +#include "ompi_config.h"
60.15 +#ifdef HAVE_UNISTD_H
60.16 +#include <unistd.h>
60.17 +#endif
60.18 +#include <sys/types.h>
60.19 +#ifdef HAVE_SYS_MMAN_H
60.20 +#include <sys/mman.h>
60.21 +#endif
60.22 +#include <fcntl.h>
60.23 +#include <stdlib.h>
60.24 +#include <assert.h>
60.25 +
60.26 +#include "ompi/constants.h"
60.27 +
60.28 +#include "ompi/mca/rte/rte.h"
60.29 +
60.30 +#include "netpatterns.h"
60.31 +
60.32 +/* Set up node_rank's part of the recursive k-nomial allgather pattern in exchange_node (hier_ranks[r] is the
60.33 + * amount of data rank r contributes, num_nodes entries); returns OMPI_SUCCESS, or OMPI_ERROR if an allocation fails. */
60.34 +OMPI_DECLSPEC int netpatterns_setup_recursive_knomial_allgather_tree_node(
60.35 +        int num_nodes, int node_rank, int tree_order, int *hier_ranks,
60.36 +        netpatterns_k_exchange_node_t *exchange_node)
60.37 +{
60.38 +    /* local variables */
60.39 +    int i, j, cnt, i_temp;
60.40 +    int knt,knt2,kk, ex_node, stray;
60.41 +    int n_levels,pow_k;
60.42 +    int k_temp1, k_temp2;
60.43 +    int myid, reindex_myid = 0;
60.44 +    int base, peer_base,base_temp, peer;
60.45 +    int *prev_data = NULL, *current_data = NULL, *group_info = NULL;
60.46 +    /* reset every pointer this routine allocates so the Error path never sees stale values */
60.47 +    exchange_node->rank_extra_sources_array = NULL;
60.48 +    exchange_node->rank_exchanges = NULL;
60.49 +    exchange_node->reindex_map = NULL;
60.50 +    exchange_node->inv_reindex_map = NULL;
60.51 +    exchange_node->payload_info = NULL;
60.52 +    NETPATTERNS_VERBOSE(
60.53 +            ("Enter netpatterns_setup_recursive_knomial_allgather_tree_node(num_nodes=%d, node_rank=%d, tree_order=%d)",
60.54 +             num_nodes, node_rank, tree_order));
60.55 +
60.56 +    assert(num_nodes > 1);
60.57 +    assert(tree_order > 1);
60.58 +    if (tree_order > num_nodes) {
60.59 +        tree_order = num_nodes;
60.60 +    }
60.61 +
60.62 +    /* k-nomial radix */
60.63 +    exchange_node->tree_order = tree_order;
60.64 +
60.65 +    /* Calculate the number of levels in the tree for
60.66 +     * the largest power of tree_order less than or
60.67 +     * equal to the group size
60.68 +     */
60.69 +    n_levels = 0;
60.70 +    cnt=1;
60.71 +    while ( num_nodes > cnt ) {
60.72 +        cnt *= tree_order;
60.73 +        n_levels++;
60.74 +    }
60.75 +    /* this is the actual number of recursive k-ing steps
60.76 +     * we will perform, the last step may not be a full
60.77 +     * step depending on the outcome of the next conditional
60.78 +     */
60.79 +    pow_k = n_levels;
60.80 +
60.81 +    /* figure out the largest power of tree_order that is less than or equal to
60.82 +     * num_nodes */
60.83 +    if ( cnt > num_nodes) {
60.84 +        cnt /= tree_order;
60.85 +        n_levels--;
60.86 +    }
60.87 +
60.88 +    /*exchange_node->log_tree_order = n_levels;*/
60.89 +    exchange_node->log_tree_order = pow_k;
60.90 +    exchange_node->n_largest_pow_tree_order = cnt;
60.91 +
60.92 +
60.93 +    /* find the number of complete groups of size tree_order, tree_order^2, tree_order^3,...,tree_order^pow_k */
60.94 +    /* I don't think we need to cache this info this group_info array */
60.95 +    group_info = (int *) calloc(pow_k , sizeof(int));
60.96 +    if( NULL == group_info ) { goto Error; }
60.97 +    group_info[0] = num_nodes/tree_order;
60.98 +    for ( i = 1; i < pow_k; i ++) {
60.99 +        group_info[i] = group_info[i-1]/tree_order;
60.100 +        /*fprintf(stderr,"Number of complete groups of power %d is %d\n",i+1,group_info[i]);*/
60.101 +
60.102 +    }
60.103 +
60.104 +    /* find number of incomplete groups and number of ranks belonging to those ranks */
60.105 +    knt=0;
60.106 +    while (knt <= (pow_k - 1) && group_info[knt] > 0) {
60.107 +        knt++;
60.108 +    }
60.109 +    knt--;
60.110 +    /*fprintf(stderr,"Maximal power of k is %d and the number of incomplete groups is %d \n", knt+1 ,tree_order - group_info[knt] );*/
60.111 +
60.112 +    /* k_temp is a synonym for cnt which is the largest full power of k group */
60.113 +    /* now, start the calculation to find the first stray rank aka "extra" rank */
60.114 +    stray = 0;
60.115 +    /*fprintf(stderr,"Maximal power of k %d, first stragler rank is %d and the number of straglers is %d\n",cnt,
60.116 +      cnt*group_info[knt],
60.117 +      num_nodes - cnt*group_info[knt]);*/
60.118 +
60.119 +
60.120 +    /* cache this info, it's muy importante */
60.121 +    stray = cnt*group_info[knt];
60.122 +    exchange_node->k_nomial_stray = stray;
60.123 +
60.124 +    /* reindexing phase: allocate the map and its inverse */
60.125 +    exchange_node->reindex_map = (int *) malloc(num_nodes*sizeof(int));
60.126 +    if( NULL == exchange_node->reindex_map ) {
60.127 +        goto Error;
60.128 +    }
60.129 +    /* this is the inverse map */
60.130 +    exchange_node->inv_reindex_map = (int *) malloc(num_nodes*sizeof(int));
60.131 +    if( NULL == exchange_node->inv_reindex_map ) {
60.132 +        goto Error;
60.133 +    }
60.134 +    if( stray < num_nodes ) {
60.135 +        /* find the first proxy rank */
60.136 +        peer = stray - cnt;
60.137 +        /* fix all ranks prior to this rank */
60.138 +        for( i = 0; i < peer; i++){
60.139 +            exchange_node->reindex_map[i] = i;
60.140 +        }
60.141 +        /* now, start the swap */
60.142 +        exchange_node->reindex_map[peer] = peer;
60.143 +        for( i = (peer+1); i < (peer + (num_nodes - stray)+1); i++) {
60.144 +            exchange_node->reindex_map[i] = exchange_node->reindex_map[i-1] + 2;
60.145 +        }
60.146 +        i_temp = i;
60.147 +        for( i = i_temp; i < stray; i++) {
60.148 +            exchange_node->reindex_map[i] = exchange_node->reindex_map[i-1] + 1;
60.149 +        }
60.150 +        /* now, finish it off */
60.151 +        exchange_node->reindex_map[stray] = peer + 1;
60.152 +        for( i = (stray+1); i < num_nodes; i++) {
60.153 +            exchange_node->reindex_map[i] = exchange_node->reindex_map[i-1] + 2;
60.154 +        }
60.155 +        /* debug print */
60.156 +        /*
60.157 +           for( i = 0; i < np; i++){
60.158 +           fprintf(stderr,"%d ",reindex_map[i]);
60.159 +           }
60.160 +           fprintf(stderr,"\n");
60.161 +         */
60.162 +    } else {
60.163 +        /* we have no extras, trivial reindexing */
60.164 +        for( i = 0; i < num_nodes; i++){
60.165 +            exchange_node->reindex_map[i] = i;
60.166 +        }
60.167 +    }
60.168 +    /* finished reindexing */
60.169 +
60.170 +    /* Now, I need to get my rank in the new indexing */
60.171 +    for( i = 0; i < num_nodes; i++ ){
60.172 +        if( node_rank == exchange_node->reindex_map[i] ){
60.173 +            exchange_node->reindex_myid = i;
60.174 +            break;
60.175 +        }
60.176 +    }
60.177 +    /* Now, let's compute the inverse mapping here */
60.178 +    for( i = 0; i < num_nodes; i++){
60.179 +        j = 0;
60.180 +        while(exchange_node->reindex_map[j] != i ){
60.181 +            j++;
60.182 +        }
60.183 +        exchange_node->inv_reindex_map[i] = j;
60.184 +    }
60.185 +
60.186 +
60.187 +    /* Now we get the data sizes we should expect at each level */
60.188 +    /* now get the size of the data I am to receive from each peer */
60.189 +    /*int **payload_info;*/
60.190 +    prev_data = (int *) malloc( num_nodes*sizeof(int) );
60.191 +    if( NULL == prev_data ) {
60.192 +        goto Error;
60.193 +    }
60.194 +
60.195 +    current_data = (int *) malloc( num_nodes*sizeof(int) );
60.196 +    if( NULL == current_data ) {
60.197 +        goto Error;
60.198 +    }
60.199 +
60.200 +    /* calloc, so rows that were never allocated read as NULL if a later malloc fails */
60.201 +    exchange_node->payload_info = (netpatterns_payload_t **) calloc(pow_k, sizeof(netpatterns_payload_t *));
60.202 +    if( NULL == exchange_node->payload_info) {
60.203 +        goto Error;
60.204 +    }
60.205 +
60.206 +    for(i = 0; i < pow_k; i++){
60.207 +        exchange_node->payload_info[i] = (netpatterns_payload_t *) malloc(sizeof(netpatterns_payload_t)*(tree_order-1));
60.208 +        if( NULL == exchange_node->payload_info[i]) {
60.209 +            goto Error;
60.210 +        }
60.211 +
60.212 +    }
60.213 +    /* initialize the payload array
60.214 +       This is the money struct, just need to initialize this with
60.215 +       the subgroup information */
60.216 +    /*
60.217 +       for(i = 0; i < num_nodes; i++){
60.218 +       prev_data[i] = 1;
60.219 +       current_data[i] = 1;
60.220 +       }
60.221 +     */
60.222 +
60.223 +    for(i = 0; i < num_nodes; i++){
60.224 +        prev_data[i] = hier_ranks[i];
60.225 +        current_data[i] = hier_ranks[i];
60.226 +    }
60.227 +
60.228 +    /* everyone will need to do this loop over all ranks
60.229 +     * Phase I calculate the contribution from the extra ranks
60.230 +     */
60.231 +    for( myid = 0; myid < num_nodes; myid++) {
60.232 +        /* get my new rank */
60.233 +        for( j = 0; j < num_nodes; j++ ){
60.234 +            /* this will be satisfied for one of the indices */
60.235 +            if( myid == exchange_node->reindex_map[j] ){
60.236 +                reindex_myid = j;
60.237 +                break;
60.238 +            }
60.239 +        }
60.240 +
60.241 +        for( j = stray; j < num_nodes; j++) {
60.242 +            if(reindex_myid == ( j - cnt )) {
60.243 +                /* then this is a proxy rank */
60.244 +                prev_data[myid] += prev_data[exchange_node->reindex_map[j]];
60.245 +                break;
60.246 +            }
60.247 +
60.248 +        }
60.249 +    }
60.250 +
60.251 +    /* Phase II calculate the contribution from each recursive k - ing level
60.252 +     *
60.253 +     */
60.254 +    k_temp1 = tree_order; /* k^1 */
60.255 +    k_temp2 = 1; /* k^0 */
60.256 +    peer_base = 0;
60.257 +    base_temp = 0;
60.258 +    for( i = 0; i < pow_k; i++) {
60.259 +        /* get my new rank */
60.260 +        for( myid = 0; myid < num_nodes; myid++){
60.261 +            current_data[myid] = prev_data[myid];
60.262 +            /*fprintf(stderr,"my current data at level %d is %d\n",i+1,current_data[myid]);*/
60.263 +            for( j = 0; j < num_nodes; j++ ){
60.264 +                if( myid == exchange_node->reindex_map[j] ){
60.265 +                    reindex_myid = j;
60.266 +                    break;
60.267 +                }
60.268 +            }
60.269 +            if( reindex_myid < stray ) {
60.270 +                /* now start the actual algorithm */
60.271 +                FIND_BASE(base,reindex_myid,i+1,tree_order);
60.272 +                for( j = 0; j < ( tree_order - 1 ); j ++ ) {
60.273 +                    peer = base + (reindex_myid + k_temp2*(j+1))%k_temp1;
60.274 +                    if( peer < stray ) {
60.275 +                        /*fprintf(stderr,"getting %d bytes \n",prev_data[reindex_map[peer]]);*/
60.276 +                        /* then get the data */
60.277 +                        if( node_rank == myid ){
60.278 +                            exchange_node->payload_info[i][j].r_len = prev_data[exchange_node->reindex_map[peer]];
60.279 +                            /*fprintf(stderr,"exchange_node->payload_info[%d][%d].r_len %d\n",i,j,prev_data[exchange_node->reindex_map[peer]]);*/
60.280 +                            if( i > 0 ) {
60.281 +
60.282 +                                /* find my len and offset */
60.283 +                                FIND_BASE(peer_base,peer,i,tree_order);
60.284 +                                /* I do not want to mess with this, but it seems that I have no choice */
60.285 +                                ex_node = exchange_node->reindex_map[peer_base];
60.286 +                                /* now, find out how far down the line this guy really is */
60.287 +                                knt2 =0;
60.288 +                                for(kk = 0; kk < ex_node; kk++){
60.289 +                                    knt2 += hier_ranks[kk];
60.290 +                                }
60.291 +                                exchange_node->payload_info[i][j].r_offset = knt2;
60.292 +                                /*fprintf(stderr,"exchange_node->payload_info[%d][%d].r_offset %d\n",i,j,exchange_node->payload_info[i][j].r_offset);*/
60.293 +
60.294 +                                FIND_BASE(base_temp,reindex_myid,i,tree_order);
60.295 +                                ex_node = exchange_node->reindex_map[base_temp];
60.296 +                                knt2 = 0;
60.297 +                                for( kk = 0; kk < ex_node; kk++){
60.298 +                                    knt2 += hier_ranks[kk];
60.299 +                                }
60.300 +                                exchange_node->payload_info[i][j].s_offset =
60.301 +                                    knt2; /* exchange_node->reindex_map[base_temp]; */
60.302 +                                /*fprintf(stderr,"exchange_node->payload_info[%d][%d].s_offset %d\n",i,j,exchange_node->payload_info[i][j].s_offset);*/
60.303 +                            } else {
60.304 +                                ex_node = exchange_node->reindex_map[peer];
60.305 +                                knt2 =0;
60.306 +                                for(kk = 0; kk < ex_node; kk++){
60.307 +                                    knt2 += hier_ranks[kk];
60.308 +                                }
60.309 +                                exchange_node->payload_info[i][j].r_offset =
60.310 +                                    knt2; /*exchange_node->reindex_map[peer]; */
60.311 +                                /*fprintf(stderr,"exchange_node->payload_info[%d][%d].r_offset %d\n",i,j,exchange_node->payload_info[i][j].r_offset);*/
60.312 +                                knt2 = 0;
60.313 +                                for(kk = 0; kk < myid; kk++){
60.314 +                                    knt2 += hier_ranks[kk];
60.315 +                                }
60.316 +                                exchange_node->payload_info[i][j].s_offset = knt2;
60.317 +                                /*fprintf(stderr,"exchange_node->payload_info[%d][%d].s_offset %d\n",i,j, exchange_node->payload_info[i][j].s_offset);*/
60.318 +                            }
60.319 +                            /* how much I am to receive from this peer on this level */
60.320 +                            /* how much I am to send to this peer on this level */
60.321 +                            exchange_node->payload_info[i][j].s_len = prev_data[node_rank];
60.322 +                            /*fprintf(stderr,"exchange_node->payload_info[%d][%d].s_len %d\n",i,j,prev_data[node_rank]);*/
60.323 +                            /*fprintf(stderr,"I am rank %d receiveing %d bytes from rank %d at level %d\n",node_rank,
60.324 +                              prev_data[exchange_node->reindex_map[peer]],
60.325 +                              exchange_node->reindex_map[peer], i+1);*/
60.326 +                            /*fprintf(stderr,"I am rank %d sending %d bytes to rank %d at level %d\n",node_rank,prev_data[myid],
60.327 +                              exchange_node->reindex_map[peer],i+1);*/
60.328 +                        }
60.329 +
60.330 +                        current_data[myid] += prev_data[exchange_node->reindex_map[peer]];
60.331 +                    }
60.332 +                }
60.333 +            }
60.334 +
60.335 +
60.336 +        }
60.337 +        k_temp1 *= tree_order;
60.338 +        k_temp2 *= tree_order;
60.339 +        /* debug print */
60.340 +        /* fprintf(stderr,"Level %d current data ",i+1);*/
60.341 +        for( j = 0; j < num_nodes; j++){
60.342 +            /* fprintf(stderr,"%d ",current_data[j]); */
60.343 +            prev_data[j] = current_data[j];
60.344 +        }
60.345 +        /* fprintf(stderr,"\n");*/
60.346 +
60.347 +    }
60.348 +
60.349 +
60.350 +    /* this is the natural way to do recursive k-ing */
60.351 +    /* should never have more than one extra rank per proxy */
60.352 +    if( exchange_node->reindex_myid >= stray ){
60.353 +        /*fprintf(stderr,"Rank %d is mapped onto proxy rank %d \n",exchange_node->reindex_myid,exchange_node->reindex_myid - cnt);*/
60.354 +        exchange_node->node_type = EXTRA_NODE;
60.355 +    } else {
60.356 +        exchange_node->node_type = EXCHANGE_NODE;
60.357 +    }
60.358 +
60.359 +    /* set node characteristics - node that is not within the largest
60.360 +     * power of tree_order will just send its data to node that will participate
60.361 +     * in the recursive k-ing, and get the result back at the end.
60.362 +     * set the initial and final data exchanges - those that are not
60.363 +     * part of the recursive k-ing.
60.364 +     */
60.365 +    if (EXCHANGE_NODE == exchange_node->node_type) {
60.366 +        exchange_node->n_extra_sources = 0;
60.367 +        for( i = stray; i < num_nodes; i++) {
60.368 +            if(exchange_node->reindex_myid == ( i - cnt )) {
60.369 +                /* then I am a proxy rank and there is only a
60.370 +                 * single extra source
60.371 +                 */
60.372 +                exchange_node->n_extra_sources = 1;
60.373 +                break;
60.374 +            }
60.375 +        }
60.376 +
60.377 +        if (exchange_node->n_extra_sources > 0) {
60.378 +            exchange_node->rank_extra_sources_array = (int *) malloc
60.379 +                (exchange_node->n_extra_sources * sizeof(int));
60.380 +            if( NULL == exchange_node->rank_extra_sources_array ) {
60.381 +                goto Error;
60.382 +            }
60.383 +            /* you broke above */
60.384 +            exchange_node->rank_extra_sources_array[0] = exchange_node->reindex_map[i];
60.385 +        } else {
60.386 +            exchange_node->rank_extra_sources_array = NULL;
60.387 +        }
60.388 +    } else {
60.389 +        /* I am an extra rank, find my proxy rank */
60.390 +        exchange_node->n_extra_sources = 1;
60.391 +
60.392 +        exchange_node->rank_extra_sources_array = (int *) malloc
60.393 +            (exchange_node->n_extra_sources * sizeof(int));
60.394 +        if( NULL == exchange_node->rank_extra_sources_array ) {
60.395 +            goto Error;
60.396 +        }
60.397 +        exchange_node->rank_extra_sources_array[0] = exchange_node->reindex_map[exchange_node->reindex_myid - cnt];
60.398 +    }
60.399 +
60.400 +
60.401 +    /* set the exchange pattern */
60.402 +    if (EXCHANGE_NODE == exchange_node->node_type) {
60.403 +        /* yep, that's right PLUS 1 */
60.404 +        exchange_node->n_exchanges = n_levels + 1;
60.405 +        /* initialize this */
60.406 +        exchange_node->n_actual_exchanges = 0;
60.407 +        /* Allocate 2 dimension array that keeps rank exchange information
60.408 +           for each step (calloc: rows start out NULL for the Error path) */
60.409 +        exchange_node->rank_exchanges = (int **) calloc
60.410 +            (exchange_node->n_exchanges, sizeof(int *));
60.411 +        if(NULL == exchange_node->rank_exchanges) {
60.412 +            goto Error;
60.413 +        }
60.414 +        for (i = 0; i < exchange_node->n_exchanges; i++) {
60.415 +            exchange_node->rank_exchanges[i] = (int *) malloc
60.416 +                ((tree_order - 1) * sizeof(int));
60.417 +            if( NULL == exchange_node->rank_exchanges[i] ) {
60.418 +                goto Error;
60.419 +            }
60.420 +        }
60.421 +        k_temp1 = tree_order;
60.422 +        k_temp2 = 1;
60.423 +        /* fill in exchange partners */
60.424 +        /* Ok, now we start with the actual algorithm */
60.425 +        for( i = 0; i < exchange_node->n_exchanges; i ++) {
60.426 +            /*fprintf(stderr,"Starting Level %d\n",i+1);*/
60.427 +
60.428 +            FIND_BASE(base,exchange_node->reindex_myid,i+1,tree_order);
60.429 +            /*fprintf(stderr,"Myid %d base %d\n",node_rank,base);*/
60.430 +            for( j = 0; j < (tree_order-1); j ++ ) {
60.431 +                peer = base + (exchange_node->reindex_myid + k_temp2*(j+1))%k_temp1;
60.432 +                if ( peer < stray ) {
60.433 +                    exchange_node->rank_exchanges[i][j] = exchange_node->reindex_map[peer];
60.434 +                    /* an actual exchange occurs, bump the counter */
60.435 +
60.436 +                } else {
60.437 +                    /* out of range, skip it - do not bump the n_actual_exchanges counter */
60.438 +                    exchange_node->rank_exchanges[i][j] = -1;
60.439 +                }
60.440 +
60.441 +            }
60.442 +            k_temp1 *= tree_order;
60.443 +            k_temp2 *= tree_order;
60.444 +        }
60.445 +        for(i = 0; i < pow_k; i++){
60.446 +            for(j = 0; j < (tree_order-1); j++){
60.447 +                if(-1 != exchange_node->rank_exchanges[i][j]){
60.448 +                    /* then bump the counter */
60.449 +                    exchange_node->n_actual_exchanges++;
60.450 +                }
60.451 +            }
60.452 +        }
60.453 +
60.454 +    } else {
60.455 +        /* we are extra ranks and we don't participate in the exchange :( */
60.456 +        exchange_node->n_exchanges=0;
60.457 +        exchange_node->rank_exchanges=NULL;
60.458 +    }
60.459 +
60.460 +
60.461 +    /* set the number of tags needed per stripe - this must be the
60.462 +     * same across all procs in the communicator.
60.463 +     */
60.464 +    /* do we need this one */
60.465 +    exchange_node->n_tags = tree_order * n_levels + 1;
60.466 +
60.467 +    free(prev_data);
60.468 +    free(current_data);
60.469 +    free(group_info);
60.470 +
60.471 +    /* successful return */
60.472 +    return OMPI_SUCCESS;
60.473 +
60.474 +Error:
60.475 +
60.476 +    /* The pointers handled below were reset to NULL on entry and the
60.477 +     * rank_exchanges row table comes from calloc, so this cleanup is safe
60.478 +     * no matter which allocation failed. */
60.479 +    free(exchange_node->rank_extra_sources_array);
60.480 +    exchange_node->rank_extra_sources_array = NULL;
60.481 +
60.482 +    if (NULL != exchange_node->rank_exchanges) {
60.483 +        for (i = 0; i < exchange_node->n_exchanges; i++) {
60.484 +            free(exchange_node->rank_exchanges[i]);
60.485 +        }
60.486 +        free(exchange_node->rank_exchanges);
60.487 +        exchange_node->rank_exchanges = NULL;
60.488 +    }
60.489 +    exchange_node->n_exchanges = 0;
60.490 +
60.491 +    /* free(NULL) is a no-op, so the scratch arrays need no guards */
60.492 +    free(prev_data);
60.493 +    free(current_data);
60.494 +    free(group_info);
60.495 +
60.496 +    /* NOTE(review): reindex_map, inv_reindex_map and payload_info are left
60.497 +     * attached to exchange_node, exactly as before; confirm that the
60.498 +     * caller's cleanup releases them (payload_info rows that were never
60.499 +     * allocated are NULL). */
60.500 +
60.501 +    /* error return */
60.502 +    return OMPI_ERROR;
60.503 +}
60.504 +
60.505 +
60.506 +OMPI_DECLSPEC int netpatterns_setup_recursive_knomial_tree_node(
60.507 + int num_nodes, int node_rank, int tree_order,
60.508 + netpatterns_k_exchange_node_t *exchange_node)
60.509 +{
60.510 + /* local variables */
60.511 + int i, j, tmp, cnt;
60.512 + int n_levels;
60.513 + int k_base, kpow_num, peer;
60.514 +
60.515 + NETPATTERNS_VERBOSE(
60.516 + ("Enter netpatterns_setup_recursive_knomial_tree_node(num_nodes=%d, node_rank=%d, tree_order=%d)",
60.517 + num_nodes, node_rank, tree_order));
60.518 +
60.519 + assert(num_nodes > 1);
60.520 + assert(tree_order > 1);
60.521 + if (tree_order > num_nodes) {
60.522 + tree_order = num_nodes;
60.523 + }
60.524 +
60.525 + exchange_node->tree_order = tree_order;
60.526 +
60.527 + /* figure out number of levels in the tree */
60.528 + n_levels = 0;
60.529 + /* cnt - number of ranks in given level */
60.530 + cnt=1;
60.531 + while ( num_nodes > cnt ) {
60.532 + cnt *= tree_order;
60.533 + n_levels++;
60.534 + };
60.535 +
60.536 + /* figure out the largest power of tree_order that is less than or equal to
60.537 + * num_nodes */
60.538 + if ( cnt > num_nodes) {
60.539 + cnt /= tree_order;
60.540 + n_levels--;
60.541 + }
60.542 +
60.543 + exchange_node->log_tree_order = n_levels;
60.544 + exchange_node->n_largest_pow_tree_order = cnt;
60.545 +
60.546 + /* set node characteristics - node that is not within the largest
60.547 + * power of tree_order will just send it's data to node that will participate
60.548 + * in the recursive doubling, and get the result back at the end.
60.549 + */
60.550 + if (node_rank + 1 > cnt) {
60.551 + exchange_node->node_type = EXTRA_NODE;
60.552 + } else {
60.553 + exchange_node->node_type = EXCHANGE_NODE;
60.554 + }
60.555 +
60.556 +
60.557 + /* set the initial and final data exchanges - those that are not
60.558 + * part of the recursive doubling.
60.559 + */
60.560 + if (EXCHANGE_NODE == exchange_node->node_type) {
60.561 + exchange_node->n_extra_sources = 0;
60.562 + for (i = 0, tmp = node_rank * (tree_order - 1) + cnt + i;
60.563 + tmp < num_nodes && i < tree_order - 1;
60.564 + ++i, ++tmp) {
60.565 + ++exchange_node->n_extra_sources;
60.566 + }
60.567 +
60.568 + assert(exchange_node->n_extra_sources < tree_order);
60.569 +
60.570 + if (exchange_node->n_extra_sources > 0) {
60.571 + exchange_node->rank_extra_sources_array = (int *) malloc
60.572 + (exchange_node->n_extra_sources * sizeof(int));
60.573 + if( NULL == exchange_node->rank_extra_sources_array ) {
60.574 + goto Error;
60.575 + }
60.576 + for (i = 0, tmp = node_rank * (tree_order - 1) + cnt;
60.577 + i < tree_order - 1 && tmp < num_nodes; ++i, ++tmp) {
60.578 + NETPATTERNS_VERBOSE(("extra_source#%d = %d", i, tmp));
60.579 + exchange_node->rank_extra_sources_array[i] = tmp;
60.580 + }
60.581 + } else {
60.582 + exchange_node->rank_extra_sources_array = NULL;
60.583 + }
60.584 + } else {
60.585 + exchange_node->n_extra_sources = 1;
60.586 + exchange_node->rank_extra_sources_array = (int *) malloc (sizeof(int));
60.587 + if( NULL == exchange_node->rank_extra_sources_array ) {
60.588 + goto Error;
60.589 + }
60.590 + exchange_node->rank_extra_sources_array[0] = (node_rank - cnt) / (tree_order - 1);
60.591 + NETPATTERNS_VERBOSE(("extra_source#%d = %d", 0,
60.592 + exchange_node->rank_extra_sources_array[0] ));
60.593 + }
60.594 +
60.595 + /* set the exchange pattern */
60.596 + if (EXCHANGE_NODE == exchange_node->node_type) {
60.597 + exchange_node->n_exchanges = n_levels;
60.598 + /* Allocate 2 dimension array thak keeps
60.599 + rank exchange information for each step*/
60.600 + exchange_node->rank_exchanges = (int **) malloc
60.601 + (exchange_node->n_exchanges * sizeof(int *));
60.602 + if(NULL == exchange_node->rank_exchanges) {
60.603 + goto Error;
60.604 + }
60.605 + for (i = 0; i < exchange_node->n_exchanges; i++) {
60.606 + exchange_node->rank_exchanges[i] = (int *) malloc
60.607 + ((tree_order - 1) * sizeof(int));
60.608 + if( NULL == exchange_node->rank_exchanges ) {
60.609 + goto Error;
60.610 + }
60.611 + }
60.612 + /* fill in exchange partners */
60.613 + for(i = 0, kpow_num = 1; i < exchange_node->n_exchanges;
60.614 + i++, kpow_num *= tree_order) {
60.615 + k_base = node_rank / (kpow_num * tree_order);
60.616 + for(j = 1; j < tree_order; j++) {
60.617 + peer = node_rank + kpow_num * j;
60.618 + if (k_base != peer/(kpow_num * tree_order)) {
60.619 + /* Wraparound the number */
60.620 + peer = k_base * (kpow_num * tree_order) +
60.621 + peer % (kpow_num * tree_order);
60.622 + }
60.623 + exchange_node->rank_exchanges[i][j - 1] = peer;
60.624 + NETPATTERNS_VERBOSE(("rank_exchanges#(%d,%d)/%d = %d",
60.625 + i, j, tree_order, peer));
60.626 + }
60.627 + }
60.628 + } else {
60.629 + exchange_node->n_exchanges=0;
60.630 + exchange_node->rank_exchanges=NULL;
60.631 + }
60.632 +
60.633 + /* set the number of tags needed per stripe - this must be the
60.634 + * same across all procs in the communicator.
60.635 + */
60.636 + /* do we need this one */
60.637 + exchange_node->n_tags = tree_order * n_levels + 1;
60.638 +
60.639 + /* successful return */
60.640 + return OMPI_SUCCESS;
60.641 +
60.642 +Error:
60.643 +
60.644 + if (NULL != exchange_node->rank_extra_sources_array) {
60.645 + free(exchange_node->rank_extra_sources_array);
60.646 + }
60.647 +
60.648 + if (NULL != exchange_node->rank_exchanges) {
60.649 + for (i = 0; i < exchange_node->n_exchanges; i++) {
60.650 + if (NULL != exchange_node->rank_exchanges[i]) {
60.651 + free(exchange_node->rank_exchanges[i]);
60.652 + }
60.653 + }
60.654 + free(exchange_node->rank_exchanges);
60.655 + }
60.656 +
60.657 + /* error return */
60.658 + return OMPI_ERROR;
60.659 +}
60.660 +/* Fill exchange_node with the recursive-doubling pattern (radix tree_order) of node_rank within num_nodes ranks. */
60.661 +#if 1
60.662 +OMPI_DECLSPEC int netpatterns_setup_recursive_doubling_n_tree_node(int num_nodes, int node_rank, int tree_order,
60.663 + netpatterns_pair_exchange_node_t *exchange_node)
60.664 +{
60.665 + /* local variables */
60.666 + int i, tmp, cnt;
60.667 + int n_levels;
60.668 + int shift, mask;
60.669 + /* Returns OMPI_SUCCESS, or OMPI_ERROR when an allocation fails (both arrays are then left NULL). */
60.670 + NETPATTERNS_VERBOSE(("Enter netpatterns_setup_recursive_doubling_n_tree_node(num_nodes=%d, node_rank=%d, tree_order=%d)", num_nodes, node_rank, tree_order));
60.671 +
60.672 + assert(num_nodes > 1);
60.673 + while (tree_order > num_nodes) {
60.674 + tree_order /= 2;
60.675 + }
60.676 +
60.677 + exchange_node->tree_order = tree_order;
60.678 + /* Only tree_order values that are a power of two are supported */
60.679 + assert(0 == (tree_order & (tree_order - 1)));
60.680 +
60.681 + /* figure out number of levels in the tree */
60.682 + n_levels = 0;
60.683 + /* cnt - number of ranks in given level */
60.684 + cnt=1;
60.685 + while ( num_nodes > cnt ) {
60.686 + cnt *= tree_order;
60.687 + n_levels++;
60.688 + };
60.689 +
60.690 + /* figure out the largest power of tree_order that is less than or equal to
60.691 + * num_nodes */
60.692 + if ( cnt > num_nodes) {
60.693 + cnt /= tree_order;
60.694 + n_levels--;
60.695 + }
60.696 + exchange_node->log_tree_order = n_levels;
60.697 + if (2 == tree_order) {
60.698 + exchange_node->log_2 = exchange_node->log_tree_order;
60.699 + }
60.700 +
60.701 + tmp=1;
60.702 + for (i=0 ; i < n_levels ; i++ ) {
60.703 + tmp *= tree_order;
60.704 + }
60.705 + /* Ishai: I see no reason for calculating tmp. Add an assert before deleting it */
60.706 + assert(tmp == cnt);
60.707 +
60.708 + exchange_node->n_largest_pow_tree_order = tmp;
60.709 + if (2 == tree_order) {
60.710 + exchange_node->n_largest_pow_2 = exchange_node->n_largest_pow_tree_order;
60.711 + }
60.712 +
60.713 + /* set node characteristics - a node that is not within the largest
60.714 + * power of tree_order will just send its data to a node that will participate
60.715 + * in the recursive doubling, and get the result back at the end.
60.716 + */
60.717 + if ( node_rank + 1 > cnt ) {
60.718 + exchange_node->node_type = EXTRA_NODE;
60.719 + } else {
60.720 + exchange_node->node_type = EXCHANGE_NODE;
60.721 + }
60.722 +
60.723 + /* set the initial and final data exchanges - those that are not
60.724 + * part of the recursive doubling.
60.725 + */
60.726 + if ( EXCHANGE_NODE == exchange_node->node_type ) {
60.727 + exchange_node->n_extra_sources = 0;
60.728 + for (tmp = node_rank + cnt; tmp < num_nodes; tmp += cnt) {
60.729 + ++exchange_node->n_extra_sources;
60.730 + }
60.731 + if (exchange_node->n_extra_sources > 0) {
60.732 + exchange_node->rank_extra_sources_array = (int *) malloc
60.733 + (exchange_node->n_extra_sources * sizeof(int));
60.734 + if( NULL == exchange_node->rank_extra_sources_array ) {
60.735 + goto Error;
60.736 + }
60.737 + for (i = 0, tmp = node_rank + cnt; tmp < num_nodes; ++i, tmp += cnt) {
60.738 + NETPATTERNS_VERBOSE(("extra_source#%d = %d", i, tmp));
60.739 + exchange_node->rank_extra_sources_array[i] = tmp;
60.740 + }
60.741 + } else {
60.742 + exchange_node->rank_extra_sources_array = NULL;
60.743 + }
60.744 + } else {
60.745 + exchange_node->n_extra_sources = 1;
60.746 + exchange_node->rank_extra_sources_array = (int *) malloc (sizeof(int));
60.747 + if( NULL == exchange_node->rank_extra_sources_array ) {
60.748 + goto Error;
60.749 + }
60.750 + exchange_node->rank_extra_sources_array[0] = node_rank & (cnt - 1);
60.751 + NETPATTERNS_VERBOSE(("extra_source#%d = %d", 0, node_rank & (cnt - 1)));
60.752 + }
60.753 +
60.754 + /* Ishai: To be compatible with the old structure - should be removed later */
60.755 + if (1 == exchange_node->n_extra_sources) {
60.756 + exchange_node->rank_extra_source = exchange_node->rank_extra_sources_array[0];
60.757 + } else {
60.758 + exchange_node->rank_extra_source = -1;
60.759 + }
60.760 +
60.761 + /* set the exchange pattern */
60.762 + if ( EXCHANGE_NODE == exchange_node->node_type ) {
60.763 + exchange_node->n_exchanges = n_levels * (tree_order - 1);
60.764 + exchange_node->rank_exchanges = (int *) malloc
60.765 + (exchange_node->n_exchanges * sizeof(int));
60.766 + if( NULL == exchange_node->rank_exchanges ) {
60.767 + goto Error;
60.768 + }
60.769 +
60.770 + /* fill in exchange partners: tree_order - 1 partners per level, each node_rank XOR (mask * shift) */
60.771 + for ( i = 0, shift = 1 ; i < exchange_node->n_exchanges ; shift *= tree_order ) {
60.772 + for ( mask = 1 ; mask < tree_order ; ++mask, ++i ) {
60.773 + exchange_node->rank_exchanges[i] = node_rank ^ (mask * shift);
60.774 + NETPATTERNS_VERBOSE(("rank_exchanges#%d/%d = %d", i, tree_order, node_rank ^ (mask * shift)));
60.775 + }
60.776 + }
60.777 +
60.778 + } else {
60.779 +
60.780 + exchange_node->n_exchanges=0;
60.781 + exchange_node->rank_exchanges=NULL;
60.782 +
60.783 + }
60.784 +
60.785 + /* set the number of tags needed per stripe - this must be the
60.786 + * same across all procs in the communicator.
60.787 + */
60.788 + /* Ishai: Need to find out what is n_tags */
60.789 + exchange_node->n_tags = tree_order * n_levels + 1;
60.790 +
60.791 + /* successful return */
60.792 + return OMPI_SUCCESS;
60.793 +
60.794 +Error:
60.795 + /* free(NULL) is a no-op; leave both arrays NULL so that a later
60.796 + * netpatterns_free_recursive_doubling_tree_node() cannot free stale memory */
60.797 + free(exchange_node->rank_extra_sources_array);
60.798 + exchange_node->rank_extra_sources_array = NULL;
60.799 + exchange_node->rank_exchanges = NULL;
60.800 + return OMPI_ERROR;
60.801 +}
60.802 +/* Release the rank_extra_sources_array and rank_exchanges arrays owned by exchange_node. */
60.803 +OMPI_DECLSPEC void netpatterns_free_recursive_doubling_tree_node(
60.804 + netpatterns_pair_exchange_node_t *exchange_node)
60.805 +{
60.806 + NETPATTERNS_VERBOSE(("About to release rank_extra_sources_array and rank_exchanges"));
60.807 + /* free(NULL) is a no-op, so no guard is needed; reset the pointers so
60.808 + * that calling this routine twice cannot double free */
60.809 + free(exchange_node->rank_extra_sources_array);
60.810 + exchange_node->rank_extra_sources_array = NULL;
60.811 +
60.812 + free(exchange_node->rank_exchanges);
60.813 + exchange_node->rank_exchanges = NULL;
60.814 +}
60.815 +#endif
60.816 +/* Radix-2 convenience wrapper: forwards to netpatterns_setup_recursive_doubling_n_tree_node() with tree_order = 2. */
60.817 +OMPI_DECLSPEC int netpatterns_setup_recursive_doubling_tree_node(int num_nodes, int node_rank,
60.818 + netpatterns_pair_exchange_node_t *exchange_node)
60.819 +{
60.820 + return netpatterns_setup_recursive_doubling_n_tree_node(num_nodes, node_rank, 2, exchange_node); /* result is passed through unchanged */
60.821 +}
60.822 +/* Alternative implementation of the same entry point, disabled with #if 0 and therefore not compiled. */
60.823 +#if 0
60.824 +/*OMPI_DECLSPEC int old_netpatterns_setup_recursive_doubling_tree_node(int num_nodes, int node_rank,*/
60.825 +OMPI_DECLSPEC int netpatterns_setup_recursive_doubling_n_tree_node(int num_nodes, int node_rank,int tree_order,
60.826 + netpatterns_pair_exchange_node_t *exchange_node)
60.827 +{
60.828 + /* local variables */
60.829 + /*int tree_order;*/
60.830 + int i,tmp,cnt,result,n_extra_nodes;
60.831 + int n_exchanges;
60.832 +
60.833 + /* figure out number of levels in the tree */
60.834 +
60.835 + n_exchanges=0;
60.836 + result=num_nodes;
60.837 +/* tree_order=2;*/
60.838 + /* cnt - number of ranks in given level */
60.839 + cnt=1;
60.840 + while( num_nodes > cnt ) {
60.841 + cnt*=tree_order;
60.842 + n_exchanges++;
60.843 + };
60.844 +
60.845 + /* figure out the largest power of tree_order that is less than or equal to
60.846 + * num_nodes */
60.847 + if( cnt > num_nodes) {
60.848 + cnt/=tree_order;
60.849 + n_exchanges--;
60.850 + }
60.851 + exchange_node->log_2=n_exchanges;
60.852 +
60.853 + tmp=1;
60.854 + for(i=0 ; i < n_exchanges ; i++ ) {
60.855 + tmp*=2; /* NOTE(review): multiplies by 2 although cnt above is scaled by tree_order - confirm */
60.856 + }
60.857 + exchange_node->n_largest_pow_2=tmp;
60.858 +
60.859 + /* set node characteristics - a node that is not within the largest
60.860 + * power (cnt) will just send its data to a node that will participate
60.861 + * in the recursive doubling, and get the result back at the end.
60.862 + */
60.863 + if( node_rank+1 > cnt ) {
60.864 + exchange_node->node_type=EXTRA_NODE;
60.865 + } else {
60.866 + exchange_node->node_type=EXCHANGE_NODE;
60.867 + }
60.868 +
60.869 + /* set the initial and final data exchanges - those that are not
60.870 + * part of the recursive doubling.
60.871 + */
60.872 + n_extra_nodes=num_nodes-cnt;
60.873 +
60.874 + if ( EXCHANGE_NODE == exchange_node->node_type ) {
60.875 +
60.876 + if( node_rank < n_extra_nodes ) {
60.877 + exchange_node->n_extra_sources=1;
60.878 + exchange_node->rank_extra_source=cnt+node_rank;
60.879 + } else {
60.880 + exchange_node->n_extra_sources=0;
60.881 + exchange_node->rank_extra_source=-1;
60.882 + }
60.883 +
60.884 + } else {
60.885 + exchange_node->n_extra_sources=1;
60.886 + exchange_node->rank_extra_source=node_rank-cnt;
60.887 + }
60.888 +
60.889 + /* set the exchange pattern */
60.890 + if( EXCHANGE_NODE == exchange_node->node_type ) {
60.891 +
60.892 + exchange_node->n_exchanges=n_exchanges;
60.893 + exchange_node->rank_exchanges=(int *) malloc
60.894 + (n_exchanges*sizeof(int));
60.895 + if( NULL == exchange_node->rank_exchanges ) {
60.896 + goto Error;
60.897 + }
60.898 +
60.899 + /* fill in exchange partners: bit i of node_rank picks partner node_rank -/+ 2^i */
60.900 + result=1;
60.901 + tmp=node_rank;
60.902 + for( i=0 ; i < n_exchanges ; i++ ) {
60.903 + if(tmp & 1 ) {
60.904 + exchange_node->rank_exchanges[i]=
60.905 + node_rank-result;
60.906 + } else {
60.907 + exchange_node->rank_exchanges[i]=
60.908 + node_rank+result;
60.909 + }
60.910 + result*=2;
60.911 + tmp/=2;
60.912 + }
60.913 +
60.914 + } else {
60.915 +
60.916 + exchange_node->n_exchanges=0;
60.917 + exchange_node->rank_exchanges=NULL;
60.918 +
60.919 + }
60.920 +
60.921 + /* set the number of tags needed per stripe - this must be the
60.922 + * same across all procs in the communicator.
60.923 + */
60.924 + exchange_node->n_tags=2*n_exchanges+1;
60.925 +
60.926 + /* Ishai: make sure free also works for callers of this function (no extra-sources array is allocated here) */
60.927 + exchange_node->rank_extra_sources_array = NULL;
60.928 +
60.929 + /* successful return */
60.930 + return OMPI_SUCCESS;
60.931 +
60.932 +Error:
60.933 +
60.934 + /* error return - reached only when the rank_exchanges allocation failed */
60.935 + return OMPI_ERROR;
60.936 +}
60.937 +#endif
60.938 +
61.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
61.2 +++ b/ompi/patterns/net/netpatterns_knomial_tree.h Tue Feb 05 21:52:55 2013 +0000
61.3 @@ -0,0 +1,254 @@
61.4 +/*
61.5 + * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
61.6 + * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
61.7 + * Copyright (c) 2012 Los Alamos National Security, LLC.
61.8 + * All rights reserved.
61.9 + * $COPYRIGHT$
61.10 + *
61.11 + * Additional copyrights may follow
61.12 + *
61.13 + * $HEADER$
61.14 + */
61.15 +
61.16 +#ifndef COMM_PATTERNS_KNOMIAL_TREE_H
61.17 +#define COMM_PATTERNS_KNOMIAL_TREE_H
61.18 +
61.19 +#include "ompi_config.h"
61.20 +
61.21 +BEGIN_C_DECLS
61.22 +
61.23 +
61.24 +/*
61.25 + * Pair-wise data exchange
61.26 + */
61.27 +
61.28 +/* node role stored in the node_type field of the exchange-node structures */
61.29 +enum {
61.30 + EXCHANGE_NODE, /* rank lies within the largest power of tree_order; takes part in the exchange steps */
61.31 + EXTRA_NODE /* rank lies beyond that power; paired with an exchange node via rank_extra_sources_array */
61.32 +};
61.33 +/* Per-rank description of a pair-wise (recursive doubling) exchange pattern. */
61.34 +struct netpatterns_pair_exchange_node_t {
61.35 +
61.36 + /* Order (radix) of the tree - usually 2 */
61.37 + int tree_order;
61.38 +
61.39 + /* number of entries in rank_exchanges, i.e. nodes this node will exchange data with */
61.40 + int n_exchanges;
61.41 +
61.42 + /* ranks of nodes involved in data exchange (NULL when n_exchanges is 0) */
61.43 + int *rank_exchanges;
61.44 +
61.45 + /* number of extra sources of data - outside largest power of tree_order in
61.46 + * this group */
61.47 + int n_extra_sources;
61.48 +
61.49 + /* rank of the extra source: the single array entry, or -1 when there is not exactly one */
61.50 + /* deprecated */ int rank_extra_source;
61.51 + int *rank_extra_sources_array; /* n_extra_sources ranks, or NULL when there are none */
61.52 +
61.53 + /* number of tags needed per stripe */
61.54 + int n_tags;
61.55 +
61.56 + /* log of the largest full power of tree_order for this node set (log_2 is only set when tree_order is 2) */
61.57 + /* deprecated */ int log_2;
61.58 + int log_tree_order;
61.59 +
61.60 + /* largest power of tree_order that fits in this group (n_largest_pow_2 is only set when tree_order is 2) */
61.61 + /* deprecated */ int n_largest_pow_2;
61.62 + int n_largest_pow_tree_order;
61.63 +
61.64 + /* node type: EXCHANGE_NODE or EXTRA_NODE */
61.65 + int node_type;
61.66 +
61.67 +};
61.68 +typedef struct netpatterns_pair_exchange_node_t netpatterns_pair_exchange_node_t;
61.69 +/* Payload record referenced by netpatterns_k_exchange_node_t.payload_info. NOTE(review): field meanings are not shown here - confirm */
61.70 +struct netpatterns_payload_t {
61.71 + int s_len; /* presumably the send length - TODO confirm */
61.72 + int r_len; /* presumably the receive length - TODO confirm */
61.73 + int s_offset; /* presumably the send offset - TODO confirm */
61.74 + int r_offset; /* presumably the receive offset - TODO confirm */
61.75 +};
61.76 +typedef struct netpatterns_payload_t netpatterns_payload_t;
61.77 +/* Per-rank description of a recursive k-nomial (k-ing) exchange pattern. */
61.78 +struct netpatterns_k_exchange_node_t {
61.79 + /* Order (radix k) of the tree - usually 2 */
61.80 + int tree_order;
61.81 + /* number of exchange steps, i.e. rows in rank_exchanges */
61.82 + int n_exchanges;
61.83 + /* total number of exchanges that I actually participate in */
61.84 + int n_actual_exchanges;
61.85 + /* ranks of nodes involved in data exchange, one row of peers per step */
61.86 + int **rank_exchanges;
61.87 + /* number of extra sources of data - outside largest power of k in
61.88 + * this group */
61.89 + int n_extra_sources;
61.90 + /* rank/s of the extra source (NULL when there are none) */
61.91 + int *rank_extra_sources_array;
61.92 + /* number of tags needed per stripe */
61.93 + int n_tags;
61.94 + /* log k of largest full power of k for this node set */
61.95 + int log_tree_order;
61.96 + /* largest power of k that fits in this group */
61.97 + int n_largest_pow_tree_order;
61.98 + /* node type: EXCHANGE_NODE or EXTRA_NODE */
61.99 + int node_type;
61.100 + /* start of extra ranks k_nomial */
61.101 + int k_nomial_stray;
61.102 + /* reindex map */
61.103 + int *reindex_map;
61.104 + /* inverse of reindex map, i.e. given a reindexed id find out its actual rank */
61.105 + int *inv_reindex_map;
61.106 + /* reindexed node_rank */
61.107 + int reindex_myid;
61.108 + /* 2-d array that holds payload info for each level of recursive k-ing */
61.109 + netpatterns_payload_t **payload_info;
61.110 +};
61.111 +typedef struct netpatterns_k_exchange_node_t
61.112 + netpatterns_k_exchange_node_t;
61.113 +/* Fill exchange_node with the radix-tree_order recursive-doubling pattern of node_rank; returns OMPI_SUCCESS or OMPI_ERROR. */
61.114 +OMPI_DECLSPEC int netpatterns_setup_recursive_doubling_n_tree_node(int num_nodes, int node_rank, int tree_order,
61.115 + netpatterns_pair_exchange_node_t *exchange_node);
61.116 +/* Free the rank_extra_sources_array and rank_exchanges arrays of exchange_node. */
61.117 +OMPI_DECLSPEC void netpatterns_free_recursive_doubling_tree_node(
61.118 + netpatterns_pair_exchange_node_t *exchange_node);
61.119 +/* Same as the _n_ variant above with tree_order fixed to 2. */
61.120 +OMPI_DECLSPEC int netpatterns_setup_recursive_doubling_tree_node(int num_nodes, int node_rank,
61.121 + netpatterns_pair_exchange_node_t *exchange_node);
61.122 +/* Presumably fills exchange_node with the recursive k-nomial pattern of node_rank - TODO confirm against the definition. */
61.123 +OMPI_DECLSPEC int netpatterns_setup_recursive_knomial_tree_node(
61.124 + int num_nodes, int node_rank, int tree_order,
61.125 + netpatterns_k_exchange_node_t *exchange_node);
61.126 +/* Allgather flavour of the k-nomial setup; NOTE(review): the meaning of hier_ranks is not visible here - confirm. */
61.127 +OMPI_DECLSPEC int netpatterns_setup_recursive_knomial_allgather_tree_node(
61.128 + int num_nodes, int node_rank, int tree_order, int *hier_ranks,
61.129 + netpatterns_k_exchange_node_t *exchange_node);
61.130 +
61.131 +
61.132 +/* Input: my_rank, src_rank, radix, size (a negative src_rank - my_rank is wrapped by adding size)
61.133 + Output: *k_level = largest power of radix <= that distance (1 when the distance < radix);
61.134 + returns log_radix(*k_level) - 1, used as the "start_point" for outgoing send.
61.135 +
61.136 + Please see the example of usage below:
61.137 + for (i = start_point ; i > 0; i--)
61.138 + for (k = 0; k < tree_radix; k++)
61.139 + send messages to exchange_node->rank_exchanges[i][k];
61.140 +*/
61.141 +
61.142 +static inline __opal_attribute_always_inline__
61.143 +int netpatterns_get_knomial_level(
61.144 + int my_rank, int src_rank,
61.145 + int radix, int size,
61.146 + int *k_level)
61.147 +{
61.148 + int distance,
61.149 + pow_k;
61.150 + int logk_level = 0;
61.151 +
61.152 + /* Calculate distance from source of data */
61.153 + distance = src_rank - my_rank;
61.154 +
61.155 + /* Wrap around */
61.156 + if (0 > distance) {
61.157 + distance += size;
61.158 + }
61.159 +
61.160 + pow_k = 1; /* NOTE(review): the loop below assumes radix >= 2 - confirm callers guarantee it */
61.161 + while(distance / (pow_k * radix)) {
61.162 + pow_k *= radix;
61.163 + ++logk_level;
61.164 + }
61.165 + --logk_level; /* one less than the number of multiplications; -1 when the loop never ran */
61.166 +
61.167 + *k_level = pow_k;
61.168 + return logk_level;
61.169 +}
61.170 +
61.171 +/* Input: my_rank, root, radix, size
61.172 + * Output: returns the source of the data; *k_level = power of radix reached, *logk_level = its exponent
61.173 + */
61.174 +static inline __opal_attribute_always_inline__
61.175 +int netpatterns_get_knomial_data_source(
61.176 + int my_rank, int root, int radix, int size,
61.177 + int *k_level, int *logk_level)
61.178 +{
61.179 + int level = radix;
61.180 + int step = 0;
61.181 +
61.182 + /* Calculate source of the data: grow level while it divides (root - my_rank) and does not exceed size */
61.183 + while((0 == (root - my_rank) % level)
61.184 + && (level <= size)) {
61.185 + level *= radix;
61.186 + ++step;
61.187 + }
61.188 +
61.189 + *k_level = level/radix; /* last power of radix that passed the loop test (1 if none did) */
61.190 + *logk_level = step; /* number of loop iterations */
61.191 + return my_rank - (my_rank % level - root % level);
61.192 +}
61.193 +
61.194 +/* Input: my_rank, radix,
61.195 + * k_level - the value you get from netpatterns_get_knomial_data_source
61.196 + * k_step - some integer
61.197 + * Output: peer - next child in the tree
61.198 + * Usage:
61.199 + * src = netpatterns_get_knomial_data_source(
61.200 + * my_rank, root, radix, size,
61.201 + * &k_level, &logk_level)
61.202 + * recv_from(src......);
61.203 + *
61.204 + * MCA_COMMON_NETPATTERNS_GET_NEXT_KNOMIAL_INIT(step_info, k_level, my_rank);
61.205 + * while(MCA_COMMON_NETPATTERNS_GET_NEXT_KNOMIAL_PEER_CHECK_LEVEL(step_info)) {
61.206 + * MCA_COMMON_NETPATTERNS_GET_NEXT_KNOMIAL_PEER(my_rank, radix, step_info, peer);
61.207 + * send_to(peer....);
61.208 + * }
61.209 + * for more examples please grep in the ptpcoll bcol bcast files
61.210 + */
61.211 +
61.212 +typedef struct netpatterns_knomial_step_info_t {
61.213 + int k_step; /* step counter within the current level; starts at 1 and is reset to 1 when it reaches radix */
61.214 + int k_level; /* current level (a power of radix); divided by radix when a level is finished */
61.215 + int k_tmp_peer; /* rank from which the next peer is derived: the previous peer, or my_rank after a level change */
61.216 +} netpatterns_knomial_step_info_t;
61.217 +/* Bcast helper: divide step_info.k_level by radix unless step_info.k_step is 1. */
61.218 +#define MCA_COMMON_NETPATTERNS_GET_NEXT_KNOMIAL_UPDATE_LEVEL_FOR_BCAST(step_info, radix)\
61.219 +do { \
61.220 + if (1 != (step_info).k_step) { \
61.221 + (step_info).k_level /= (radix); \
61.222 + } \
61.223 +} while (0)
61.224 +/* Start a walk: step 1, level in_k_level, first peer derived from in_peer. */
61.225 +#define MCA_COMMON_NETPATTERNS_GET_NEXT_KNOMIAL_INIT(step_info, in_k_level, in_peer)\
61.226 +do { \
61.227 + (step_info).k_step = 1; \
61.228 + (step_info).k_level = (in_k_level); \
61.229 + (step_info).k_tmp_peer = (in_peer); \
61.230 +} while (0)
61.231 +/* Non-zero while there are still levels (k_level > 1) left to walk. */
61.232 +#define MCA_COMMON_NETPATTERNS_GET_NEXT_KNOMIAL_PEER_CHECK_LEVEL(step_info) \
61.233 + ((step_info).k_level > 1)
61.234 +/* Store the next peer rank in peer and advance step_info; moves to the next lower level once k_step reaches radix. */
61.235 +#define MCA_COMMON_NETPATTERNS_GET_NEXT_KNOMIAL_PEER(my_rank, radix, step_info, peer) \
61.236 +do { \
61.237 + int rank_radix_base = (my_rank)/(step_info).k_level; \
61.238 + \
61.239 + (peer) = (step_info).k_tmp_peer + (step_info).k_level/(radix); \
61.240 + if (rank_radix_base != (peer)/(step_info).k_level) { \
61.241 + /* Wraparound the number */ \
61.242 + (peer) -= (step_info).k_level; \
61.243 + assert((peer) >=0); \
61.244 + } \
61.245 + ++(step_info).k_step; \
61.246 + if ((radix) == (step_info).k_step) { \
61.247 + (step_info).k_level /= (radix); \
61.248 + (step_info).k_step = 1; \
61.249 + (step_info).k_tmp_peer = (my_rank); \
61.250 + } else { \
61.251 + (step_info).k_tmp_peer = (peer); \
61.252 + } \
61.253 + \
61.254 +} while (0)
61.255 +
61.256 +END_C_DECLS
61.257 +#endif
62.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
62.2 +++ b/ompi/patterns/net/netpatterns_multinomial_tree.c Tue Feb 05 21:52:55 2013 +0000
62.3 @@ -0,0 +1,190 @@
62.4 +/*
62.5 + * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
62.6 + * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
62.7 + * $COPYRIGHT$
62.8 + *
62.9 + * Additional copyrights may follow
62.10 + *
62.11 + * $HEADER$
62.12 + */
62.13 +
62.14 +#include "ompi_config.h"
62.15 +#ifdef HAVE_UNISTD_H
62.16 +#include <unistd.h>
62.17 +#endif
62.18 +#include <sys/types.h>
62.19 +#ifdef HAVE_SYS_MMAN_H
62.20 +#include <sys/mman.h>
62.21 +#endif
62.22 +#include <fcntl.h>
62.23 +#include <stdlib.h>
62.24 +
62.25 +#include "ompi/constants.h"
62.26 +#include "netpatterns.h"
62.27 +
62.28 +
62.29 +/* set up a multi-nomial tree - for each of the num_nodes entries of tree_nodes
62.30 + * this fills in its parent and its children; returns OMPI_SUCCESS or OMPI_ERROR */
62.31 +
62.32 +OMPI_DECLSPEC int netpatterns_setup_multinomial_tree(int tree_order, int num_nodes,
62.33 + netpatterns_tree_node_t *tree_nodes)
62.34 +{
62.35 + /* local variables */
62.36 + int i,result;
62.37 + int cnt, n_nodes_in_this_level,node_index;
62.38 + int n_cum_nodes,current_level,node,n_nodes_prev_level,rank,parent_rank;
62.39 + int n_nodes_in_last_level,n_full_stripes,n_in_partial_stipe,n_children;
62.40 + int n_lvls_in_tree;
62.41 +
62.42 + /* sanity check - nothing has been set up yet, so fail without touching tree_nodes */
62.43 + if( 1 >= tree_order ) {
62.44 + return OMPI_ERROR;
62.45 + }
62.46 + /* clear every children_ranks first so the Error path never frees an unset pointer */
62.47 + for( i=0 ; i < num_nodes ; i++ ) { tree_nodes[i].children_ranks=NULL; }
62.48 + /* figure out number of levels in the tree */
62.49 +
62.50 + n_lvls_in_tree=0;
62.51 + result=num_nodes;
62.52 + /* cnt - number of ranks in given level */
62.53 + cnt=1;
62.54 + /* cumulative count of ranks */
62.55 + while( 0 < result ) {
62.56 + result-=cnt;
62.57 + cnt*=tree_order;
62.58 + n_lvls_in_tree++;
62.59 + };
62.60 +
62.61 + /* loop over tree levels */
62.62 + n_nodes_in_this_level=1;
62.63 + node_index=-1;
62.64 + n_cum_nodes=0;
62.65 + for( current_level = 0 ; current_level < n_lvls_in_tree ; current_level++) {
62.66 +
62.67 + /* loop over nodes in current level */
62.68 + for ( node=0 ; node < n_nodes_in_this_level ; node++ ) {
62.69 + /* get node index */
62.70 + node_index++;
62.71 +
62.72 + /* break if reach group size */
62.73 + if( node_index == num_nodes) {
62.74 + break;
62.75 + }
62.76 +
62.77 + tree_nodes[node_index].my_rank=node_index;
62.78 + tree_nodes[node_index].children_ranks=NULL;
62.79 +
62.80 + /*
62.81 + * Parents
62.82 + */
62.83 + if( 0 == current_level ) {
62.84 + tree_nodes[node_index].n_parents=0;
62.85 + /* the root has no parent */
62.86 + tree_nodes[node_index].parent_rank=-1;
62.87 + } else {
62.88 + tree_nodes[node_index].n_parents=1;
62.89 + /* get parent index */
62.90 + n_nodes_prev_level=n_nodes_in_this_level/tree_order;
62.91 + if( current_level == n_lvls_in_tree -1 ) {
62.92 + /* load balance the lowest level */
62.93 + parent_rank=node-
62.94 + (node/n_nodes_prev_level)*n_nodes_prev_level;
62.95 + parent_rank=n_cum_nodes-n_nodes_prev_level+
62.96 + parent_rank;
62.97 + tree_nodes[node_index].parent_rank=parent_rank;
62.98 + } else {
62.99 + tree_nodes[node_index].parent_rank=
62.100 + (n_cum_nodes-n_nodes_prev_level)+node/tree_order;
62.101 + }
62.102 + }
62.103 +
62.104 + /*
62.105 + * Children
62.106 + */
62.107 +
62.108 + /* get number of children */
62.109 + if( (n_lvls_in_tree-1) == current_level ) {
62.110 + /* leaves have no children */
62.111 + tree_nodes[node_index].n_children=0;
62.112 + tree_nodes[node_index].children_ranks=NULL;
62.113 + } else {
62.114 + /* take into account last level being incomplete */
62.115 + if( (n_lvls_in_tree-2) == current_level ) {
62.116 + /* last level is load balanced */
62.117 + n_nodes_in_last_level=num_nodes-
62.118 + (n_cum_nodes+n_nodes_in_this_level);
62.119 + n_full_stripes=n_nodes_in_last_level/n_nodes_in_this_level;
62.120 + n_in_partial_stipe=n_nodes_in_last_level-
62.121 + n_full_stripes*n_nodes_in_this_level;
62.122 + n_children=n_full_stripes;
62.123 + if( n_full_stripes < tree_order ) {
62.124 + if( node <= n_in_partial_stipe-1 ) {
62.125 + n_children++;
62.126 + }
62.127 + }
62.128 + tree_nodes[node_index].n_children=n_children;
62.129 + if( 0 < n_children ) {
62.130 + tree_nodes[node_index].children_ranks=(int *)
62.131 + malloc(sizeof(int)*n_children);
62.132 + if( NULL == tree_nodes[node_index].children_ranks) {
62.133 + goto Error;
62.134 + }
62.135 + } else {
62.136 + tree_nodes[node_index].children_ranks=NULL;
62.137 + }
62.138 + /* fill in list */
62.139 + for( rank=0 ; rank < n_children ; rank++ ) {
62.140 + tree_nodes[node_index].children_ranks[rank]=
62.141 + node+rank*n_nodes_in_this_level;
62.142 + tree_nodes[node_index].children_ranks[rank]+=
62.143 + (n_cum_nodes+n_nodes_in_this_level);
62.144 + }
62.145 + } else {
62.146 + n_children=tree_order;
62.147 + tree_nodes[node_index].n_children=tree_order;
62.148 + tree_nodes[node_index].children_ranks=(int *)
62.149 + malloc(sizeof(int)*n_children);
62.150 + if( NULL == tree_nodes[node_index].children_ranks) {
62.151 + goto Error;
62.152 + }
62.153 + for( rank=0 ; rank < n_children ; rank++ ) {
62.154 + tree_nodes[node_index].children_ranks[rank]=
62.155 + rank+tree_order*node;
62.156 + tree_nodes[node_index].children_ranks[rank]+=
62.157 + (n_cum_nodes+n_nodes_in_this_level);
62.158 + }
62.159 + }
62.160 + }
62.161 +
62.162 + } /* end node loop */
62.163 +
62.164 + /* update helper counters */
62.165 + n_cum_nodes+=n_nodes_in_this_level;
62.166 + n_nodes_in_this_level*=tree_order;
62.167 + }
62.168 +
62.169 + /* set node type */
62.170 + for(i=0 ; i < num_nodes ; i++ ) {
62.171 + if( 0 == tree_nodes[i].n_parents ) {
62.172 + tree_nodes[i].my_node_type=ROOT_NODE;
62.173 + } else if ( 0 == tree_nodes[i].n_children ) {
62.174 + tree_nodes[i].my_node_type=LEAF_NODE;
62.175 + } else {
62.176 + tree_nodes[i].my_node_type=INTERIOR_NODE;
62.177 + }
62.178 + }
62.179 +
62.180 + /* successful return */
62.181 + return OMPI_SUCCESS;
62.182 +
62.183 +Error:
62.184 + /* free allocated memory */
62.185 + for( i=0 ; i < num_nodes ; i++ ) {
62.186 + /* free(NULL) is a no-op; reset so the caller cannot free it again */
62.187 + free(tree_nodes[i].children_ranks);
62.188 + tree_nodes[i].children_ranks=NULL;
62.189 + }
62.190 +
62.191 + /* error return */
62.192 + return OMPI_ERROR;
62.193 +}
63.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
63.2 +++ b/ompi/patterns/net/netpatterns_nary_tree.c Tue Feb 05 21:52:55 2013 +0000
63.3 @@ -0,0 +1,442 @@
63.4 +/*
63.5 + * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
63.6 + * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
63.7 + * $COPYRIGHT$
63.8 + *
63.9 + * Additional copyrights may follow
63.10 + *
63.11 + * $HEADER$
63.12 + */
63.13 +
63.14 +#include "ompi_config.h"
63.15 +#ifdef HAVE_UNISTD_H
63.16 +#include <unistd.h>
63.17 +#endif
63.18 +#include <sys/types.h>
63.19 +#ifdef HAVE_SYS_MMAN_H
63.20 +#include <sys/mman.h>
63.21 +#endif
63.22 +#include <fcntl.h>
63.23 +#include <errno.h>
63.24 +#include <stdlib.h>
63.25 +
63.26 +#include "ompi/constants.h"
63.27 +#include "netpatterns.h"
63.28 +
63.29 +/*
63.30 + * Create mmaped shared file
63.31 + */
63.32 +
63.33 +/* set up an n-ary tree: fill my_node with the parent rank and children ranks of my_rank in a tree of num_nodes */
63.34 +/* returns OMPI_SUCCESS, or OMPI_ERROR when tree_order <= 1 or the children array cannot be allocated */
63.35 +int netpatterns_setup_narray_tree(int tree_order, int my_rank, int num_nodes,
63.36 + netpatterns_tree_node_t *my_node)
63.37 +{
63.38 + /* local variables */
63.39 + int n_levels, result;
63.40 + int my_level_in_tree, cnt;
63.41 + int lvl,cum_cnt, my_rank_in_my_level,n_lvls_in_tree;
63.42 + int start_index,end_index;
63.43 +
63.44 + /* sanity check */
63.45 + if( 1 >= tree_order ) {
63.46 + goto Error;
63.47 + }
63.48 +
63.49 + my_node->my_rank=my_rank;
63.50 + my_node->tree_size=num_nodes;
63.51 +
63.52 + /* figure out number of levels in tree (NOTE(review): n_levels is not read again in this function) */
63.53 + n_levels=0;
63.54 + result=num_nodes-1;
63.55 + while (0 < result ) {
63.56 + result/=tree_order;
63.57 + n_levels++;
63.58 + };
63.59 +
63.60 + /* figure out who my children and parents are */
63.61 + my_level_in_tree=-1;
63.62 + result=my_rank;
63.63 + /* cnt - number of ranks in given level */
63.64 + cnt=1;
63.65 + /* subtract full levels from my_rank until it goes negative; the count gives my level */
63.66 + while( 0 <= result ) {
63.67 + result-=cnt;
63.68 + cnt*=tree_order;
63.69 + my_level_in_tree++;
63.70 + };
63.71 + /* int my_level_in_tree, n_children, n_parents; */
63.72 +
63.73 + if( 0 == my_rank ) {
63.74 + my_node->n_parents=0;
63.75 + my_node->parent_rank=-1;
63.76 + my_rank_in_my_level=0;
63.77 + } else {
63.78 + my_node->n_parents=1;
63.79 + cnt=1;
63.80 + cum_cnt=0;
63.81 + for (lvl = 0 ; lvl < my_level_in_tree ; lvl ++ ) {
63.82 + /* cumulative count up to this level */
63.83 + cum_cnt+=cnt;
63.84 + /* number of ranks in this level */
63.85 + cnt*=tree_order;
63.86 + }
63.87 + my_rank_in_my_level=my_rank-cum_cnt;
63.88 + /* tree_order consecutive ranks have the same parent */
63.89 + my_node->parent_rank=cum_cnt-cnt/tree_order+my_rank_in_my_level/tree_order;
63.90 + }
63.91 +
63.92 + /* figure out number of levels in the tree */
63.93 + n_lvls_in_tree=0;
63.94 + result=num_nodes;
63.95 + /* cnt - number of ranks in given level */
63.96 + cnt=1;
63.97 + /* cumulative count of ranks */
63.98 + while( 0 < result ) {
63.99 + result-=cnt;
63.100 + cnt*=tree_order;
63.101 + n_lvls_in_tree++;
63.102 + };
63.103 +
63.104 + my_node->children_ranks=(int *)NULL;
63.105 +
63.106 + /* get list of children */
63.107 + if( my_level_in_tree == (n_lvls_in_tree -1 ) ) {
63.108 + /* last level has no children */
63.109 + my_node->n_children=0;
63.110 + } else {
63.111 + cum_cnt=0;
63.112 + cnt=1;
63.113 + for( lvl=0 ; lvl <= my_level_in_tree ; lvl++ ) {
63.114 + cum_cnt+=cnt;
63.115 + cnt*=tree_order;
63.116 + }
63.117 + start_index=cum_cnt+my_rank_in_my_level*tree_order;
63.118 + end_index=start_index+tree_order-1;
63.119 +
63.120 + /* don't go out of bounds at the end of the list */
63.121 + if( end_index >= num_nodes ) {
63.122 + end_index = num_nodes-1;
63.123 + }
63.124 +
63.125 + if( start_index <= (num_nodes-1) ) {
63.126 + my_node->n_children=end_index-start_index+1;
63.127 + } else {
63.128 + my_node->n_children=0;
63.129 + }
63.130 +
63.131 + my_node->children_ranks=NULL;
63.132 + if( 0 < my_node->n_children ) {
63.133 + my_node->children_ranks=
63.134 + (int *)malloc( sizeof(int)*my_node->n_children);
63.135 + if( NULL == my_node->children_ranks) {
63.136 + goto Error;
63.137 + }
63.138 + for (lvl= start_index ; lvl <= end_index ; lvl++ ) {
63.139 + my_node->children_ranks[lvl-start_index]=lvl;
63.140 + }
63.141 + }
63.142 + }
63.143 + /* set node type */
63.144 + if( 0 == my_node->n_parents ) {
63.145 + my_node->my_node_type=ROOT_NODE;
63.146 + } else if ( 0 == my_node->n_children ) {
63.147 + my_node->my_node_type=LEAF_NODE;
63.148 + } else {
63.149 + my_node->my_node_type=INTERIOR_NODE;
63.150 + }
63.151 +
63.152 +
63.153 + /* successful return */
63.154 + return OMPI_SUCCESS;
63.155 +
63.156 +Error:
63.157 +
63.158 + /* error return - nothing to release: the only allocation is the one that failed */
63.159 + return OMPI_ERROR;
63.160 +}
63.161 +
63.162 +int netpatterns_setup_narray_knomial_tree(
63.163 + int tree_order, int my_rank, int num_nodes,
63.164 + netpatterns_narray_knomial_tree_node_t *my_node)
63.165 +{
63.166 + /* local variables */
63.167 + int n_levels, result;
63.168 + int my_level_in_tree, cnt ;
63.169 + int lvl,cum_cnt, my_rank_in_my_level,n_lvls_in_tree;
63.170 + int start_index,end_index;
63.171 + int rc;
63.172 +
63.173 + /* sanity check */
63.174 + if( 1 >= tree_order ) {
63.175 + goto Error;
63.176 + }
63.177 +
63.178 + my_node->my_rank=my_rank;
63.179 + my_node->tree_size=num_nodes;
63.180 +
63.181 + /* figure out number of levels in tree */
63.182 + n_levels=0;
63.183 + result=num_nodes-1;
63.184 + while (0 < result ) {
63.185 + result/=tree_order;
63.186 + n_levels++;
63.187 + };
63.188 +
63.189 + /* figure out who my children and parents are */
63.190 + my_level_in_tree=-1;
63.191 + result=my_rank;
63.192 + /* cnt - number of ranks in given level */
63.193 + cnt=1;
63.194 + /* cummulative count of ranks */
63.195 + while( 0 <= result ) {
63.196 + result-=cnt;
63.197 + cnt*=tree_order;
63.198 + my_level_in_tree++;
63.199 + };
63.200 + /* int my_level_in_tree, n_children, n_parents; */
63.201 +
63.202 + if( 0 == my_rank ) {
63.203 + my_node->n_parents=0;
63.204 + my_node->parent_rank=-1;
63.205 + my_rank_in_my_level=0;
63.206 + } else {
63.207 + my_node->n_parents=1;
63.208 + cnt=1;
63.209 + cum_cnt=0;
63.210 + for (lvl = 0 ; lvl < my_level_in_tree ; lvl ++ ) {
63.211 + /* cummulative count up to this level */
63.212 + cum_cnt+=cnt;
63.213 + /* number of ranks in this level */
63.214 + cnt*=tree_order;
63.215 + }
63.216 +
63.217 + my_node->rank_on_level =
63.218 + my_rank_in_my_level =
63.219 + my_rank-cum_cnt;
63.220 + my_node->level_size = cnt;
63.221 +
63.222 + rc = netpatterns_setup_recursive_knomial_tree_node(
63.223 + my_node->level_size, my_node->rank_on_level,
63.224 + tree_order, &my_node->k_node);
63.225 + if (OMPI_SUCCESS != rc) {
63.226 + goto Error;
63.227 + }
63.228 +
63.229 + /* tree_order consecutive ranks have the same parent */
63.230 + my_node->parent_rank=cum_cnt-cnt/tree_order+my_rank_in_my_level/tree_order;
63.231 + }
63.232 +
63.233 + /* figure out number of levels in the tree */
63.234 + n_lvls_in_tree=0;
63.235 + result=num_nodes;
63.236 + /* cnt - number of ranks in given level */
63.237 + cnt=1;
63.238 + /* cummulative count of ranks */
63.239 + while( 0 < result ) {
63.240 + result-=cnt;
63.241 + cnt*=tree_order;
63.242 + n_lvls_in_tree++;
63.243 + };
63.244 +
63.245 + if(result < 0) {
63.246 + /* reset the size on group */
63.247 + num_nodes = cnt / tree_order;
63.248 + }
63.249 +
63.250 + my_node->children_ranks=(int *)NULL;
63.251 +
63.252 + /* get list of children */
63.253 + if( my_level_in_tree == (n_lvls_in_tree -1 ) ) {
63.254 + /* last level has no children */
63.255 + my_node->n_children=0;
63.256 + } else {
63.257 + cum_cnt=0;
63.258 + cnt=1;
63.259 + for( lvl=0 ; lvl <= my_level_in_tree ; lvl++ ) {
63.260 + cum_cnt+=cnt;
63.261 + cnt*=tree_order;
63.262 + }
63.263 + start_index=cum_cnt+my_rank_in_my_level*tree_order;
63.264 + end_index=start_index+tree_order-1;
63.265 +
63.266 + /* don't go out of bounds at the end of the list */
63.267 + if( end_index >= num_nodes ) {
63.268 + end_index = num_nodes-1;
63.269 + }
63.270 +
63.271 + if( start_index <= (num_nodes-1) ) {
63.272 + my_node->n_children=end_index-start_index+1;
63.273 + } else {
63.274 + my_node->n_children=0;
63.275 + }
63.276 +
63.277 + my_node->children_ranks=NULL;
63.278 + if( 0 < my_node->n_children ) {
63.279 + my_node->children_ranks=
63.280 + (int *)malloc( sizeof(int)*my_node->n_children);
63.281 + if( NULL == my_node->children_ranks) {
63.282 + goto Error;
63.283 + }
63.284 + for (lvl= start_index ; lvl <= end_index ; lvl++ ) {
63.285 + my_node->children_ranks[lvl-start_index]=lvl;
63.286 + }
63.287 + }
63.288 + }
63.289 + /* set node type */
63.290 + if( 0 == my_node->n_parents ) {
63.291 + my_node->my_node_type=ROOT_NODE;
63.292 + } else if ( 0 == my_node->n_children ) {
63.293 + my_node->my_node_type=LEAF_NODE;
63.294 + } else {
63.295 + my_node->my_node_type=INTERIOR_NODE;
63.296 + }
63.297 +
63.298 +
63.299 + /* successful return */
63.300 + return OMPI_SUCCESS;
63.301 +
63.302 +Error:
63.303 +
63.304 + /* error return */
63.305 + return OMPI_ERROR;
63.306 +}
63.307 +
/*
 * Calculate the nearest power of radix that is equal to or greater than
 * size.  The resulting tree is of depth n_lvls, i.e. the return value is
 * radix raised to the power *n_lvls.
 *
 * radix   base of the power; must be at least 2 whenever size > 1
 * size    value to round up; must be at least 1
 * n_lvls  (OUT) exponent of the returned power; may be NULL when the caller
 *         does not need it.  It is set to 0 when the arguments are rejected.
 *
 * Returns the rounded-up power of radix, or 0 when size < 1 or when
 * radix < 2 for a size greater than 1.  The result is not checked for
 * overflow of int.
 */
OMPI_DECLSPEC int roundup_to_power_radix ( int radix, int size, int *n_lvls )
{
    int n_levels = 0;
    int return_value = 1;
    int result;

    /* always leave the output in a defined state */
    if (NULL != n_lvls) {
        *n_lvls = 0;
    }

    if (1 > size) {
        return 0;
    }

    /* with size > 1 the loop below needs radix >= 2: radix 1 would never
     * terminate and radix 0 would divide by zero */
    if (1 < size && 2 > radix) {
        return 0;
    }

    /* count the base-radix digits of size-1; each digit is one tree level */
    result = size - 1;
    while (0 < result) {
        result /= radix;
        n_levels++;
        return_value *= radix;
    }

    if (NULL != n_lvls) {
        *n_lvls = n_levels;
    }
    return return_value;
}
63.329 +
63.330 +static int fill_in_node_data(int tree_order, int num_nodes, int my_node,
63.331 + netpatterns_tree_node_t *nodes_data)
63.332 +{
63.333 + /* local variables */
63.334 + int rc, num_ranks_per_child, num_children, n_extra;
63.335 + int child, rank, n_to_offset, n_ranks_to_child;
63.336 +
63.337 + /* figure out who are my children */
63.338 + num_ranks_per_child=num_nodes/tree_order;
63.339 + if( num_ranks_per_child ) {
63.340 + num_children=tree_order;
63.341 + n_extra=num_nodes-num_ranks_per_child*tree_order;
63.342 + } else {
63.343 + num_children=num_nodes;
63.344 + /* each child has the same number of descendents - 1 */
63.345 + n_extra=0;
63.346 + /* when there is a child, there is at least one
63.347 + * descendent */
63.348 + num_ranks_per_child=1;
63.349 + }
63.350 +
63.351 + nodes_data[my_node].n_children=num_children;
63.352 + if( num_children ) {
63.353 + nodes_data[my_node].children_ranks=(int *)
63.354 + malloc(sizeof(int)*num_children);
63.355 + if(!nodes_data[my_node].children_ranks) {
63.356 +
63.357 + if ( NULL == nodes_data[my_node].children_ranks )
63.358 + {
63.359 + fprintf(stderr, "Cannot allocate memory for children_ranks.\n");
63.360 + rc = OMPI_ERR_OUT_OF_RESOURCE;
63.361 + goto error;
63.362 + }
63.363 + }
63.364 + }
63.365 +
63.366 + rank = my_node;
63.367 + for( child=0 ; child < num_children ; child ++ ) {
63.368 +
63.369 + /* set parent information */
63.370 + nodes_data[rank].n_parents=1;
63.371 + nodes_data[rank].parent_rank=my_node;
63.372 + if( n_extra ) {
63.373 + n_to_offset=child;
63.374 + if( n_to_offset > n_extra){
63.375 + n_to_offset=n_extra;
63.376 + }
63.377 + } else {
63.378 + n_to_offset=0;
63.379 + }
63.380 +
63.381 + rank=my_node+1+child*num_ranks_per_child;
63.382 + rank+=n_to_offset;
63.383 +
63.384 + /* set parent information */
63.385 + nodes_data[rank].n_parents=1;
63.386 + nodes_data[rank].parent_rank=my_node;
63.387 +
63.388 + n_ranks_to_child=num_ranks_per_child;
63.389 + if(n_extra && (child < n_extra) ) {
63.390 + n_ranks_to_child++;
63.391 + }
63.392 +
63.393 + /* set child information */
63.394 + nodes_data[my_node].children_ranks[child]=rank;
63.395 +
63.396 + /* remove the child from the list of ranks */
63.397 + n_ranks_to_child--;
63.398 + rc=fill_in_node_data(tree_order, n_ranks_to_child, rank, nodes_data);
63.399 + if( OMPI_SUCCESS != rc ) {
63.400 + goto error;
63.401 + }
63.402 +
63.403 + }
63.404 +
63.405 + /* return */
63.406 + return OMPI_SUCCESS;
63.407 +
63.408 + /* Error */
63.409 +error:
63.410 + return rc;
63.411 +
63.412 +}
63.413 +
63.414 +/*
63.415 + * This routine sets up the array describing the communication tree for
63.416 + * a k-ary tree where the children form a contiguous range of ranks at
63.417 + * each level. The assumption here is that rank 0 is always the root -
63.418 + * ranks may be rotated based on who the actual root is, to obtain the
63.419 + * appropriate communication pattern for such roots.
63.420 + */
63.421 +OMPI_DECLSPEC int netpatterns_setup_narray_tree_contigous_ranks(
63.422 + int tree_order, int num_nodes,
63.423 + netpatterns_tree_node_t **tree_nodes)
63.424 +{
63.425 + /* local variables */
63.426 + int num_descendent_ranks=num_nodes-1;
63.427 + int rc=OMPI_SUCCESS;
63.428 +
63.429 + *tree_nodes=(netpatterns_tree_node_t *)malloc(
63.430 + sizeof(netpatterns_tree_node_t)*
63.431 + num_nodes);
63.432 + if(!(*tree_nodes) ) {
63.433 + fprintf(stderr, "Cannot allocate memory for tree_nodes.\n");
63.434 + rc = OMPI_ERR_OUT_OF_RESOURCE;
63.435 + return rc;
63.436 + }
63.437 +
63.438 + (*tree_nodes)[0].n_parents=0;
63.439 + rc=fill_in_node_data(tree_order,
63.440 + num_descendent_ranks, 0, *tree_nodes);
63.441 +
63.442 + /* successful return */
63.443 + return rc;
63.444 +
63.445 +}