1.1 --- a/ompi/Makefile.am Tue Feb 19 22:36:41 2013 +0000
1.2 +++ b/ompi/Makefile.am Tue Feb 19 22:50:56 2013 +0000
1.3 @@ -171,6 +171,8 @@
1.4 include mpi/Makefile.am
1.5 include mpi/man/man3/Makefile.extra
1.6 include mpiext/Makefile.am
1.7 +include patterns/net/Makefile.am
1.8 +include patterns/comm/Makefile.am
1.9
1.10 # Ensure that the man page directory exists before we try to make man
1.11 # page files (because ompi/mpi/man/man3 has no config.status-generated
2.1 --- a/ompi/mca/bcol/basesmuma/Makefile.am Tue Feb 19 22:36:41 2013 +0000
2.2 +++ b/ompi/mca/bcol/basesmuma/Makefile.am Tue Feb 19 22:50:56 2013 +0000
2.3 @@ -51,9 +51,7 @@
2.4 mca_bcol_basesmuma_la_SOURCES = $(sources)
2.5 mca_bcol_basesmuma_la_LDFLAGS = -module -avoid-version $(btl_portals_LDFLAGS)
2.6 mca_bcol_basesmuma_la_LIBADD = \
2.7 - $(btl_portals_LIBS) \
2.8 - $(top_ompi_builddir)/ompi/mca/common/netpatterns/libmca_common_netpatterns.la \
2.9 - $(top_ompi_builddir)/ompi/mca/common/commpatterns/libmca_common_commpatterns.la
2.10 + $(btl_portals_LIBS)
2.11
2.12
2.13 noinst_LTLIBRARIES = $(component_noinst)
3.1 --- a/ompi/mca/bcol/basesmuma/bcol_basesmuma.h Tue Feb 19 22:36:41 2013 +0000
3.2 +++ b/ompi/mca/bcol/basesmuma/bcol_basesmuma.h Tue Feb 19 22:50:56 2013 +0000
3.3 @@ -19,7 +19,7 @@
3.4 #include "ompi/mca/coll/ml/coll_ml_allocation.h"
3.5 #include "ompi/request/request.h"
3.6 #include "ompi/proc/proc.h"
3.7 -#include "ompi/mca/common/netpatterns/common_netpatterns.h"
3.8 +#include "ompi/patterns/net/netpatterns.h"
3.9
3.10 #include "orte/util/name_fns.h"
3.11 #include "orte/util/proc_info.h"
3.12 @@ -777,16 +777,16 @@
3.13 sm_buffer_mgmt colls_with_user_data;
3.14
3.15 /* recursive-doubling tree node */
3.16 - mca_common_netpatterns_pair_exchange_node_t recursive_doubling_tree;
3.17 + netpatterns_pair_exchange_node_t recursive_doubling_tree;
3.18
3.19 /* k-nomial gather/allgather tree */
3.20 - mca_common_netpatterns_k_exchange_node_t knomial_allgather_tree;
3.21 + netpatterns_k_exchange_node_t knomial_allgather_tree;
3.22
3.23 /* fanin tree node - root is rank 0 */
3.24 - mca_common_netpatterns_tree_node_t fanin_node;
3.25 + netpatterns_tree_node_t fanin_node;
3.26
3.27 /* fanout tree node - root is rank 0 */
3.28 - mca_common_netpatterns_tree_node_t fanout_node;
3.29 + netpatterns_tree_node_t fanout_node;
3.30
3.31 /* index of blocking barrier memory region to use */
3.32 int index_blocking_barrier_memory_bank;
3.33 @@ -795,18 +795,18 @@
3.34 int *comm_to_sm_map;
3.35
3.36 /* reduction fanout tree */
3.37 - mca_common_netpatterns_tree_node_t* reduction_tree;
3.38 + netpatterns_tree_node_t* reduction_tree;
3.39
3.40 /* broadcast fanout tree */
3.41 - mca_common_netpatterns_tree_node_t* fanout_read_tree;
3.42 + netpatterns_tree_node_t* fanout_read_tree;
3.43
3.44 /* scatter - k-ary tree */
3.45 int scatter_kary_radix;
3.46 - mca_common_netpatterns_tree_node_t *scatter_kary_tree;
3.47 + netpatterns_tree_node_t *scatter_kary_tree;
3.48
3.49 /* Knomial exchange tree */
3.50 /* Currently used for only large message reduce */
3.51 - mca_common_netpatterns_k_exchange_node_t knomial_exchange_tree;
3.52 + netpatterns_k_exchange_node_t knomial_exchange_tree;
3.53
3.54 /* sequence number offset - want to make sure that we start
3.55 * id'ing collectives with id 0, so we can have simple
4.1 --- a/ompi/mca/bcol/basesmuma/bcol_basesmuma_bcast.c Tue Feb 19 22:36:41 2013 +0000
4.2 +++ b/ompi/mca/bcol/basesmuma/bcol_basesmuma_bcast.c Tue Feb 19 22:50:56 2013 +0000
4.3 @@ -135,7 +135,7 @@
4.4 volatile char* parent_data_pointer;
4.5 mca_bcol_basesmuma_header_t *my_ctl_pointer;
4.6 volatile mca_bcol_basesmuma_header_t *parent_ctl_pointer;
4.7 - mca_common_netpatterns_tree_node_t* my_fanout_read_tree;
4.8 + netpatterns_tree_node_t* my_fanout_read_tree;
4.9 size_t pack_len = 0, dt_size;
4.10
4.11 void *data_addr = (void *)((unsigned char *)input_args->src_desc->data_addr );
4.12 @@ -268,7 +268,7 @@
4.13 mca_bcol_basesmuma_module_t* bcol_module=
4.14 (mca_bcol_basesmuma_module_t *)c_input_args->bcol_module;
4.15
4.16 - mca_common_netpatterns_tree_node_t* my_fanout_read_tree;
4.17 + netpatterns_tree_node_t* my_fanout_read_tree;
4.18 size_t pack_len = 0, dt_size;
4.19
4.20 void *data_addr = (void *)((unsigned char *)input_args->src_desc->data_addr);
5.1 --- a/ompi/mca/bcol/basesmuma/bcol_basesmuma_buf_mgmt.c Tue Feb 19 22:36:41 2013 +0000
5.2 +++ b/ompi/mca/bcol/basesmuma/bcol_basesmuma_buf_mgmt.c Tue Feb 19 22:50:56 2013 +0000
5.3 @@ -18,7 +18,7 @@
5.4 #include "ompi/mca/bcol/bcol.h"
5.5 #include "ompi/mca/bcol/base/base.h"
5.6 #include "ompi/mca/coll/ml/coll_ml.h"
5.7 -#include "ompi/mca/common/commpatterns/common_coll_ops.h"
5.8 +#include "ompi/patterns/comm/coll_ops.h"
5.9 #include "ompi/mca/dpm/dpm.h"
5.10
5.11
6.1 --- a/ompi/mca/bcol/basesmuma/bcol_basesmuma_fanin.c Tue Feb 19 22:36:41 2013 +0000
6.2 +++ b/ompi/mca/bcol/basesmuma/bcol_basesmuma_fanin.c Tue Feb 19 22:50:56 2013 +0000
6.3 @@ -14,7 +14,7 @@
6.4 #include "ompi/constants.h"
6.5 #include "ompi/communicator/communicator.h"
6.6 #include "ompi/mca/bcol/bcol.h"
6.7 -#include "ompi/mca/common/netpatterns/common_netpatterns.h"
6.8 +#include "ompi/patterns/net/netpatterns.h"
6.9
6.10 #include "opal/sys/atomic.h"
6.11
6.12 @@ -52,7 +52,7 @@
6.13 volatile mca_bcol_basesmuma_header_t *child_ctl;
6.14
6.15
6.16 - mca_common_netpatterns_tree_node_t *my_tree_node = &(bcol_module->fanin_node);
6.17 + netpatterns_tree_node_t *my_tree_node = &(bcol_module->fanin_node);
6.18
6.19 /* Figure out - what instance of the basesmuma bcol I am */
6.20 sequence_number = input_args->sequence_num;
6.21 @@ -134,7 +134,7 @@
6.22 volatile mca_bcol_basesmuma_header_t *child_ctl;
6.23
6.24
6.25 - mca_common_netpatterns_tree_node_t *my_tree_node = &(bcol_module->fanin_node);
6.26 + netpatterns_tree_node_t *my_tree_node = &(bcol_module->fanin_node);
6.27
6.28 sequence_number = input_args->sequence_num;
6.29
7.1 --- a/ompi/mca/bcol/basesmuma/bcol_basesmuma_fanout.c Tue Feb 19 22:36:41 2013 +0000
7.2 +++ b/ompi/mca/bcol/basesmuma/bcol_basesmuma_fanout.c Tue Feb 19 22:50:56 2013 +0000
7.3 @@ -14,7 +14,7 @@
7.4 #include "ompi/constants.h"
7.5 #include "ompi/communicator/communicator.h"
7.6 #include "ompi/mca/bcol/bcol.h"
7.7 -#include "ompi/mca/common/netpatterns/common_netpatterns.h"
7.8 +#include "ompi/patterns/net/netpatterns.h"
7.9
7.10 #include "opal/sys/atomic.h"
7.11
7.12 @@ -50,7 +50,7 @@
7.13 volatile mca_bcol_basesmuma_header_t *parent_ctl;
7.14
7.15
7.16 - mca_common_netpatterns_tree_node_t *my_tree_node = &(bcol_module->fanin_node);
7.17 + netpatterns_tree_node_t *my_tree_node = &(bcol_module->fanin_node);
7.18
7.19 /* Figure out - what instance of the basesmuma bcol I am */
7.20 sequence_number = input_args->sequence_num;
8.1 --- a/ompi/mca/bcol/basesmuma/bcol_basesmuma_module.c Tue Feb 19 22:36:41 2013 +0000
8.2 +++ b/ompi/mca/bcol/basesmuma/bcol_basesmuma_module.c Tue Feb 19 22:50:56 2013 +0000
8.3 @@ -19,7 +19,7 @@
8.4 #include "ompi/mca/bcol/bcol.h"
8.5 #include "ompi/mca/bcol/base/base.h"
8.6 #include "ompi/mca/dpm/dpm.h"
8.7 -#include "ompi/mca/common/netpatterns/common_netpatterns.h"
8.8 +#include "ompi/patterns/net/netpatterns.h"
8.9
8.10
8.11 #include "orte/mca/grpcomm/grpcomm.h"
8.12 @@ -221,7 +221,7 @@
8.13 *sm_module)
8.14 {
8.15 int rc = OMPI_SUCCESS;
8.16 - rc = mca_common_netpatterns_setup_recursive_knomial_tree_node(
8.17 + rc = netpatterns_setup_recursive_knomial_tree_node(
8.18 sm_module->super.sbgp_partner_module->group_size,
8.19 sm_module->super.sbgp_partner_module->my_index,
8.20 mca_bcol_basesmuma_component.k_nomial_radix,
8.21 @@ -234,7 +234,7 @@
8.22 {
8.23 mca_bcol_basesmuma_module_t *sm_module = (mca_bcol_basesmuma_module_t *) super;
8.24
8.25 - return mca_common_netpatterns_setup_recursive_knomial_allgather_tree_node(
8.26 + return netpatterns_setup_recursive_knomial_allgather_tree_node(
8.27 sm_module->super.sbgp_partner_module->group_size,
8.28 sm_module->super.sbgp_partner_module->my_index,
8.29 mca_bcol_basesmuma_component.k_nomial_radix,
8.30 @@ -294,7 +294,7 @@
8.31 sm_module->reduction_tree = NULL;
8.32 sm_module->fanout_read_tree = NULL;
8.33
8.34 - ret=mca_common_netpatterns_setup_recursive_doubling_tree_node(
8.35 + ret=netpatterns_setup_recursive_doubling_tree_node(
8.36 module->group_size,module->my_index,
8.37 &(sm_module->recursive_doubling_tree));
8.38 if(OMPI_SUCCESS != ret) {
8.39 @@ -306,7 +306,7 @@
8.40 /* setup the fanin tree - this is used only as part of a hierarchical
8.41 * barrier, so will set this up with rank 0 as the root */
8.42 my_rank=module->my_index;
8.43 - ret=mca_common_netpatterns_setup_narray_tree(cs->radix_fanin,
8.44 + ret=netpatterns_setup_narray_tree(cs->radix_fanin,
8.45 my_rank,module->group_size,&(sm_module->fanin_node));
8.46 if(OMPI_SUCCESS != ret) {
8.47 fprintf(stderr,"Error setting up fanin tree \n");
8.48 @@ -316,7 +316,7 @@
8.49
8.50 /* setup the fanout tree - this is used only as part of a hierarchical
8.51 * barrier, so will set this up with rank 0 as the root */
8.52 - ret=mca_common_netpatterns_setup_narray_tree(cs->radix_fanout,
8.53 + ret=netpatterns_setup_narray_tree(cs->radix_fanout,
8.54 my_rank,module->group_size,&(sm_module->fanout_node));
8.55 if(OMPI_SUCCESS != ret) {
8.56 fprintf(stderr,"Error setting up fanout tree \n");
8.57 @@ -333,14 +333,14 @@
8.58 bcast_radix = cs->radix_read_tree;
8.59
8.60 /* initialize fan-out read tree */
8.61 - sm_module->fanout_read_tree=(mca_common_netpatterns_tree_node_t*) malloc(
8.62 - sizeof(mca_common_netpatterns_tree_node_t)*module->group_size);
8.63 + sm_module->fanout_read_tree=(netpatterns_tree_node_t*) malloc(
8.64 + sizeof(netpatterns_tree_node_t)*module->group_size);
8.65 if( NULL == sm_module->fanout_read_tree ) {
8.66 goto Error;
8.67 }
8.68
8.69 for(i = 0; i < module->group_size; i++){
8.70 - ret = mca_common_netpatterns_setup_narray_tree(bcast_radix,
8.71 + ret = netpatterns_setup_narray_tree(bcast_radix,
8.72 i, module->group_size, &(sm_module->fanout_read_tree[i]));
8.73 if(OMPI_SUCCESS != ret) {
8.74 goto Error;
8.75 @@ -363,13 +363,13 @@
8.76 */
8.77
8.78 /* initialize reduction tree */
8.79 - sm_module->reduction_tree=(mca_common_netpatterns_tree_node_t *) malloc(
8.80 - sizeof(mca_common_netpatterns_tree_node_t )*module->group_size);
8.81 + sm_module->reduction_tree=(netpatterns_tree_node_t *) malloc(
8.82 + sizeof(netpatterns_tree_node_t )*module->group_size);
8.83 if( NULL == sm_module->reduction_tree ) {
8.84 goto Error;
8.85 }
8.86
8.87 - ret=mca_common_netpatterns_setup_multinomial_tree(
8.88 + ret=netpatterns_setup_multinomial_tree(
8.89 cs->order_reduction_tree,module->group_size,
8.90 sm_module->reduction_tree);
8.91 if( MPI_SUCCESS != ret ) {
8.92 @@ -393,7 +393,7 @@
8.93 */
8.94 sm_module->scatter_kary_radix=cs->scatter_kary_radix;
8.95 sm_module->scatter_kary_tree=NULL;
8.96 - ret=mca_common_netpatterns_setup_narray_tree_contigous_ranks(
8.97 + ret=netpatterns_setup_narray_tree_contigous_ranks(
8.98 sm_module->scatter_kary_radix,
8.99 sm_module->super.sbgp_partner_module->group_size,
8.100 &(sm_module->scatter_kary_tree));
9.1 --- a/ompi/mca/bcol/basesmuma/bcol_basesmuma_rd_nb_barrier.c Tue Feb 19 22:36:41 2013 +0000
9.2 +++ b/ompi/mca/bcol/basesmuma/bcol_basesmuma_rd_nb_barrier.c Tue Feb 19 22:50:56 2013 +0000
9.3 @@ -17,7 +17,7 @@
9.4 #include "ompi/mca/bcol/bcol.h"
9.5 #include "bcol_basesmuma.h"
9.6 #include "opal/sys/atomic.h"
9.7 -#include "ompi/mca/common/netpatterns/common_netpatterns.h"
9.8 +#include "ompi/patterns/net/netpatterns.h"
9.9
9.10 /*
9.11 * Initialize nonblocking barrier. This is code specific for handling
9.12 @@ -35,7 +35,7 @@
9.13 int ret=OMPI_SUCCESS, idx, leading_dim, loop_cnt, exchange;
9.14 int pair_rank;
9.15 mca_bcol_basesmuma_ctl_struct_t **ctl_structs;
9.16 - mca_common_netpatterns_pair_exchange_node_t *my_exchange_node;
9.17 + netpatterns_pair_exchange_node_t *my_exchange_node;
9.18 int extra_rank, my_rank, pow_2;
9.19 mca_bcol_basesmuma_ctl_struct_t volatile *partner_ctl;
9.20 mca_bcol_basesmuma_ctl_struct_t volatile *my_ctl;
9.21 @@ -205,7 +205,7 @@
9.22 int ret=OMPI_SUCCESS, idx, leading_dim, loop_cnt, exchange;
9.23 int pair_rank, start_index, restart_phase;
9.24 mca_bcol_basesmuma_ctl_struct_t **ctl_structs;
9.25 - mca_common_netpatterns_pair_exchange_node_t *my_exchange_node;
9.26 + netpatterns_pair_exchange_node_t *my_exchange_node;
9.27 int extra_rank, my_rank, pow_2;
9.28 mca_bcol_basesmuma_ctl_struct_t volatile *partner_ctl;
9.29 mca_bcol_basesmuma_ctl_struct_t volatile *my_ctl;
10.1 --- a/ompi/mca/bcol/basesmuma/bcol_basesmuma_rk_barrier.c Tue Feb 19 22:36:41 2013 +0000
10.2 +++ b/ompi/mca/bcol/basesmuma/bcol_basesmuma_rk_barrier.c Tue Feb 19 22:50:56 2013 +0000
10.3 @@ -55,7 +55,7 @@
10.4 int flag_offset = 0;
10.5 volatile int8_t ready_flag;
10.6 mca_bcol_basesmuma_module_t *bcol_module = (mca_bcol_basesmuma_module_t *) const_args->bcol_module;
10.7 - mca_common_netpatterns_k_exchange_node_t *exchange_node = &bcol_module->knomial_allgather_tree;
10.8 + netpatterns_k_exchange_node_t *exchange_node = &bcol_module->knomial_allgather_tree;
10.9 mca_bcol_basesmuma_component_t *cm = &mca_bcol_basesmuma_component;
10.10 uint32_t buffer_index = input_args->buffer_index;
10.11 int *active_requests =
10.12 @@ -244,7 +244,7 @@
10.13 int flag_offset;
10.14 volatile int8_t ready_flag;
10.15 mca_bcol_basesmuma_module_t *bcol_module = (mca_bcol_basesmuma_module_t *) const_args->bcol_module;
10.16 - mca_common_netpatterns_k_exchange_node_t *exchange_node = &bcol_module->knomial_allgather_tree;
10.17 + netpatterns_k_exchange_node_t *exchange_node = &bcol_module->knomial_allgather_tree;
10.18 mca_bcol_basesmuma_component_t *cm = &mca_bcol_basesmuma_component;
10.19 uint32_t buffer_index = input_args->buffer_index;
10.20 int *active_requests =
11.1 --- a/ompi/mca/bcol/basesmuma/bcol_basesmuma_setup.c Tue Feb 19 22:36:41 2013 +0000
11.2 +++ b/ompi/mca/bcol/basesmuma/bcol_basesmuma_setup.c Tue Feb 19 22:50:56 2013 +0000
11.3 @@ -21,7 +21,7 @@
11.4 #include "ompi/mca/bcol/bcol.h"
11.5 #include "ompi/mca/bcol/base/base.h"
11.6 #include "ompi/mca/dpm/dpm.h"
11.7 -#include "ompi/mca/common/commpatterns/common_coll_ops.h"
11.8 +#include "ompi/patterns/comm/coll_ops.h"
11.9
11.10 #include "orte/mca/rml/rml.h"
11.11 #include "orte/mca/rml/rml_types.h"
12.1 --- a/ompi/mca/bcol/basesmuma/bcol_basesmuma_smcm.c Tue Feb 19 22:36:41 2013 +0000
12.2 +++ b/ompi/mca/bcol/basesmuma/bcol_basesmuma_smcm.c Tue Feb 19 22:50:56 2013 +0000
12.3 @@ -18,7 +18,7 @@
12.4
12.5 #include "ompi/mca/dpm/dpm.h"
12.6 #include "ompi/proc/proc.h"
12.7 -#include "ompi/mca/common/commpatterns/common_coll_ops.h"
12.8 +#include "ompi/patterns/comm/coll_ops.h"
12.9
12.10 #include "orte/util/show_help.h"
12.11 #include "orte/util/name_fns.h"
13.1 --- a/ompi/mca/bcol/bcol.h Tue Feb 19 22:36:41 2013 +0000
13.2 +++ b/ompi/mca/bcol/bcol.h Tue Feb 19 22:50:56 2013 +0000
13.3 @@ -20,7 +20,7 @@
13.4 #include "ompi/datatype/ompi_datatype.h"
13.5 #include "ompi/op/op.h"
13.6 #include "ompi/include/ompi/constants.h"
13.7 -#include "ompi/mca/common/netpatterns/common_netpatterns_knomial_tree.h"
13.8 +#include "ompi/patterns/net/netpatterns_knomial_tree.h"
13.9
13.10 #include <limits.h>
13.11
14.1 --- a/ompi/mca/bcol/iboffload/bcol_iboffload.h Tue Feb 19 22:36:41 2013 +0000
14.2 +++ b/ompi/mca/bcol/iboffload/bcol_iboffload.h Tue Feb 19 22:50:56 2013 +0000
14.3 @@ -366,16 +366,16 @@
14.4 opal_list_t collfrag_pending;
14.5
14.6 /* recursive-doubling tree node */
14.7 - mca_common_netpatterns_pair_exchange_node_t recursive_doubling_tree;
14.8 + netpatterns_pair_exchange_node_t recursive_doubling_tree;
14.9
14.10 /* N exchange tree */
14.11 - mca_common_netpatterns_pair_exchange_node_t n_exchange_tree;
14.12 + netpatterns_pair_exchange_node_t n_exchange_tree;
14.13
14.14 /* Knomial exchange tree */
14.15 - mca_common_netpatterns_k_exchange_node_t knomial_exchange_tree;
14.16 + netpatterns_k_exchange_node_t knomial_exchange_tree;
14.17
14.18 /* Knomial exchange tree */
14.19 - mca_common_netpatterns_k_exchange_node_t knomial_allgather_tree;
14.20 + netpatterns_k_exchange_node_t knomial_allgather_tree;
14.21
14.22 /* The array will keep pre-calculated task consumption per
14.23 * algorithm
15.1 --- a/ompi/mca/bcol/iboffload/bcol_iboffload_barrier.c Tue Feb 19 22:36:41 2013 +0000
15.2 +++ b/ompi/mca/bcol/iboffload/bcol_iboffload_barrier.c Tue Feb 19 22:50:56 2013 +0000
15.3 @@ -54,7 +54,7 @@
15.4 mca_bcol_iboffload_frag_t *send_fragment = NULL,
15.5 *preposted_recv_frag = NULL;
15.6
15.7 - mca_common_netpatterns_pair_exchange_node_t *my_exchange_node =
15.8 + netpatterns_pair_exchange_node_t *my_exchange_node =
15.9 &iboffload->recursive_doubling_tree;
15.10
15.11 IBOFFLOAD_VERBOSE(10, ("Calling for mca_bcol_iboffload_barrier_intra_recursive_doubling.\n"));
15.12 @@ -364,7 +364,7 @@
15.13 /* Recursive K - ing*/
15.14 static int recursive_knomial_start_connections(struct mca_bcol_iboffload_module_t *iboffload)
15.15 {
15.16 - mca_common_netpatterns_k_exchange_node_t *my_exchange_node =
15.17 + netpatterns_k_exchange_node_t *my_exchange_node =
15.18 &iboffload->knomial_exchange_tree;
15.19 int k, i, n_exchanges = my_exchange_node->n_exchanges,
15.20 **exchanges = my_exchange_node->rank_exchanges,
15.21 @@ -442,7 +442,7 @@
15.22 mca_bcol_iboffload_frag_t *send_fragment = NULL,
15.23 *preposted_recv_frag = NULL;
15.24
15.25 - mca_common_netpatterns_k_exchange_node_t *my_exchange_node =
15.26 + netpatterns_k_exchange_node_t *my_exchange_node =
15.27 &iboffload->knomial_exchange_tree;
15.28 IBOFFLOAD_VERBOSE(10, ("Calling for mca_bcol_iboffload_barrier_intra_recursive_knomial. Node type %d\n", my_exchange_node->node_type));
15.29
15.30 @@ -706,7 +706,7 @@
15.31
15.32 int mca_bcol_iboffload_rec_doubling_start_connections(mca_bcol_iboffload_module_t *iboffload)
15.33 {
15.34 - mca_common_netpatterns_pair_exchange_node_t *my_exchange_node =
15.35 + netpatterns_pair_exchange_node_t *my_exchange_node =
15.36 &iboffload->recursive_doubling_tree;
15.37
15.38 int i, n_exchanges = my_exchange_node->n_exchanges,
16.1 --- a/ompi/mca/bcol/iboffload/bcol_iboffload_bcast.c Tue Feb 19 22:36:41 2013 +0000
16.2 +++ b/ompi/mca/bcol/iboffload/bcol_iboffload_bcast.c Tue Feb 19 22:50:56 2013 +0000
16.3 @@ -203,7 +203,7 @@
16.4 static int mca_bcol_iboffload_small_msg_bcast_exec(mca_bcol_iboffload_module_t *iboffload_module,
16.5 mca_bcol_iboffload_collreq_t *coll_request)
16.6 {
16.7 - mca_common_netpatterns_pair_exchange_node_t *recursive_doubling_tree =
16.8 + netpatterns_pair_exchange_node_t *recursive_doubling_tree =
16.9 &iboffload_module->recursive_doubling_tree;
16.10
16.11 int rc,
16.12 @@ -396,7 +396,7 @@
16.13 static int mca_bcol_iboffload_small_msg_bcast_extra_exec(mca_bcol_iboffload_module_t *iboffload_module,
16.14 mca_bcol_iboffload_collreq_t *coll_request)
16.15 {
16.16 - mca_common_netpatterns_pair_exchange_node_t *recursive_doubling_tree =
16.17 + netpatterns_pair_exchange_node_t *recursive_doubling_tree =
16.18 &iboffload_module->recursive_doubling_tree;
16.19
16.20 int rc,
16.21 @@ -617,7 +617,7 @@
16.22 static int mca_bcol_iboffload_bcast_scatter_allgather_exec(mca_bcol_iboffload_module_t *iboffload_module,
16.23 mca_bcol_iboffload_collreq_t *coll_request)
16.24 {
16.25 - mca_common_netpatterns_pair_exchange_node_t *recursive_doubling_tree =
16.26 + netpatterns_pair_exchange_node_t *recursive_doubling_tree =
16.27 &iboffload_module->recursive_doubling_tree;
16.28
16.29 int rc,
16.30 @@ -857,7 +857,7 @@
16.31 static int mca_bcol_iboffload_bcast_scatter_allgather_extra_exec(mca_bcol_iboffload_module_t *iboffload_module,
16.32 mca_bcol_iboffload_collreq_t *coll_request)
16.33 {
16.34 - mca_common_netpatterns_pair_exchange_node_t *recursive_doubling_tree =
16.35 + netpatterns_pair_exchange_node_t *recursive_doubling_tree =
16.36 &iboffload_module->recursive_doubling_tree;
16.37
16.38 int rc, dst;
17.1 --- a/ompi/mca/bcol/iboffload/bcol_iboffload_bcast.h Tue Feb 19 22:36:41 2013 +0000
17.2 +++ b/ompi/mca/bcol/iboffload/bcol_iboffload_bcast.h Tue Feb 19 22:50:56 2013 +0000
17.3 @@ -364,7 +364,7 @@
17.4
17.5 static inline void bcol_iboffload_setup_binomial_connection(mca_bcol_iboffload_module_t *iboffload)
17.6 {
17.7 - mca_common_netpatterns_pair_exchange_node_t *my_exchange_node =
17.8 + netpatterns_pair_exchange_node_t *my_exchange_node =
17.9 &iboffload->recursive_doubling_tree;
17.10
17.11 int i, n_exchanges = my_exchange_node->n_exchanges,
18.1 --- a/ompi/mca/bcol/iboffload/bcol_iboffload_module.c Tue Feb 19 22:36:41 2013 +0000
18.2 +++ b/ompi/mca/bcol/iboffload/bcol_iboffload_module.c Tue Feb 19 22:50:56 2013 +0000
18.3 @@ -178,8 +178,8 @@
18.4 free(module->endpoints);
18.5 }
18.6
18.7 - mca_common_netpatterns_free_recursive_doubling_tree_node(&module->n_exchange_tree);
18.8 - mca_common_netpatterns_free_recursive_doubling_tree_node(&module->recursive_doubling_tree);
18.9 + netpatterns_free_recursive_doubling_tree_node(&module->n_exchange_tree);
18.10 + netpatterns_free_recursive_doubling_tree_node(&module->recursive_doubling_tree);
18.11
18.12 OBJ_RELEASE(module->device->net_context);
18.13 OBJ_RELEASE(module->device);
18.14 @@ -745,7 +745,7 @@
18.15 {
18.16 int rc;
18.17 mca_bcol_iboffload_module_t *ib_module = (mca_bcol_iboffload_module_t *) super;
18.18 - rc = mca_common_netpatterns_setup_recursive_knomial_allgather_tree_node(
18.19 + rc = netpatterns_setup_recursive_knomial_allgather_tree_node(
18.20 ib_module->super.sbgp_partner_module->group_size,
18.21 ib_module->super.sbgp_partner_module->my_index,
18.22 mca_bcol_iboffload_component.k_nomial_radix,
18.23 @@ -1090,7 +1090,7 @@
18.24 /* Barrier initialization - recuresive doubling */
18.25 #if 1
18.26 if (OMPI_SUCCESS !=
18.27 - mca_common_netpatterns_setup_recursive_doubling_tree_node(
18.28 + netpatterns_setup_recursive_doubling_tree_node(
18.29 iboffload_module->group_size, my_rank,
18.30 &iboffload_module->recursive_doubling_tree)) {
18.31 IBOFFLOAD_ERROR(("Failed to setup recursive doubling tree,"
18.32 @@ -1101,7 +1101,7 @@
18.33
18.34 /* Barrier initialization - N exchange tree */
18.35 if (OMPI_SUCCESS !=
18.36 - mca_common_netpatterns_setup_recursive_doubling_n_tree_node(
18.37 + netpatterns_setup_recursive_doubling_n_tree_node(
18.38 iboffload_module->group_size, my_rank,
18.39 cm->exchange_tree_order,
18.40 &iboffload_module->n_exchange_tree)) {
18.41 @@ -1113,7 +1113,7 @@
18.42
18.43 /* Recursive K-ing initialization - Knomial exchange tree */
18.44 if (OMPI_SUCCESS !=
18.45 - mca_common_netpatterns_setup_recursive_knomial_tree_node(
18.46 + netpatterns_setup_recursive_knomial_tree_node(
18.47 iboffload_module->group_size, my_rank,
18.48 cm->knomial_tree_order,
18.49 &iboffload_module->knomial_exchange_tree)) {
18.50 @@ -1156,7 +1156,7 @@
18.51 }
18.52 /* that should take care of that */
18.53 if (OMPI_SUCCESS !=
18.54 - mca_common_netpatterns_setup_recursive_knomial_allgather_tree_node(
18.55 + netpatterns_setup_recursive_knomial_allgather_tree_node(
18.56 iboffload_module->group_size, sbgp->group_list[my_rank],
18.57 cm->k_nomial_radix, iboffload_module->super.list_n_connected,
18.58 &iboffload_module->knomial_allgather_tree)) {
19.1 --- a/ompi/mca/bcol/ptpcoll/Makefile.am Tue Feb 19 22:36:41 2013 +0000
19.2 +++ b/ompi/mca/bcol/ptpcoll/Makefile.am Tue Feb 19 22:50:56 2013 +0000
19.3 @@ -44,8 +44,7 @@
19.4 mcacomponent_LTLIBRARIES = $(component_install)
19.5 mca_bcol_ptpcoll_la_SOURCES = $(sources)
19.6 mca_bcol_ptpcoll_la_LDFLAGS = -module -avoid-version
19.7 -mca_bcol_ptpcoll_la_LIBADD = \
19.8 - $(top_ompi_builddir)/ompi/mca/common/netpatterns/libmca_common_netpatterns.la
19.9 +mca_bcol_ptpcoll_la_LIBADD =
19.10
19.11 noinst_LTLIBRARIES = $(component_noinst)
19.12 libmca_bcol_ptpcoll_la_SOURCES =$(sources)
20.1 --- a/ompi/mca/bcol/ptpcoll/bcol_ptpcoll.h Tue Feb 19 22:36:41 2013 +0000
20.2 +++ b/ompi/mca/bcol/ptpcoll/bcol_ptpcoll.h Tue Feb 19 22:50:56 2013 +0000
20.3 @@ -20,7 +20,7 @@
20.4 #include "ompi/request/request.h"
20.5 #include "ompi/mca/pml/pml.h"
20.6 #include "ompi/mca/coll/ml/coll_ml_allocation.h"
20.7 -#include "ompi/mca/common/netpatterns/common_netpatterns.h"
20.8 +#include "ompi/patterns/net/netpatterns.h"
20.9
20.10 BEGIN_C_DECLS
20.11
20.12 @@ -297,7 +297,7 @@
20.13 int full_narray_tree_num_leafs;
20.14
20.15 /* Nary tree info */
20.16 - mca_common_netpatterns_tree_node_t *narray_node;
20.17 + netpatterns_tree_node_t *narray_node;
20.18
20.19 /* if the rank in group, it keeps the extra peer.
20.20 if the rank is extra, it keeps the proxy peer.
20.21 @@ -328,13 +328,13 @@
20.22 /* number of extra peers , maximum k - 1*/
20.23 int narray_knomial_proxy_num;
20.24 /* Narray-Knomial node information array */
20.25 - mca_common_netpatterns_narray_knomial_tree_node_t *narray_knomial_node;
20.26 + netpatterns_narray_knomial_tree_node_t *narray_knomial_node;
20.27 /* Knomial exchange tree */
20.28 - mca_common_netpatterns_k_exchange_node_t knomial_exchange_tree;
20.29 + netpatterns_k_exchange_node_t knomial_exchange_tree;
20.30 /* knomial allgather tree --- Do not disable, we need both
20.31 different algorithms define recursive k - ing differently
20.32 */
20.33 - mca_common_netpatterns_k_exchange_node_t knomial_allgather_tree;
20.34 + netpatterns_k_exchange_node_t knomial_allgather_tree;
20.35
20.36 /* Knomial allgather offsets */
20.37 int **allgather_offsets;
21.1 --- a/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_barrier.c Tue Feb 19 22:36:41 2013 +0000
21.2 +++ b/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_barrier.c Tue Feb 19 22:50:56 2013 +0000
21.3 @@ -34,7 +34,7 @@
21.4 mca_bcol_ptpcoll_module_t *ptpcoll_module =
21.5 (mca_bcol_ptpcoll_module_t *) const_args->bcol_module;
21.6
21.7 - mca_common_netpatterns_k_exchange_node_t *my_exchange_node =
21.8 + netpatterns_k_exchange_node_t *my_exchange_node =
21.9 &ptpcoll_module->knomial_exchange_tree;
21.10
21.11 int rc, k, pair_comm_rank, exchange, completed,
21.12 @@ -223,7 +223,7 @@
21.13 mca_bcol_ptpcoll_module_t *ptpcoll_module =
21.14 (mca_bcol_ptpcoll_module_t *) const_args->bcol_module;
21.15
21.16 - mca_common_netpatterns_k_exchange_node_t *my_exchange_node =
21.17 + netpatterns_k_exchange_node_t *my_exchange_node =
21.18 &ptpcoll_module->knomial_exchange_tree;
21.19
21.20 int rc, k, tag, pair_comm_rank, exchange,
21.21 @@ -371,7 +371,7 @@
21.22 mca_bcol_ptpcoll_module_t *ptpcoll_module =
21.23 (mca_bcol_ptpcoll_module_t *) const_args->bcol_module;
21.24
21.25 - mca_common_netpatterns_k_exchange_node_t *my_exchange_node =
21.26 + netpatterns_k_exchange_node_t *my_exchange_node =
21.27 &ptpcoll_module->knomial_exchange_tree;
21.28
21.29 ompi_communicator_t *comm =
21.30 @@ -862,7 +862,7 @@
21.31
21.32 static int mca_bcol_ptpcoll_barrier_setup(mca_bcol_base_module_t *super, int bcoll_type)
21.33 {
21.34 - mca_common_netpatterns_k_exchange_node_t *my_exchange_node;
21.35 + netpatterns_k_exchange_node_t *my_exchange_node;
21.36 mca_bcol_ptpcoll_module_t * ptpcoll_module =
21.37 (mca_bcol_ptpcoll_module_t *) super;
21.38
22.1 --- a/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_bcast.c Tue Feb 19 22:36:41 2013 +0000
22.2 +++ b/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_bcast.c Tue Feb 19 22:50:56 2013 +0000
22.3 @@ -141,7 +141,7 @@
22.4 int count = input_args->count * input_args->dtype->super.size;
22.5 int *active_requests =
22.6 &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].active_requests);
22.7 - mca_common_netpatter_knomial_step_info_t step_info = {0, 0, 0};
22.8 + netpatterns_knomial_step_info_t step_info = {0, 0, 0};
22.9
22.10 PTPCOLL_VERBOSE(3, ("BCAST Anyroot, index_this_type %d, num_of_this_type %d",
22.11 const_args->index_of_this_type_in_collective + 1,
22.12 @@ -529,7 +529,7 @@
22.13 /* No data was received. Waiting for data */
22.14 if (0 == (*active_requests)) {
22.15 int extra_root = -1;
22.16 - mca_common_netpatter_knomial_step_info_t step_info;
22.17 + netpatterns_knomial_step_info_t step_info;
22.18 /* We can not block. So run couple of test for data arrival */
22.19 if (0 == mca_bcol_ptpcoll_test_for_match(recv_request, &rc)) {
22.20 PTPCOLL_VERBOSE(10, ("Test was not matched (active request %d)",
22.21 @@ -624,7 +624,7 @@
22.22 int matched = 0;
22.23 int k_level, logk_level;
22.24 int extra_root = -1;
22.25 - mca_common_netpatter_knomial_step_info_t step_info;
22.26 + netpatterns_knomial_step_info_t step_info;
22.27
22.28 PTPCOLL_VERBOSE(3, ("BCAST Know root, index_this_type %d, num_of_this_type %d",
22.29 const_args->index_of_this_type_in_collective + 1,
22.30 @@ -694,7 +694,7 @@
22.31 }
22.32 }
22.33
22.34 - data_src = mca_common_netpatterns_get_knomial_data_source(
22.35 + data_src = netpatterns_get_knomial_data_source(
22.36 my_group_index, group_root_index, radix, ptpcoll_module->pow_knum,
22.37 &k_level, &logk_level);
22.38
22.39 @@ -1709,8 +1709,8 @@
22.40 int group_size = ptpcoll_module->full_narray_tree_size;
22.41 int completed = 0;
22.42 int virtual_root;
22.43 - mca_common_netpatterns_narray_knomial_tree_node_t *narray_knomial_node = NULL;
22.44 - mca_common_netpatterns_narray_knomial_tree_node_t *narray_node = NULL;
22.45 + netpatterns_narray_knomial_tree_node_t *narray_knomial_node = NULL;
22.46 + netpatterns_narray_knomial_tree_node_t *narray_node = NULL;
22.47
22.48 PTPCOLL_VERBOSE(3, ("BCAST Anyroot, index_this_type %d, num_of_this_type %d",
22.49 const_args->index_of_this_type_in_collective + 1,
22.50 @@ -2032,7 +2032,7 @@
22.51 int matched = true;
22.52 int my_group_index = ptpcoll_module->super.sbgp_partner_module->my_index;
22.53 int relative_group_index = 0;
22.54 - mca_common_netpatterns_tree_node_t *narray_node = NULL;
22.55 + netpatterns_tree_node_t *narray_node = NULL;
22.56
22.57 PTPCOLL_VERBOSE(3, ("Bcast, Narray tree Progress"));
22.58
22.59 @@ -2119,7 +2119,7 @@
22.60 int my_group_index = ptpcoll_module->super.sbgp_partner_module->my_index;
22.61 int group_root_index;
22.62 int relative_group_index = 0;
22.63 - mca_common_netpatterns_tree_node_t *narray_node = NULL;
22.64 + netpatterns_tree_node_t *narray_node = NULL;
22.65
22.66 PTPCOLL_VERBOSE(3, ("Bcast, Narray tree"));
22.67
23.1 --- a/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_bcast.h Tue Feb 19 22:36:41 2013 +0000
23.2 +++ b/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_bcast.h Tue Feb 19 22:50:56 2013 +0000
23.3 @@ -756,9 +756,9 @@
23.4 ompi_communicator_t* comm = ptpcoll_module->super.sbgp_partner_module->group_comm;
23.5 ompi_request_t **requests =
23.6 ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].requests;
23.7 - mca_common_netpatterns_narray_knomial_tree_node_t *narray_node =
23.8 + netpatterns_narray_knomial_tree_node_t *narray_node =
23.9 &ptpcoll_module->narray_knomial_node[relative_group_index];
23.10 - mca_common_netpatterns_k_exchange_node_t *k_node =
23.11 + netpatterns_k_exchange_node_t *k_node =
23.12 &narray_node->k_node;
23.13 mca_bcol_ptpcoll_component_t *cm =
23.14 &mca_bcol_ptpcoll_component;
24.1 --- a/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_module.c Tue Feb 19 22:36:41 2013 +0000
24.2 +++ b/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_module.c Tue Feb 19 22:50:56 2013 +0000
24.3 @@ -265,7 +265,7 @@
24.4 {
24.5 mca_bcol_ptpcoll_module_t *p2p_module = (mca_bcol_ptpcoll_module_t *) super;
24.6 int rc = 0;
24.7 - rc = mca_common_netpatterns_setup_recursive_knomial_allgather_tree_node(
24.8 + rc = netpatterns_setup_recursive_knomial_allgather_tree_node(
24.9 p2p_module->super.sbgp_partner_module->group_size,
24.10 p2p_module->super.sbgp_partner_module->my_index,
24.11 mca_bcol_ptpcoll_component.k_nomial_radix,
24.12 @@ -315,7 +315,7 @@
24.13
24.14 ptpcoll_module->narray_knomial_node = calloc(
24.15 ptpcoll_module->full_narray_tree_size,
24.16 - sizeof(mca_common_netpatterns_narray_knomial_tree_node_t));
24.17 + sizeof(netpatterns_narray_knomial_tree_node_t));
24.18 if(NULL == ptpcoll_module->narray_knomial_node) {
24.19 goto Error;
24.20 }
24.21 @@ -346,7 +346,7 @@
24.22 }
24.23 /* Setting node info */
24.24 for(i = 0; i < ptpcoll_module->full_narray_tree_size; i++) {
24.25 - rc = mca_common_netpatterns_setup_narray_knomial_tree(
24.26 + rc = netpatterns_setup_narray_knomial_tree(
24.27 cm->narray_knomial_radix,
24.28 i,
24.29 ptpcoll_module->full_narray_tree_size,
24.30 @@ -382,13 +382,13 @@
24.31 mca_bcol_ptpcoll_component_t *cm = &mca_bcol_ptpcoll_component;
24.32
24.33 ptpcoll_module->narray_node = calloc(ptpcoll_module->group_size,
24.34 - sizeof(mca_common_netpatterns_tree_node_t));
24.35 + sizeof(netpatterns_tree_node_t));
24.36 if(NULL == ptpcoll_module->narray_node ) {
24.37 goto Error;
24.38 }
24.39
24.40 for(i = 0; i < ptpcoll_module->group_size; i++) {
24.41 - rc = mca_common_netpatterns_setup_narray_tree(
24.42 + rc = netpatterns_setup_narray_tree(
24.43 cm->narray_radix,
24.44 i,
24.45 ptpcoll_module->group_size,
24.46 @@ -510,7 +510,7 @@
24.47 static int load_recursive_knomial_info(mca_bcol_ptpcoll_module_t *ptpcoll_module)
24.48 {
24.49 int rc = OMPI_SUCCESS;
24.50 - rc = mca_common_netpatterns_setup_recursive_knomial_tree_node(
24.51 + rc = netpatterns_setup_recursive_knomial_tree_node(
24.52 ptpcoll_module->group_size,
24.53 ptpcoll_module->super.sbgp_partner_module->my_index,
24.54 mca_bcol_ptpcoll_component.k_nomial_radix,
25.1 --- a/ompi/mca/coll/ml/Makefile.am Tue Feb 19 22:36:41 2013 +0000
25.2 +++ b/ompi/mca/coll/ml/Makefile.am Tue Feb 19 22:50:56 2013 +0000
25.3 @@ -68,18 +68,9 @@
25.4 mcacomponent_LTLIBRARIES = $(component_install)
25.5 mca_coll_ml_la_SOURCES = $(sources)
25.6 mca_coll_ml_la_LDFLAGS = -module -avoid-version
25.7 -mca_coll_ml_la_LIBADD = \
25.8 - $(top_ompi_builddir)/ompi/mca/common/commpatterns/libmca_common_commpatterns.la \
25.9 - $(top_ompi_builddir)/ompi/mca/common/netpatterns/libmca_common_netpatterns.la
25.10 +mca_coll_ml_la_LIBADD =
25.11
25.12
25.13 noinst_LTLIBRARIES = $(component_noinst)
25.14 libmca_coll_ml_la_SOURCES =$(sources)
25.15 libmca_coll_ml_la_LDFLAGS = -module -avoid-version
25.16 -
25.17 -$(top_ompi_builddir)/ompi/mca/common/commpatterns/libmca_common_commpatterns.la: foo.c
25.18 - cd $(top_ompi_builddir)/ompi/mca/common/commpatterns && $(MAKE)
25.19 -$(top_ompi_builddir)/ompi/mca/common/netpatterns/libmca_common_netpatterns.la: foo.c
25.20 - cd $(top_ompi_builddir)/ompi/mca/common/netpatterns && $(MAKE)
25.21 -
25.22 -foo.c:
26.1 --- a/ompi/mca/coll/ml/coll_ml_component.c Tue Feb 19 22:36:41 2013 +0000
26.2 +++ b/ompi/mca/coll/ml/coll_ml_component.c Tue Feb 19 22:50:56 2013 +0000
26.3 @@ -36,7 +36,7 @@
26.4 #include "coll_ml.h"
26.5 #include "coll_ml_inlines.h"
26.6
26.7 -#include "ompi/mca/common/netpatterns/common_netpatterns.h"
26.8 +#include "ompi/patterns/net/netpatterns.h"
26.9 #include "coll_ml_mca.h"
26.10 #include "coll_ml_custom_utils.h"
26.11
26.12 @@ -460,7 +460,7 @@
26.13 OBJ_CONSTRUCT(&(cs->sequential_collectives_mutex), opal_mutex_t);
26.14 OBJ_CONSTRUCT(&(cs->sequential_collectives), opal_list_t);
26.15
26.16 - rc = ompi_common_netpatterns_init();
26.17 + rc = netpatterns_init();
26.18 if (OMPI_SUCCESS != rc) {
26.19 return rc;
26.20 }
27.1 --- a/ompi/mca/coll/ml/coll_ml_custom_utils.c Tue Feb 19 22:36:41 2013 +0000
27.2 +++ b/ompi/mca/coll/ml/coll_ml_custom_utils.c Tue Feb 19 22:50:56 2013 +0000
27.3 @@ -32,7 +32,7 @@
27.4 #include "ompi/mca/coll/base/base.h"
27.5 #include "ompi/mca/coll/ml/coll_ml.h"
27.6 #include "ompi/mca/coll/ml/coll_ml_inlines.h"
27.7 -#include "ompi/mca/common/commpatterns/common_coll_ops.h"
27.8 +#include "ompi/patterns/comm/coll_ops.h"
27.9
27.10 #include "ompi/datatype/ompi_datatype.h"
27.11 #include "ompi/communicator/communicator.h"
28.1 --- a/ompi/mca/coll/ml/coll_ml_mca.c Tue Feb 19 22:36:41 2013 +0000
28.2 +++ b/ompi/mca/coll/ml/coll_ml_mca.c Tue Feb 19 22:50:56 2013 +0000
28.3 @@ -25,7 +25,7 @@
28.4 #include "coll_ml_inlines.h"
28.5 #include "coll_ml_mca.h"
28.6 #include "coll_ml_lmngr.h"
28.7 -#include "ompi/mca/common/netpatterns/common_netpatterns.h"
28.8 +#include "ompi/patterns/net/netpatterns.h"
28.9 #include "opal/mca/installdirs/installdirs.h"
28.10
28.11 /*
29.1 --- a/ompi/mca/coll/ml/coll_ml_module.c Tue Feb 19 22:36:41 2013 +0000
29.2 +++ b/ompi/mca/coll/ml/coll_ml_module.c Tue Feb 19 22:50:56 2013 +0000
29.3 @@ -30,7 +30,7 @@
29.4 #include "ompi/mca/sbgp/base/base.h"
29.5 #include "ompi/mca/bcol/base/base.h"
29.6 #include "ompi/mca/sbgp/sbgp.h"
29.7 -#include "ompi/mca/common/commpatterns/common_coll_ops.h"
29.8 +#include "ompi/patterns/comm/coll_ops.h"
29.9 #include "ompi/mca/coll/ml/coll_ml.h"
29.10
29.11 #include "orte/mca/rml/rml.h"
30.1 --- a/ompi/mca/common/commpatterns/Makefile.am Tue Feb 19 22:36:41 2013 +0000
30.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000
30.3 @@ -1,104 +0,0 @@
30.4 -#
30.5 -# Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
30.6 -# Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
30.7 -# $COPYRIGHT$
30.8 -#
30.9 -# Additional copyrights may follow
30.10 -#
30.11 -# $HEADER$
30.12 -#
30.13 -
30.14 -# A word of explanation...
30.15 -#
30.16 -# This library is linked against various MCA components because all
30.17 -# shared-memory based components (e.g., mpool, ptl, etc.) need to
30.18 -# share some common code and data. There's two cases:
30.19 -#
30.20 -# 1. libmca_common_commpatterns.la is a shared library. By linking that shared
30.21 -# library to all components that need it, the OS linker will
30.22 -# automatically load it into the process as necessary, and there will
30.23 -# only be one copy (i.e., all the components will share *one* copy of
30.24 -# the code and data).
30.25 -#
30.26 -# 2. libmca_common_commpatterns.la is a static library. In this case, it will
30.27 -# be rolled up into the top-level libmpi.la. It will also be rolled
30.28 -# into each component, but then the component will also be rolled up
30.29 -# into the upper-level libmpi.la. Linkers universally know how to
30.30 -# "figure this out" so that we end up with only one copy of the code
30.31 -# and data.
30.32 -#
30.33 -# Note that building this common component statically and linking
30.34 -# against other dynamic components is *not* supported!
30.35 -
30.36 -EXTRA_DIST = .windows
30.37 -
30.38 -# Header files
30.39 -
30.40 -headers = \
30.41 - common_coll_ops.h \
30.42 - common_netpatterns.h \
30.43 - ompi_common_netpatterns_macros.h
30.44 -
30.45 -# Source files
30.46 -
30.47 -sources = \
30.48 - common_allreduce.c \
30.49 - common_allgather.c \
30.50 - common_bcast.c
30.51 -
30.52 -# As per above, we'll either have an installable or noinst result.
30.53 -# The installable one should follow the same MCA prefix naming rules
30.54 -# (i.e., libmca_<type>_<name>.la). The noinst one can be named
30.55 -# whatever it wants, although libmca_<type>_<name>_noinst.la is
30.56 -# recommended.
30.57 -
30.58 -# To simplify components that link to this library, we will *always*
30.59 -# have an output libtool library named libmca_<type>_<name>.la -- even
30.60 -# for case 2) described above (i.e., so there's no conditional logic
30.61 -# necessary in component Makefile.am's that link to this library).
30.62 -# Hence, if we're creating a noinst version of this library (i.e.,
30.63 -# case 2), we sym link it to the libmca_<type>_<name>.la name
30.64 -# (libtool will do the Right Things under the covers). See the
30.65 -# all-local and clean-local rules, below, for how this is effected.
30.66 -
30.67 -lib_LTLIBRARIES =
30.68 -noinst_LTLIBRARIES =
30.69 -comp_inst = libmca_common_commpatterns.la
30.70 -comp_noinst = libmca_common_commpatterns_noinst.la
30.71 -
30.72 -if MCA_BUILD_ompi_common_commpatterns_DSO
30.73 -lib_LTLIBRARIES += $(comp_inst)
30.74 -else
30.75 -noinst_LTLIBRARIES += $(comp_noinst)
30.76 -endif
30.77 -
30.78 -libmca_common_commpatterns_la_SOURCES = $(headers) $(sources)
30.79 -libmca_common_commpatterns_noinst_la_SOURCES = $(libmca_common_commpatterns_la_SOURCES)
30.80 -libmca_common_commpatterns_la_LIBADD = \
30.81 - $(top_ompi_builddir)/ompi/mca/common/netpatterns/libmca_common_netpatterns.la
30.82 -
30.83 -# These two rules will sym link the "noinst" libtool library filename
30.84 -# to the installable libtool library filename in the case where we are
30.85 -# compiling this component statically (case 2), described above).
30.86 -
30.87 -all-local:
30.88 - if test -z "$(lib_LTLIBRARIES)"; then \
30.89 - rm -f "$(comp_inst)"; \
30.90 - $(LN_S) "$(comp_noinst)" "$(comp_inst)"; \
30.91 - fi
30.92 -
30.93 -clean-local:
30.94 - if test -z "$(lib_LTLIBRARIES)"; then \
30.95 - rm -f "$(comp_inst)"; \
30.96 - fi
30.97 -# The code below guaranty that the netpatterns will be build before commpatterns
30.98 -FORCE:
30.99 -
30.100 -$(top_ompi_builddir)/ompi/mca/common/netpatterns/libmca_common_netpatterns.la: FORCE
30.101 - (cd $(top_ompi_builddir)/ompi/mca/common/netpatterns/ && $(MAKE) $(AM_MAKEFLAGS) libmca_common_netpatterns.la)
30.102 -
30.103 -install-libmca_common_netpatterns: FORCE
30.104 - (cd $(top_ompi_builddir)/ompi/mca/common/netpatterns/ && $(MAKE) $(AM_MAKEFLAGS) install)
30.105 -
30.106 -install: install-libmca_common_netpatterns install-am
30.107 -
31.1 --- a/ompi/mca/common/commpatterns/common_allgather.c Tue Feb 19 22:36:41 2013 +0000
31.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000
31.3 @@ -1,289 +0,0 @@
31.4 -/*
31.5 - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
31.6 - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
31.7 - * $COPYRIGHT$
31.8 - *
31.9 - * Additional copyrights may follow
31.10 - *
31.11 - * $HEADER$
31.12 - */
31.13 -/** @file */
31.14 -
31.15 -#include "ompi_config.h"
31.16 -
31.17 -#include "ompi/constants.h"
31.18 -#include "ompi/op/op.h"
31.19 -#include "ompi/datatype/ompi_datatype.h"
31.20 -#include "ompi/communicator/communicator.h"
31.21 -#include "orte/mca/rml/rml.h"
31.22 -#include "opal/include/opal/sys/atomic.h"
31.23 -#include "common_coll_ops.h"
31.24 -#include "ompi/mca/common/netpatterns/common_netpatterns.h"
31.25 -#include "ompi/mca/dpm/dpm.h"
31.26 -#include "orte/util/proc_info.h"
31.27 -#include "ompi/mca/pml/pml.h"
31.28 -
31.29 -/**
31.30 - * All-reduce - subgroup in communicator
31.31 - */
31.32 -OMPI_DECLSPEC int comm_allgather_pml(void *src_buf, void *dest_buf, int count,
31.33 - ompi_datatype_t *dtype, int my_rank_in_group,
31.34 - int n_peers, int *ranks_in_comm,ompi_communicator_t *comm)
31.35 -{
31.36 - /* local variables */
31.37 - int rc=OMPI_SUCCESS,msg_cnt;
31.38 - int pair_rank,exchange,extra_rank, n_extra_nodes,n_extra;
31.39 - int proc_block,extra_start,extra_end,iovec_len;
31.40 - int remote_data_start_rank,remote_data_end_rank;
31.41 - int local_data_start_rank;
31.42 - mca_common_netpatterns_pair_exchange_node_t my_exchange_node;
31.43 - size_t message_extent,current_data_extent,current_data_count;
31.44 - size_t dt_size;
31.45 - OPAL_PTRDIFF_TYPE dt_extent;
31.46 - char *src_buf_current;
31.47 - char *dest_buf_current;
31.48 - struct iovec send_iov[2] = {{0,0},{0,0}},
31.49 - recv_iov[2] = {{0,0},{0,0}};
31.50 - ompi_request_t *requests[4];
31.51 -
31.52 - /* get size of data needed - same layout as user data, so that
31.53 - * we can apply the reudction routines directly on these buffers
31.54 - */
31.55 - rc = ompi_datatype_type_size(dtype, &dt_size);
31.56 - if( OMPI_SUCCESS != rc ) {
31.57 - goto Error;
31.58 - }
31.59 -
31.60 - rc = ompi_datatype_type_extent(dtype, &dt_extent);
31.61 - if( OMPI_SUCCESS != rc ) {
31.62 - goto Error;
31.63 - }
31.64 - message_extent = dt_extent*count;
31.65 -
31.66 - /* place my data in the correct destination buffer */
31.67 - rc=ompi_datatype_copy_content_same_ddt(dtype,count,
31.68 - (char *)dest_buf+my_rank_in_group*message_extent,
31.69 - (char *)src_buf);
31.70 - if( OMPI_SUCCESS != rc ) {
31.71 - goto Error;
31.72 - }
31.73 -
31.74 - /* 1 process special case */
31.75 - if(1 == n_peers) {
31.76 - return OMPI_SUCCESS;
31.77 - }
31.78 -
31.79 - /* get my reduction communication pattern */
31.80 - rc = mca_common_netpatterns_setup_recursive_doubling_tree_node(n_peers,
31.81 - my_rank_in_group, &my_exchange_node);
31.82 - if(OMPI_SUCCESS != rc){
31.83 - return rc;
31.84 - }
31.85 -
31.86 - n_extra_nodes=n_peers-my_exchange_node.n_largest_pow_2;
31.87 -
31.88 - /* get the data from the extra sources */
31.89 - if(0 < my_exchange_node.n_extra_sources) {
31.90 -
31.91 - if ( EXCHANGE_NODE == my_exchange_node.node_type ) {
31.92 -
31.93 - /*
31.94 - ** Receive data from extra node
31.95 - */
31.96 -
31.97 - extra_rank=my_exchange_node.rank_extra_source;
31.98 - /* receive the data into the correct location - will use 2
31.99 - * messages in the recursive doubling phase */
31.100 - dest_buf_current=(char *)dest_buf+message_extent*extra_rank;
31.101 - rc=MCA_PML_CALL(recv(dest_buf_current,
31.102 - count,dtype,ranks_in_comm[extra_rank],
31.103 - -OMPI_COMMON_TAG_ALLREDUCE,
31.104 - comm, MPI_STATUSES_IGNORE));
31.105 - if( 0 > rc ) {
31.106 - goto Error;
31.107 - }
31.108 -
31.109 - } else {
31.110 -
31.111 - /*
31.112 - ** Send data to "partner" node
31.113 - */
31.114 - extra_rank=my_exchange_node.rank_extra_source;
31.115 - src_buf_current=(char *)src_buf;
31.116 - rc=MCA_PML_CALL(send(src_buf_current,
31.117 - count,dtype,ranks_in_comm[extra_rank],
31.118 - -OMPI_COMMON_TAG_ALLREDUCE,
31.119 - MCA_PML_BASE_SEND_STANDARD,
31.120 - comm));
31.121 - if( 0 > rc ) {
31.122 - goto Error;
31.123 - }
31.124 - }
31.125 - }
31.126 -
31.127 - current_data_extent=message_extent;
31.128 - current_data_count=count;
31.129 - src_buf_current=(char *)dest_buf+my_rank_in_group*message_extent;
31.130 - proc_block=1;
31.131 - local_data_start_rank=my_rank_in_group;
31.132 - /* loop over data exchanges */
31.133 - for(exchange=0 ; exchange < my_exchange_node.n_exchanges ; exchange++) {
31.134 -
31.135 - /* is the remote data read */
31.136 - pair_rank=my_exchange_node.rank_exchanges[exchange];
31.137 - msg_cnt=0;
31.138 -
31.139 - /*
31.140 - * Power of 2 data segment
31.141 - */
31.142 - /* post non-blocking receive */
31.143 - if(pair_rank > my_rank_in_group ){
31.144 - recv_iov[0].iov_base=src_buf_current+current_data_extent;
31.145 - recv_iov[0].iov_len=current_data_extent;
31.146 - iovec_len=1;
31.147 - remote_data_start_rank=local_data_start_rank+proc_block;
31.148 - remote_data_end_rank=remote_data_start_rank+proc_block-1;
31.149 - } else {
31.150 - recv_iov[0].iov_base=src_buf_current-current_data_extent;
31.151 - recv_iov[0].iov_len=current_data_extent;
31.152 - iovec_len=1;
31.153 - remote_data_start_rank=local_data_start_rank-proc_block;
31.154 - remote_data_end_rank=remote_data_start_rank+proc_block-1;
31.155 - }
31.156 - /* the data from the non power of 2 ranks */
31.157 - if(remote_data_start_rank<n_extra_nodes) {
31.158 - /* figure out how much data is at the remote rank */
31.159 - /* last rank with data */
31.160 - extra_start=remote_data_start_rank;
31.161 - extra_end=remote_data_end_rank;
31.162 - if(extra_end >= n_extra_nodes ) {
31.163 - /* if last rank exceeds the ranks with extra data,
31.164 - * adjust this.
31.165 - */
31.166 - extra_end=n_extra_nodes-1;
31.167 - }
31.168 - /* get the number of ranks whos data is to be grabbed */
31.169 - n_extra=extra_end-extra_start+1;
31.170 -
31.171 - recv_iov[1].iov_base=(char *)dest_buf+
31.172 - (extra_start+my_exchange_node.n_largest_pow_2)*message_extent;
31.173 - recv_iov[1].iov_len=n_extra*count;
31.174 - iovec_len=2;
31.175 - }
31.176 -
31.177 - rc=MCA_PML_CALL(irecv(recv_iov[0].iov_base,
31.178 - current_data_count,dtype,ranks_in_comm[pair_rank],
31.179 - -OMPI_COMMON_TAG_ALLREDUCE,
31.180 - comm,&(requests[msg_cnt])));
31.181 - if( 0 > rc ) {
31.182 - goto Error;
31.183 - }
31.184 - msg_cnt++;
31.185 -
31.186 - if(iovec_len > 1 ) {
31.187 - rc=MCA_PML_CALL(irecv(recv_iov[1].iov_base,
31.188 - recv_iov[1].iov_len,dtype,ranks_in_comm[pair_rank],
31.189 - -OMPI_COMMON_TAG_ALLREDUCE,
31.190 - comm,&(requests[msg_cnt])));
31.191 - if( 0 > rc ) {
31.192 - goto Error;
31.193 - }
31.194 - msg_cnt++;
31.195 - }
31.196 -
31.197 - /* post non-blocking send */
31.198 - send_iov[0].iov_base=src_buf_current;
31.199 - send_iov[0].iov_len=current_data_extent;
31.200 - iovec_len=1;
31.201 - /* the data from the non power of 2 ranks */
31.202 - if(local_data_start_rank<n_extra_nodes) {
31.203 - /* figure out how much data is at the remote rank */
31.204 - /* last rank with data */
31.205 - extra_start=local_data_start_rank;
31.206 - extra_end=extra_start+proc_block-1;
31.207 - if(extra_end >= n_extra_nodes ) {
31.208 - /* if last rank exceeds the ranks with extra data,
31.209 - * adjust this.
31.210 - */
31.211 - extra_end=n_extra_nodes-1;
31.212 - }
31.213 - /* get the number of ranks whos data is to be grabbed */
31.214 - n_extra=extra_end-extra_start+1;
31.215 -
31.216 - send_iov[1].iov_base=(char *)dest_buf+
31.217 - (extra_start+my_exchange_node.n_largest_pow_2)*message_extent;
31.218 - send_iov[1].iov_len=n_extra*count;
31.219 - iovec_len=2;
31.220 - }
31.221 -
31.222 - rc=MCA_PML_CALL(isend(send_iov[0].iov_base,
31.223 - current_data_count,dtype,ranks_in_comm[pair_rank],
31.224 - -OMPI_COMMON_TAG_ALLREDUCE,MCA_PML_BASE_SEND_STANDARD,
31.225 - comm,&(requests[msg_cnt])));
31.226 - if( 0 > rc ) {
31.227 - goto Error;
31.228 - }
31.229 - msg_cnt++;
31.230 - if( iovec_len > 1 ) {
31.231 - rc=MCA_PML_CALL(isend(send_iov[1].iov_base,
31.232 - send_iov[1].iov_len,dtype,ranks_in_comm[pair_rank],
31.233 - -OMPI_COMMON_TAG_ALLREDUCE,MCA_PML_BASE_SEND_STANDARD,
31.234 - comm,&(requests[msg_cnt])));
31.235 - if( 0 > rc ) {
31.236 - goto Error;
31.237 - }
31.238 - msg_cnt++;
31.239 - }
31.240 -
31.241 - /* prepare the source buffer for the next iteration */
31.242 - if(pair_rank < my_rank_in_group ){
31.243 - src_buf_current-=current_data_extent;
31.244 - local_data_start_rank-=proc_block;
31.245 - }
31.246 - proc_block*=2;
31.247 - current_data_extent*=2;
31.248 - current_data_count*=2;
31.249 -
31.250 - /* wait on send and receive completion */
31.251 - ompi_request_wait_all(msg_cnt,requests,MPI_STATUSES_IGNORE);
31.252 - }
31.253 -
31.254 - /* copy data in from the "extra" source, if need be */
31.255 - if(0 < my_exchange_node.n_extra_sources) {
31.256 -
31.257 - if ( EXTRA_NODE == my_exchange_node.node_type ) {
31.258 - /*
31.259 - ** receive the data
31.260 - ** */
31.261 - extra_rank=my_exchange_node.rank_extra_source;
31.262 -
31.263 - rc=MCA_PML_CALL(recv(dest_buf,
31.264 - count*n_peers,dtype,ranks_in_comm[extra_rank],
31.265 - -OMPI_COMMON_TAG_ALLREDUCE,
31.266 - comm,MPI_STATUSES_IGNORE));
31.267 - if(0 > rc ) {
31.268 - goto Error;
31.269 - }
31.270 - } else {
31.271 - /* send the data to the pair-rank outside of the power of 2 set
31.272 - ** of ranks
31.273 - */
31.274 -
31.275 - extra_rank=my_exchange_node.rank_extra_source;
31.276 - rc=MCA_PML_CALL(send(dest_buf,
31.277 - count*n_peers,dtype,ranks_in_comm[extra_rank],
31.278 - -OMPI_COMMON_TAG_ALLREDUCE,
31.279 - MCA_PML_BASE_SEND_STANDARD,
31.280 - comm));
31.281 - if( 0 > rc ) {
31.282 - goto Error;
31.283 - }
31.284 - }
31.285 - }
31.286 -
31.287 - /* return */
31.288 - return OMPI_SUCCESS;
31.289 -
31.290 -Error:
31.291 - return rc;
31.292 -}
32.1 --- a/ompi/mca/common/commpatterns/common_allreduce.c Tue Feb 19 22:36:41 2013 +0000
32.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000
32.3 @@ -1,256 +0,0 @@
32.4 -/*
32.5 - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
32.6 - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
32.7 - * $COPYRIGHT$
32.8 - *
32.9 - * Additional copyrights may follow
32.10 - *
32.11 - * $HEADER$
32.12 - */
32.13 -/** @file */
32.14 -
32.15 -#include "ompi_config.h"
32.16 -
32.17 -#include "ompi/constants.h"
32.18 -#include "ompi/op/op.h"
32.19 -#include "ompi/datatype/ompi_datatype.h"
32.20 -#include "ompi/communicator/communicator.h"
32.21 -#include "orte/mca/rml/rml.h"
32.22 -#include "opal/include/opal/sys/atomic.h"
32.23 -#include "ompi/mca/common/commpatterns/common_netpatterns.h"
32.24 -#include "common_coll_ops.h"
32.25 -#include "ompi/mca/common/netpatterns/common_netpatterns.h"
32.26 -#include "ompi/mca/dpm/dpm.h"
32.27 -#include "orte/util/proc_info.h"
32.28 -#include "ompi/mca/pml/pml.h"
32.29 -
32.30 -/**
32.31 - * All-reduce for contigous primitive types
32.32 - */
32.33 -OMPI_DECLSPEC int comm_allreduce_pml(void *sbuf, void *rbuf, int count,
32.34 - ompi_datatype_t *dtype, int my_rank_in_group,
32.35 - struct ompi_op_t *op, int n_peers,int *ranks_in_comm,
32.36 - ompi_communicator_t *comm)
32.37 -{
32.38 - /* local variables */
32.39 - int rc=OMPI_SUCCESS,n_dts_per_buffer,n_data_segments,stripe_number;
32.40 - int pair_rank,exchange,extra_rank;
32.41 - mca_common_netpatterns_pair_exchange_node_t my_exchange_node;
32.42 - int count_processed,count_this_stripe;
32.43 - size_t dt_size,dt_extent;
32.44 - char scratch_bufers[2][MAX_TMP_BUFFER];
32.45 - int send_buffer=0,recv_buffer=1;
32.46 - char *sbuf_current, *rbuf_current;
32.47 - ompi_request_t *requests[2];
32.48 -
32.49 - /* get size of data needed - same layout as user data, so that
32.50 - * we can apply the reudction routines directly on these buffers
32.51 - */
32.52 - rc = opal_datatype_type_size((opal_datatype_t *)dtype, &dt_size);
32.53 - if( OMPI_SUCCESS != rc ) {
32.54 - goto Error;
32.55 - }
32.56 - rc = ompi_datatype_type_extent(dtype, (OPAL_PTRDIFF_TYPE *)&dt_extent);
32.57 - if( OMPI_SUCCESS != rc ) {
32.58 - goto Error;
32.59 - }
32.60 -
32.61 - /* 1 process special case */
32.62 - if(1 == n_peers) {
32.63 - /* place my data in the correct destination buffer */
32.64 - rc=ompi_datatype_copy_content_same_ddt(dtype,count,
32.65 - (char *)rbuf, (char *)sbuf);
32.66 - if( OMPI_SUCCESS != rc ) {
32.67 - goto Error;
32.68 - }
32.69 - return OMPI_SUCCESS;
32.70 - }
32.71 -
32.72 - /* number of data types copies that the scratch buffer can hold */
32.73 - n_dts_per_buffer=((int) MAX_TMP_BUFFER)/dt_extent;
32.74 - if ( 0 == n_dts_per_buffer ) {
32.75 - rc=OMPI_ERROR;
32.76 - goto Error;
32.77 - }
32.78 -
32.79 - /* compute number of stripes needed to process this collective */
32.80 - n_data_segments=(count+n_dts_per_buffer -1 ) / n_dts_per_buffer ;
32.81 -
32.82 - /* get my reduction communication pattern */
32.83 - rc = mca_common_netpatterns_setup_recursive_doubling_tree_node(n_peers,
32.84 - my_rank_in_group, &my_exchange_node);
32.85 - if(OMPI_SUCCESS != rc){
32.86 - return rc;
32.87 - }
32.88 -
32.89 - count_processed=0;
32.90 -
32.91 - /* get a pointer to the shared-memory working buffer */
32.92 - /* NOTE: starting with a rather synchronous approach */
32.93 - for( stripe_number=0 ; stripe_number < n_data_segments ; stripe_number++ ) {
32.94 -
32.95 - /* get number of elements to process in this stripe */
32.96 - count_this_stripe=n_dts_per_buffer;
32.97 - if( count_processed + count_this_stripe > count )
32.98 - count_this_stripe=count-count_processed;
32.99 -
32.100 - /* copy data from the input buffer into the temp buffer */
32.101 - sbuf_current=(char *)sbuf+count_processed*dt_extent;
32.102 - rc=ompi_datatype_copy_content_same_ddt(dtype,count_this_stripe,
32.103 - scratch_bufers[send_buffer], sbuf_current);
32.104 - if( OMPI_SUCCESS != rc ) {
32.105 - goto Error;
32.106 - }
32.107 -
32.108 - /* copy data in from the "extra" source, if need be */
32.109 - if(0 < my_exchange_node.n_extra_sources) {
32.110 -
32.111 - if ( EXCHANGE_NODE == my_exchange_node.node_type ) {
32.112 -
32.113 - /*
32.114 - ** Receive data from extra node
32.115 - */
32.116 - extra_rank=my_exchange_node.rank_extra_source;
32.117 - rc=MCA_PML_CALL(recv(scratch_bufers[recv_buffer],
32.118 - count_this_stripe,dtype,ranks_in_comm[extra_rank],
32.119 - -OMPI_COMMON_TAG_ALLREDUCE, comm,
32.120 - MPI_STATUSES_IGNORE));
32.121 - if( 0 > rc ) {
32.122 - fprintf(stderr," first recv failed in comm_allreduce_pml \n");
32.123 - fflush(stderr);
32.124 - goto Error;
32.125 - }
32.126 -
32.127 -
32.128 - /* apply collective operation to first half of the data */
32.129 - if( 0 < count_this_stripe ) {
32.130 - ompi_op_reduce(op,
32.131 - (void *)scratch_bufers[send_buffer],
32.132 - (void *)scratch_bufers[recv_buffer],
32.133 - count_this_stripe,dtype);
32.134 - }
32.135 -
32.136 -
32.137 - } else {
32.138 -
32.139 - /*
32.140 - ** Send data to "partner" node
32.141 - */
32.142 - extra_rank=my_exchange_node.rank_extra_source;
32.143 - rc=MCA_PML_CALL(send(scratch_bufers[send_buffer],
32.144 - count_this_stripe,dtype,ranks_in_comm[extra_rank],
32.145 - -OMPI_COMMON_TAG_ALLREDUCE, MCA_PML_BASE_SEND_STANDARD,
32.146 - comm));
32.147 - if( 0 > rc ) {
32.148 - fprintf(stderr," first send failed in comm_allreduce_pml \n");
32.149 - fflush(stderr);
32.150 - goto Error;
32.151 - }
32.152 - }
32.153 -
32.154 - /* change pointer to scratch buffer - this was we can send data
32.155 - ** that we have summed w/o a memory copy, and receive data into the
32.156 - ** other buffer, w/o fear of over writting data that has not yet
32.157 - ** completed being send
32.158 - */
32.159 - recv_buffer^=1;
32.160 - send_buffer^=1;
32.161 - }
32.162 -
32.163 - /* loop over data exchanges */
32.164 - for(exchange=0 ; exchange < my_exchange_node.n_exchanges ; exchange++) {
32.165 -
32.166 - /* is the remote data read */
32.167 - pair_rank=my_exchange_node.rank_exchanges[exchange];
32.168 -
32.169 - /* post non-blocking receive */
32.170 - rc=MCA_PML_CALL(irecv(scratch_bufers[recv_buffer],
32.171 - count_this_stripe,dtype,ranks_in_comm[pair_rank],
32.172 - -OMPI_COMMON_TAG_ALLREDUCE,
32.173 - comm,&(requests[0])));
32.174 - if( 0 > rc ) {
32.175 - fprintf(stderr," irecv failed in comm_allreduce_pml at iterations %d \n",
32.176 - exchange);
32.177 - fflush(stderr);
32.178 - goto Error;
32.179 - }
32.180 -
32.181 - /* post non-blocking send */
32.182 - rc=MCA_PML_CALL(isend(scratch_bufers[send_buffer],
32.183 - count_this_stripe,dtype, ranks_in_comm[pair_rank],
32.184 - -OMPI_COMMON_TAG_ALLREDUCE,MCA_PML_BASE_SEND_STANDARD,
32.185 - comm,&(requests[1])));
32.186 - if( 0 > rc ) {
32.187 - fprintf(stderr," isend failed in comm_allreduce_pml at iterations %d \n",
32.188 - exchange);
32.189 - fflush(stderr);
32.190 - goto Error;
32.191 - }
32.192 - /* wait on send and receive completion */
32.193 - ompi_request_wait_all(2,requests,MPI_STATUSES_IGNORE);
32.194 -
32.195 - /* reduce the data */
32.196 - if( 0 < count_this_stripe ) {
32.197 - ompi_op_reduce(op,
32.198 - (void *)scratch_bufers[send_buffer],
32.199 - (void *)scratch_bufers[recv_buffer],
32.200 - count_this_stripe,dtype);
32.201 - }
32.202 - /* get ready for next step */
32.203 - recv_buffer^=1;
32.204 - send_buffer^=1;
32.205 -
32.206 - }
32.207 -
32.208 - /* copy data in from the "extra" source, if need be */
32.209 - if(0 < my_exchange_node.n_extra_sources) {
32.210 -
32.211 - if ( EXTRA_NODE == my_exchange_node.node_type ) {
32.212 - /*
32.213 - ** receive the data
32.214 - ** */
32.215 - extra_rank=my_exchange_node.rank_extra_source;
32.216 - rc=MCA_PML_CALL(recv(scratch_bufers[recv_buffer],
32.217 - count_this_stripe,dtype,ranks_in_comm[extra_rank],
32.218 - -OMPI_COMMON_TAG_ALLREDUCE, comm,
32.219 - MPI_STATUSES_IGNORE));
32.220 - if( 0 > rc ) {
32.221 - fprintf(stderr," last recv failed in comm_allreduce_pml \n");
32.222 - fflush(stderr);
32.223 - goto Error;
32.224 - }
32.225 -
32.226 - recv_buffer^=1;
32.227 - send_buffer^=1;
32.228 - } else {
32.229 - /* send the data to the pair-rank outside of the power of 2 set
32.230 - ** of ranks
32.231 - */
32.232 -
32.233 - extra_rank=my_exchange_node.rank_extra_source;
32.234 - rc=MCA_PML_CALL(send((char *)scratch_bufers[send_buffer],
32.235 - count_this_stripe,dtype,ranks_in_comm[extra_rank],
32.236 - -OMPI_COMMON_TAG_ALLREDUCE, MCA_PML_BASE_SEND_STANDARD,
32.237 - comm));
32.238 - if( 0 > rc ) {
32.239 - fprintf(stderr," last send failed in comm_allreduce_pml \n");
32.240 - fflush(stderr);
32.241 - goto Error;
32.242 - }
32.243 - }
32.244 - }
32.245 -
32.246 - /* copy data from the temp buffer into the output buffer */
32.247 - rbuf_current = (char *) rbuf + count_processed * dt_size;
32.248 - memcpy(rbuf_current,scratch_bufers[send_buffer], count_this_stripe*dt_size);
32.249 -
32.250 - /* update the count of elements processed */
32.251 - count_processed += count_this_stripe;
32.252 - }
32.253 -
32.254 - /* return */
32.255 - return OMPI_SUCCESS;
32.256 -
32.257 -Error:
32.258 - return rc;
32.259 -}
33.1 --- a/ompi/mca/common/commpatterns/common_bcast.c Tue Feb 19 22:36:41 2013 +0000
33.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000
33.3 @@ -1,98 +0,0 @@
33.4 -/*
33.5 - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
33.6 - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
33.7 - * $COPYRIGHT$
33.8 - *
33.9 - * Additional copyrights may follow
33.10 - *
33.11 - * $HEADER$
33.12 - */
33.13 -/** @file */
33.14 -
33.15 -#include "ompi_config.h"
33.16 -
33.17 -#include "ompi/constants.h"
33.18 -#include "ompi/op/op.h"
33.19 -#include "ompi/datatype/ompi_datatype.h"
33.20 -#include "ompi/communicator/communicator.h"
33.21 -#include "orte/mca/rml/rml.h"
33.22 -#include "opal/include/opal/sys/atomic.h"
33.23 -#include "common_coll_ops.h"
33.24 -#include "ompi/mca/common/netpatterns/common_netpatterns.h"
33.25 -#include "ompi/mca/dpm/dpm.h"
33.26 -#include "orte/util/proc_info.h"
33.27 -#include "ompi/mca/pml/pml.h"
33.28 -
33.29 -/**
33.30 - * Bcast - subgroup in communicator
33.31 - * This is a very simple algorithm - binary tree, transmitting the full
33.32 - * message at each step.
33.33 - */
33.34 -OMPI_DECLSPEC int comm_bcast_pml(void *buffer, int root, int count,
33.35 - ompi_datatype_t *dtype, int my_rank_in_group,
33.36 - int n_peers, int *ranks_in_comm,ompi_communicator_t *comm)
33.37 -{
33.38 - /* local variables */
33.39 - int rc=OMPI_SUCCESS,msg_cnt,i;
33.40 - ompi_request_t *requests[2];
33.41 - int node_rank, peer_rank;
33.42 - mca_common_netpatterns_tree_node_t node_data;
33.43 -
33.44 - /*
33.45 - * shift rank to root==0 tree
33.46 - */
33.47 - node_rank=(my_rank_in_group-root+n_peers)%n_peers;
33.48 -
33.49 - /*
33.50 - * compute my communication pattern - binary tree
33.51 - */
33.52 - rc=mca_common_netpatterns_setup_narray_tree(2, node_rank, n_peers,
33.53 - &node_data);
33.54 - if( OMPI_SUCCESS != rc ) {
33.55 - goto Error;
33.56 - }
33.57 -
33.58 - /* 1 process special case */
33.59 - if(1 == n_peers) {
33.60 - return OMPI_SUCCESS;
33.61 - }
33.62 -
33.63 - /* if I have parents - wait on the data to arrive */
33.64 - if(node_data.n_parents) {
33.65 - /* I will have only 1 parent */
33.66 - peer_rank=node_data.parent_rank;
33.67 - peer_rank=(peer_rank+root)%n_peers;
33.68 - /* translate back to actual rank */
33.69 - rc=MCA_PML_CALL(recv(buffer, count,dtype,peer_rank,
33.70 - -OMPI_COMMON_TAG_BCAST, comm, MPI_STATUSES_IGNORE));
33.71 - if( 0 > rc ) {
33.72 - goto Error;
33.73 - }
33.74 - }
33.75 -
33.76 - /* send the data to my children */
33.77 - msg_cnt=0;
33.78 - for(i=0 ; i < node_data.n_children ; i++ ) {
33.79 - peer_rank=node_data.children_ranks[i];
33.80 - peer_rank=(peer_rank+root)%n_peers;
33.81 - rc=MCA_PML_CALL(isend(buffer,
33.82 - count,dtype,peer_rank,
33.83 - -OMPI_COMMON_TAG_BCAST,MCA_PML_BASE_SEND_STANDARD,
33.84 - comm,&(requests[msg_cnt])));
33.85 - if( 0 > rc ) {
33.86 - goto Error;
33.87 - }
33.88 - msg_cnt++;
33.89 - }
33.90 - /* wait for send completion */
33.91 - if(msg_cnt) {
33.92 - /* wait on send and receive completion */
33.93 - ompi_request_wait_all(msg_cnt,requests,MPI_STATUSES_IGNORE);
33.94 - }
33.95 -
33.96 - /* return */
33.97 - return OMPI_SUCCESS;
33.98 -
33.99 -Error:
33.100 - return rc;
33.101 -}
34.1 --- a/ompi/mca/common/commpatterns/common_coll_ops.h Tue Feb 19 22:36:41 2013 +0000
34.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000
34.3 @@ -1,48 +0,0 @@
34.4 -/*
34.5 - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
34.6 - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
34.7 - * $COPYRIGHT$
34.8 - *
34.9 - * Additional copyrights may follow
34.10 - *
34.11 - * $HEADER$
34.12 - */
34.13 -
34.14 -#ifndef COMM_COLL_OP_TYPES_H
34.15 -#define COMM_COLL_OP_TYPES_H
34.16 -
34.17 -#include "ompi_config.h"
34.18 -#include "ompi/datatype/ompi_datatype.h"
34.19 -#include "ompi/proc/proc.h"
34.20 -
34.21 -BEGIN_C_DECLS
34.22 -
34.23 -#define OMPI_COMMON_TAG_ALLREDUCE 99
34.24 -#define OMPI_COMMON_TAG_BCAST 98
34.25 -
34.26 -
34.27 -
34.28 -
34.29 -OMPI_DECLSPEC int comm_allgather_pml(void *src_buf, void *dest_buf, int count,
34.30 - ompi_datatype_t *dtype, int my_rank_in_group, int n_peers,
34.31 - int *ranks_in_comm,ompi_communicator_t *comm);
34.32 -OMPI_DECLSPEC int comm_allreduce_pml(void *sbuf, void *rbuf, int count,
34.33 - ompi_datatype_t *dtype, int my_rank_in_group,
34.34 - struct ompi_op_t *op, int n_peers,int *ranks_in_comm,
34.35 - ompi_communicator_t *comm);
34.36 -OMPI_DECLSPEC int comm_bcast_pml(void *buffer, int root, int count,
34.37 - ompi_datatype_t *dtype, int my_rank_in_group,
34.38 - int n_peers, int *ranks_in_comm,ompi_communicator_t
34.39 - *comm);
34.40 -
34.41 -/* reduction operations supported */
34.42 -#define OP_SUM 1
34.43 -#define OP_MAX 2
34.44 -#define OP_MIN 3
34.45 -
34.46 -#define TYPE_INT4 1
34.47 -
34.48 -
34.49 -END_C_DECLS
34.50 -
34.51 -#endif /* COMM_COLL_OP_TYPES_H */
35.1 --- a/ompi/mca/common/commpatterns/common_netpatterns.h Tue Feb 19 22:36:41 2013 +0000
35.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000
35.3 @@ -1,24 +0,0 @@
35.4 -/*
35.5 - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
35.6 - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
35.7 - * $COPYRIGHT$
35.8 - *
35.9 - * Additional copyrights may follow
35.10 - *
35.11 - * $HEADER$
35.12 - */
35.13 -
35.14 -#ifndef COMM_NETPATTERNS_H
35.15 -#define COMM_NETPATTERNS_H
35.16 -
35.17 -#include "ompi_config.h"
35.18 -#include "orte/include/orte/types.h"
35.19 -#include "orte/mca/rml/rml_types.h"
35.20 -
35.21 -BEGIN_C_DECLS
35.22 -
35.23 -#define MAX_TMP_BUFFER 8192
35.24 -
35.25 -END_C_DECLS
35.26 -
35.27 -#endif /* COMM_NETPATTERNS_H */
36.1 --- a/ompi/mca/common/commpatterns/ompi_common_netpatterns_macros.h Tue Feb 19 22:36:41 2013 +0000
36.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000
36.3 @@ -1,52 +0,0 @@
36.4 -/*
36.5 - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
36.6 - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
36.7 - * $COPYRIGHT$
36.8 - *
36.9 - * Additional copyrights may follow
36.10 - *
36.11 - * $HEADER$
36.12 - */
36.13 -
36.14 -#ifndef OMPI_COMMON_NETPATTERNS_MACROS_H
36.15 -#define OMPI_COMMON_NETPATTERNS_MACROS_H
36.16 -
36.17 -#include "ompi_config.h"
36.18 -
36.19 -BEGIN_C_DECLS
36.20 -
36.21 -/* function to decompose an interger into it's representation in base K */
36.22 -/*
36.23 - * input_value - value to translate (input)
36.24 - * base - base of representation (input)
36.25 - * highest_power - the highest power that may have a non-zero entry (input)
36.26 - * the assumption is that this will be called in the critical path
36.27 - * to compute communication patterns, so will precompute such values
36.28 - * and pass the in.
36.29 - * base_to_power_i - array of base to ith power (input)
36.30 - * cum_base_to_power_i - array of cummulative base to ith power (input)
36.31 - * base_k_rep - representation in base "base". Space is pre-allocated. (out)
36.32 - */
36.33 -static inline __opal_attribute_always_inline__ void
36.34 -common_netpatterns_obtain_rep_base_k (int input_value, int base,
36.35 - int highest_power, int *base_to_power_i,
36.36 - int *base_k_rep
36.37 - )
36.38 -{
36.39 - /* local variables */
36.40 - int lvl, work_value;
36.41 -
36.42 - /* loop over all possible powers */
36.43 - work_value=input_value;
36.44 - for( lvl=highest_power ; lvl >= 0 ; lvl-- ) {
36.45 - /* still need to compute the actual coefficient */
36.46 - base_k_rep[lvl]=work_value/base_to_power_i[lvl];
36.47 - work_value-=(base_k_rep[lvl]*base_to_power_i[lvl]);
36.48 -
36.49 - }
36.50 -
36.51 -}
36.52 -
36.53 -END_C_DECLS
36.54 -
36.55 -#endif /* OMPI_COMMON_NETPATTERNS_MACROS_H */
37.1 --- a/ompi/mca/common/netpatterns/.windows Tue Feb 19 22:36:41 2013 +0000
37.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000
37.3 @@ -1,12 +0,0 @@
37.4 -#
37.5 -# Copyright (c) 2008-2012 High Performance Computing Center Stuttgart,
37.6 -# University of Stuttgart. All rights reserved.
37.7 -# $COPYRIGHT$
37.8 -#
37.9 -# Additional copyrights may follow
37.10 -#
37.11 -# $HEADER$
37.12 -#
37.13 -
37.14 -# Specific to this module
37.15 -exclude_list=common_allreduce.c
37.16 \ No newline at end of file
38.1 --- a/ompi/mca/common/netpatterns/Makefile.am Tue Feb 19 22:36:41 2013 +0000
38.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000
38.3 @@ -1,94 +0,0 @@
38.4 -#
38.5 -# Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
38.6 -# Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
38.7 -# $COPYRIGHT$
38.8 -#
38.9 -# Additional copyrights may follow
38.10 -#
38.11 -# $HEADER$
38.12 -#
38.13 -
38.14 -# A word of explanation...
38.15 -#
38.16 -# This library is linked against various MCA components because all
38.17 -# shared-memory based components (e.g., mpool, ptl, etc.) need to
38.18 -# share some common code and data. There's two cases:
38.19 -#
38.20 -# 1. libmca_common_netpatterns.la is a shared library. By linking that shared
38.21 -# library to all components that need it, the OS linker will
38.22 -# automatically load it into the process as necessary, and there will
38.23 -# only be one copy (i.e., all the components will share *one* copy of
38.24 -# the code and data).
38.25 -#
38.26 -# 2. libmca_common_netpatterns.la is a static library. In this case, it will
38.27 -# be rolled up into the top-level libmpi.la. It will also be rolled
38.28 -# into each component, but then the component will also be rolled up
38.29 -# into the upper-level libmpi.la. Linkers universally know how to
38.30 -# "figure this out" so that we end up with only one copy of the code
38.31 -# and data.
38.32 -#
38.33 -# Note that building this common component statically and linking
38.34 -# against other dynamic components is *not* supported!
38.35 -
38.36 -EXTRA_DIST = .windows
38.37 -
38.38 -# Header files
38.39 -
38.40 -headers = \
38.41 - common_netpatterns.h \
38.42 - common_netpatterns_knomial_tree.h \
38.43 - common_coll_ops.h
38.44 -
38.45 -# Source files
38.46 -
38.47 -sources = \
38.48 - common_netpatterns_base.c \
38.49 - common_netpatterns_multinomial_tree.c \
38.50 - common_netpatterns_nary_tree.c \
38.51 - common_netpatterns_knomial_tree.c
38.52 -
38.53 -# common_allreduce.c # the allredeace is broken
38.54 -
38.55 -# As per above, we'll either have an installable or noinst result.
38.56 -# The installable one should follow the same MCA prefix naming rules
38.57 -# (i.e., libmca_<type>_<name>.la). The noinst one can be named
38.58 -# whatever it wants, although libmca_<type>_<name>_noinst.la is
38.59 -# recommended.
38.60 -
38.61 -# To simplify components that link to this library, we will *always*
38.62 -# have an output libtool library named libmca_<type>_<name>.la -- even
38.63 -# for case 2) described above (i.e., so there's no conditional logic
38.64 -# necessary in component Makefile.am's that link to this library).
38.65 -# Hence, if we're creating a noinst version of this library (i.e.,
38.66 -# case 2), we sym link it to the libmca_<type>_<name>.la name
38.67 -# (libtool will do the Right Things under the covers). See the
38.68 -# all-local and clean-local rules, below, for how this is effected.
38.69 -
38.70 -lib_LTLIBRARIES =
38.71 -noinst_LTLIBRARIES =
38.72 -comp_inst = libmca_common_netpatterns.la
38.73 -comp_noinst = libmca_common_netpatterns_noinst.la
38.74 -
38.75 -if MCA_BUILD_ompi_common_netpatterns_DSO
38.76 -lib_LTLIBRARIES += $(comp_inst)
38.77 -else
38.78 -noinst_LTLIBRARIES += $(comp_noinst)
38.79 -endif
38.80 -
38.81 -libmca_common_netpatterns_la_SOURCES = $(headers) $(sources)
38.82 -libmca_common_netpatterns_noinst_la_SOURCES = $(libmca_common_netpatterns_la_SOURCES)
38.83 -
38.84 -# These two rules will sym link the "noinst" libtool library filename
38.85 -# to the installable libtool library filename in the case where we are
38.86 -# compiling this component statically (case 2), described above).
38.87 -
38.88 -all-local:
38.89 - if test -z "$(lib_LTLIBRARIES)"; then \
38.90 - rm -f "$(comp_inst)"; \
38.91 - $(LN_S) "$(comp_noinst)" "$(comp_inst)"; \
38.92 - fi
38.93 -
38.94 -clean-local:
38.95 - if test -z "$(lib_LTLIBRARIES)"; then \
38.96 - rm -f "$(comp_inst)"; \
38.97 - fi
39.1 --- a/ompi/mca/common/netpatterns/common_allreduce.c Tue Feb 19 22:36:41 2013 +0000
39.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000
39.3 @@ -1,344 +0,0 @@
39.4 -/*
39.5 - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
39.6 - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
39.7 - * $COPYRIGHT$
39.8 - *
39.9 - * Additional copyrights may follow
39.10 - *
39.11 - * $HEADER$
39.12 - */
39.13 -/** @file */
39.14 -
39.15 -#include "ompi_config.h"
39.16 -
39.17 -#include "ompi/constants.h"
39.18 -#include "coll_sm2.h"
39.19 -#include "ompi/op/op.h"
39.20 -#include "ompi/datatype/ompi_datatype.h"
39.21 -#include "ompi/communicator/communicator.h"
39.22 -
39.23 -orte_rml_callback_fn_t send_completion(nt status, struct orte_process_name_t* peer, struct iovec* msg,
39.24 - int count, orte_rml_tag_t tag, void* cbdata)
39.25 -{
39.26 - /* set send completion flag */
39.27 - *(int *)cbdata=1;
39.28 -}
39.29 -
39.30 -
39.31 -orte_rml_module_recv_nb_fn_t recv_completion(nt status, struct orte_process_name_t* peer, struct iovec* msg,
39.32 - int count, orte_rml_tag_t tag, void* cbdata)
39.33 -{
39.34 - /* set receive completion flag */
39.35 - MB();
39.36 - *(int *)cbdata=1;
39.37 -}
39.38 -
39.39 -
39.40 -static void op_reduce(int op_type,(void *)src_dest_buf,(void *) src_buf, int count,
39.41 - int data_type)
39.42 -{
39.43 - /* local variables */
39.44 - int ret;
39.45 -
39.46 - /* op type */
39.47 - switch (op_type) {
39.48 -
39.49 - case OP_SUM:
39.50 -
39.51 -
39.52 - switch (data_type) {
39.53 - case TYPE_INT4:
39.54 - int *int_src_ptr=(int *)src_ptr;
39.55 - int *int_src_dst_ptr=(int *)src_dst_ptr;
39.56 - int cnt;
39.57 - for(cnt=0 ; cnt < count ; ) {
39.58 - (*(int_src_dst_ptr))+=(*(int_src_ptr));
39.59 - break;
39.60 - default:
39.61 - ret=OMPI_ERROR;
39.62 - goto Error;
39.63 - }
39.64 -
39.65 - break;
39.66 -
39.67 - default:
39.68 - ret=OMPI_ERROR;
39.69 - goto Error;
39.70 - }
39.71 -Error:
39.72 - return ret;
39.73 -}
39.74 -
39.75 -/**
39.76 - * All-reduce for contigous primitive types
39.77 - */
39.78 -static
39.79 -comm_allreduce(void *sbuf, void *rbuf, int count, opal_datatype_t *dtype,
39.80 - int op_type, opal_list_t *peers)
39.81 -{
39.82 - /* local variables */
39.83 - int rc=OMPI_SUCCESS,n_dts_per_buffer,n_data_segments,stripe_number;
39.84 - int pair_rank,exchange,extra_rank;
39.85 - int index_read,index_write;
39.86 - mca_common_netpatterns_pair_exchange_node_t my_exchange_node;
39.87 - int my_rank,count_processed,count_this_stripe;
39.88 - size_t n_peers,message_extent,len_data_buffer;
39.89 - size_t dt_size;
39.90 - long long tag, base_tag;
39.91 - sm_work_buffer_t *sm_buffer_desc;
39.92 - opal_list_item_t *item;
39.93 - char scratch_bufers[2][MAX_TMP_BUFFER];
39.94 - int send_buffer=0;recv_buffer=1;
39.95 - char *sbuf_current,*rbuf_current;
39.96 - ompi_proc_t **proc_array;
39.97 - struct iovec send_iov, recv_iov;
39.98 - volatile int *recv_done, *send_done;
39.99 - int recv_completion_flag, send_completion_flag;
39.100 - int data_type;
39.101 -
39.102 - /* get size of data needed - same layout as user data, so that
39.103 - * we can apply the reudction routines directly on these buffers
39.104 - */
39.105 - rc=opal_datatype_type_size(dtype, &dt_size);
39.106 - if( OMPI_SUCCESS != rc ) {
39.107 - goto Error;
39.108 - }
39.109 - message_extent=dt_extent*count;
39.110 -
39.111 - /* lenght of control and data regions */
39.112 - len_data_buffer=sm_module->data_memory_per_proc_per_segment;
39.113 -
39.114 - /* number of data types copies that the scratch buffer can hold */
39.115 - n_dts_per_buffer=((int) MAX_TMP_BUFFER)/dt_size;
39.116 - if ( 0 == n_dts_per_buffer ) {
39.117 - rc=OMPI_ERROR;
39.118 - goto Error;
39.119 - }
39.120 -
39.121 - /* need a read and a write buffer for a pair-wise exchange of data */
39.122 - n_dts_per_buffer/=2;
39.123 - len_data_buffer=n_dts_per_buffer*dt_size;
39.124 -
39.125 - /* compute number of stripes needed to process this collective */
39.126 - n_data_segments=(count+n_dts_per_buffer -1 ) / n_dts_per_buffer ;
39.127 -
39.128 - /* */
39.129 - n_peers=opal_list_get_size(peers);
39.130 -
39.131 - /* get my rank in the list */
39.132 - my_rank=0;
39.133 - for (item = opal_list_get_first(peers) ;
39.134 - item != opal_list_get_end(peers) ;
39.135 - item = opal_list_get_next(peers)) {
39.136 - if(ompi_proc_local()==(ompi_proc_t *)item){
39.137 - /* this is the pointer to my proc strucuture */
39.138 - break;
39.139 - }
39.140 - my_rank++;
39.141 - }
39.142 - proc_array=(ompi_proc_t **)malloc(sizeof(ompi_proc_t *)*n_peers);
39.143 - if( NULL == proc_array) {
39.144 - goto Error;
39.145 - }
39.146 - cnt=0;
39.147 - for (item = opal_list_get_first(peers) ;
39.148 - item != opal_list_get_end(peers) ;
39.149 - item = opal_list_get_next(peers)) {
39.150 - proc_array[cnt]=(ompi_proc_t *)item;
39.151 - cnt++;
39.152 - }
39.153 -
39.154 - /* get my reduction communication pattern */
39.155 - ret=mca_common_netpatterns_setup_recursive_doubling_tree_node(n_peers,my_rank,&my_exchange_node);
39.156 - if(OMPI_SUCCESS != ret){
39.157 - return ret;
39.158 - }
39.159 -
39.160 - /* setup flags for non-blocking communications */
39.161 - recv_done=&recv_completion_flag;
39.162 - send_done=&send_completion_flag;
39.163 -
39.164 - /* set data type */
39.165 - if(&opal_datatype_int4==dtype) {
39.166 - data_type=TYPE_INT4;
39.167 - }
39.168 -
39.169 - count_processed=0;
39.170 -
39.171 - /* get a pointer to the shared-memory working buffer */
39.172 - /* NOTE: starting with a rather synchronous approach */
39.173 - for( stripe_number=0 ; stripe_number < n_data_segments ; stripe_number++ ) {
39.174 -
39.175 - /* get number of elements to process in this stripe */
39.176 - count_this_stripe=n_dts_per_buffer;
39.177 - if( count_processed + count_this_stripe > count )
39.178 - count_this_stripe=count-count_processed;
39.179 -
39.180 - /* copy data from the input buffer into the temp buffer */
39.181 - sbuf_current=(char *)sbuf+count_processed*dt_size;
39.182 - memcopy(scratch_bufers[send_buffer],sbuf_current,count_this_stripe*dt_size);
39.183 -
39.184 - /* copy data in from the "extra" source, if need be */
39.185 - if(0 < my_exchange_node->n_extra_sources) {
39.186 -
39.187 - if ( EXCHANGE_NODE == my_exchange_node->node_type ) {
39.188 -
39.189 - /*
39.190 - ** Receive data from extra node
39.191 - */
39.192 -
39.193 - extra_rank=my_exchange_node.rank_extra_source;
39.194 - recv_iov.iov_base=scratch_bufers[recv_buffer];
39.195 - recv_iov.iov_len=count_this_stripe*dt_size;
39.196 - rc = orte_rml.recv(&(proc_array[extra_rank]->proc_name), &recv_iov, 1,
39.197 - OMPI_RML_TAG_ALLREDUCE , 0);
39.198 - if(OMPI_SUCCESS != rc ) {
39.199 - goto Error;
39.200 - }
39.201 -
39.202 - /* apply collective operation to first half of the data */
39.203 - if( 0 < count_this_stripe ) {
39.204 - op_reduce(op_type,(void *)scratch_bufers[recv_buffer],
39.205 - (void *)scratch_bufers[send_buffer], n_my_count,TYPE_INT4);
39.206 - }
39.207 -
39.208 -
39.209 - } else {
39.210 -
39.211 - /*
39.212 - ** Send data to "partner" node
39.213 - */
39.214 - extra_rank=my_exchange_node.rank_extra_source;
39.215 - send_iov.iov_base=scratch_bufers[send_buffer];
39.216 - send_iov.iov_len=count_this_stripe*dt_size;
39.217 - rc = orte_rml.send(&(proc_array[extra_rank]->proc_name), &send_iov, 1,
39.218 - OMPI_RML_TAG_ALLREDUCE , 0);
39.219 - if(OMPI_SUCCESS != rc ) {
39.220 - goto Error;
39.221 - }
39.222 - }
39.223 -
39.224 - /* change pointer to scratch buffer - this was we can send data
39.225 - ** that we have summed w/o a memory copy, and receive data into the
39.226 - ** other buffer, w/o fear of over writting data that has not yet
39.227 - ** completed being send
39.228 - */
39.229 - recv_buffer^=1;
39.230 - send_buffer^=1;
39.231 - }
39.232 -
39.233 - MB();
39.234 - /*
39.235 - * Signal parent that data is ready
39.236 - */
39.237 - tag=base_tag+1;
39.238 - my_ctl_pointer->flag=tag;
39.239 -
39.240 - /* loop over data exchanges */
39.241 - for(exchange=0 ; exchange < my_exchange_node->n_exchanges ; exchange++) {
39.242 -
39.243 - /* debug
39.244 - t4=opal_sys_timer_get_cycles();
39.245 - end debug */
39.246 -
39.247 -
39.248 - my_write_pointer=my_tmp_data_buffer[index_write];
39.249 - my_read_pointer=my_tmp_data_buffer[index_read];
39.250 -
39.251 - /* is the remote data read */
39.252 - pair_rank=my_exchange_node->rank_exchanges[exchange];
39.253 -
39.254 - *recv_done=0;
39.255 - *send_done=0;
39.256 - MB();
39.257 -
39.258 - /* post non-blocking receive */
39.259 - recv_iov.iov_base=scratch_bufers[send_buffer];
39.260 - recv_iov.iov_len=count_this_stripe*dt_size;
39.261 - rc = orte_rml.recv_nb(&(proc_array[extra_rank]->proc_name), recv_iov, 1,
39.262 - OMPI_RML_TAG_ALLREDUCE , 0, recv_completion, recv_done);
39.263 -
39.264 - /* post non-blocking send */
39.265 - send_iov.iov_base=scratch_bufers[send_buffer];
39.266 - send_iov.iov_len=count_this_stripe*dt_size;
39.267 - rc = orte_rml.send_nb(&(proc_array[extra_rank]->proc_name), send_iov, 1,
39.268 - OMPI_RML_TAG_ALLREDUCE , 0, send_completion, send_done);
39.269 -
39.270 - /* wait on receive completion */
39.271 - while(!(*recv_done) ) {
39.272 - opal_progress();
39.273 - }
39.274 -
39.275 - /* reduce the data */
39.276 - if( 0 < count_this_stripe ) {
39.277 - op_reduce(op_type,(void *)scratch_bufers[recv_buffer],
39.278 - (void *)scratch_bufers[send_buffer], n_my_count,TYPE_INT4);
39.279 - }
39.280 -
39.281 -
39.282 - /* get ready for next step */
39.283 - index_read=(exchange&1);
39.284 - index_write=((exchange+1)&1);
39.285 -
39.286 - /* wait on send completion */
39.287 - while(!(*send_done) ) {
39.288 - opal_progress();
39.289 - }
39.290 -
39.291 - }
39.292 -
39.293 - /* copy data in from the "extra" source, if need be */
39.294 - if(0 < my_exchange_node->n_extra_sources) {
39.295 -
39.296 - if ( EXTRA_NODE == my_exchange_node->node_type ) {
39.297 - /*
39.298 - ** receive the data
39.299 - ** */
39.300 - extra_rank=my_exchange_node->rank_extra_source;
39.301 -
39.302 - recv_iov.iov_base=scratch_bufers[recv_buffer];
39.303 - recv_iov.iov_len=count_this_stripe*dt_size;
39.304 - rc = orte_rml.recv(&(proc_array[extra_rank]->proc_name), &recv_iov, 1,
39.305 - OMPI_RML_TAG_ALLREDUCE , 0);
39.306 - if(OMPI_SUCCESS != rc ) {
39.307 - goto Error;
39.308 - }
39.309 -
39.310 - } else {
39.311 - /* send the data to the pair-rank outside of the power of 2 set
39.312 - ** of ranks
39.313 - */
39.314 -
39.315 - extra_rank=my_exchange_node->rank_extra_source;
39.316 - send_iov.iov_base=scratch_bufers[recv_buffer];
39.317 - send_iov.iov_len=count_this_stripe*dt_size;
39.318 - rc = orte_rml.recv(&(proc_array[extra_rank]->proc_name), &send_iov, 1,
39.319 - OMPI_RML_TAG_ALLREDUCE , 0);
39.320 - if(OMPI_SUCCESS != rc ) {
39.321 - goto Error;
39.322 - }
39.323 - }
39.324 - }
39.325 -
39.326 - /* copy data into the destination buffer */
39.327 - rc=ompi_datatype_copy_content_same_ddt(dtype, count_this_stripe,
39.328 - (char *)((char *)rbuf+dt_extent*count_processed),
39.329 - (char *)my_write_pointer);
39.330 - if( 0 != rc ) {
39.331 - return OMPI_ERROR;
39.332 - }
39.333 -
39.334 - /* copy data from the temp buffer into the output buffer */
39.335 - rbuf_current=(char *)rbuf+count_processed*dt_size;
39.336 - memcopy(scratch_bufers[recv_buffer],rbuf_current,count_this_stripe*dt_size);
39.337 -
39.338 - /* update the count of elements processed */
39.339 - count_processed+=count_this_stripe;
39.340 - }
39.341 -
39.342 - /* return */
39.343 - return rc;
39.344 -
39.345 -Error:
39.346 - return rc;
39.347 -}
40.1 --- a/ompi/mca/common/netpatterns/common_coll_ops.h Tue Feb 19 22:36:41 2013 +0000
40.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000
40.3 @@ -1,29 +0,0 @@
40.4 -/*
40.5 - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
40.6 - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
40.7 - * $COPYRIGHT$
40.8 - *
40.9 - * Additional copyrights may follow
40.10 - *
40.11 - * $HEADER$
40.12 - */
40.13 -
40.14 -#ifndef COMM_OP_TYPES_H
40.15 -#define COMM_OP_TYPES_H
40.16 -
40.17 -#include "ompi_config.h"
40.18 -
40.19 -BEGIN_C_DECLS
40.20 -
40.21 -int comm_allreduce(void *sbuf, void *rbuf, int count, opal_datatype_t *dtype,
40.22 - int op, opal_list_t *peers);
40.23 -
40.24 -/* reduction operations supported */
40.25 -#define OP_SUM 1
40.26 -
40.27 -#define TYPE_INT4 1
40.28 -
40.29 -
40.30 -END_C_DECLS
40.31 -
40.32 -#endif /* COMM_OP_TYPES_H */
41.1 --- a/ompi/mca/common/netpatterns/common_netpatterns.h Tue Feb 19 22:36:41 2013 +0000
41.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000
41.3 @@ -1,147 +0,0 @@
41.4 -/*
41.5 - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
41.6 - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
41.7 - * $COPYRIGHT$
41.8 - *
41.9 - * Additional copyrights may follow
41.10 - *
41.11 - * $HEADER$
41.12 - */
41.13 -
41.14 -#ifndef COMM_PATTERNS_H
41.15 -#define COMM_PATTERNS_H
41.16 -
41.17 -#include "ompi_config.h"
41.18 -#include "orte/runtime/orte_globals.h"
41.19 -#include "common_netpatterns_knomial_tree.h"
41.20 -
41.21 -BEGIN_C_DECLS
41.22 -
41.23 -int ompi_common_netpatterns_base_err(const char* fmt, ...);
41.24 -int ompi_common_netpatterns_register_mca_params(void);
41.25 -
41.26 -#if OPAL_ENABLE_DEBUG
41.27 -extern int ompi_common_netpatterns_base_verbose; /* disabled by default */
41.28 -OMPI_DECLSPEC extern int ompi_common_netpatterns_base_err(const char*, ...) __opal_attribute_format__(__printf__, 1, 2);
41.29 -#define NETPATTERNS_VERBOSE(args) \
41.30 - do { \
41.31 - if(ompi_common_netpatterns_base_verbose > 0) { \
41.32 - ompi_common_netpatterns_base_err("[%s]%s[%s:%d:%s] ",\
41.33 - orte_process_info.nodename, \
41.34 - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), \
41.35 - __FILE__, __LINE__, __func__); \
41.36 - ompi_common_netpatterns_base_err args; \
41.37 - ompi_common_netpatterns_base_err("\n"); \
41.38 - } \
41.39 - } while(0);
41.40 -#else
41.41 -#define NETPATTERNS_VERBOSE(args)
41.42 -#endif
41.43 -
41.44 -#define FIND_BASE(base,myid,level,k) \
41.45 - do { \
41.46 - int temp = 1; \
41.47 - int jj; \
41.48 - int knt2; \
41.49 - \
41.50 - base = 0; \
41.51 - for( jj = 0; jj < level; jj++) {\
41.52 - temp *= k; \
41.53 - } \
41.54 - knt2 = 1; \
41.55 - while(myid >= knt2*temp){ \
41.56 - knt2++; \
41.57 - } \
41.58 - base = knt2*temp - temp; \
41.59 - } while(0) \
41.60 -
41.61 -
41.62 -
41.63 -
41.64 -/* enum for node type */
41.65 -enum {
41.66 - ROOT_NODE,
41.67 - LEAF_NODE,
41.68 - INTERIOR_NODE
41.69 -};
41.70 -
41.71 -
41.72 -/*
41.73 - * N-order tree node description
41.74 - */
41.75 -struct mca_common_netpatterns_tree_node_t {
41.76 - /* my rank within the group */
41.77 - int my_rank;
41.78 - /* my node type - root, leaf, or interior */
41.79 - int my_node_type;
41.80 - /* number of nodes in the tree */
41.81 - int tree_size;
41.82 - /* number of parents (0/1) */
41.83 - int n_parents;
41.84 - /* number of children */
41.85 - int n_children;
41.86 - /* parent rank within the group */
41.87 - int parent_rank;
41.88 - /* chidren ranks within the group */
41.89 - int *children_ranks;
41.90 -};
41.91 -typedef struct mca_common_netpatterns_tree_node_t mca_common_netpatterns_tree_node_t;
41.92 -
41.93 -struct mca_common_netpatterns_k_exchange_node_t;
41.94 -/*
41.95 - * N-order + knominal tree node description
41.96 - */
41.97 -struct mca_common_netpatterns_narray_knomial_tree_node_t {
41.98 - /* my rank within the group */
41.99 - int my_rank;
41.100 - /* my node type - root, leaf, or interior */
41.101 - int my_node_type;
41.102 - /* number of nodes in the tree */
41.103 - int tree_size;
41.104 - /* number of parents (0/1) */
41.105 - int n_parents;
41.106 - /* number of children */
41.107 - int n_children;
41.108 - /* parent rank within the group */
41.109 - int parent_rank;
41.110 - /* chidren ranks within the group */
41.111 - int *children_ranks;
41.112 - /* Total number of ranks on this specific level */
41.113 - int level_size;
41.114 - /* Rank on this node inside of level */
41.115 - int rank_on_level;
41.116 - /* Knomial recursive gather information */
41.117 - struct mca_common_netpatterns_k_exchange_node_t k_node;
41.118 -};
41.119 -typedef struct mca_common_netpatterns_narray_knomial_tree_node_t
41.120 -mca_common_netpatterns_narray_knomial_tree_node_t;
41.121 -
41.122 -
41.123 -/* Init code for common_netpatterns */
41.124 -OMPI_DECLSPEC int ompi_common_netpatterns_init(void);
41.125 -
41.126 -/* setup an n-array tree */
41.127 -OMPI_DECLSPEC int mca_common_netpatterns_setup_narray_tree(int tree_order, int my_rank, int num_nodes,
41.128 - mca_common_netpatterns_tree_node_t *my_node);
41.129 -/* setup an n-array tree with k-nomial levels */
41.130 -OMPI_DECLSPEC int mca_common_netpatterns_setup_narray_knomial_tree( int tree_order, int my_rank, int num_nodes,
41.131 - mca_common_netpatterns_narray_knomial_tree_node_t *my_node);
41.132 -
41.133 -/* setup an multi-nomial tree - for each node in the tree
41.134 - * this returns it's parent, and it's children
41.135 - */
41.136 -OMPI_DECLSPEC int mca_common_netpatterns_setup_multinomial_tree(int tree_order, int num_nodes,
41.137 - mca_common_netpatterns_tree_node_t *tree_nodes);
41.138 -
41.139 -OMPI_DECLSPEC int mca_common_netpatterns_setup_narray_tree_contigous_ranks(int tree_order,
41.140 - int num_nodes, mca_common_netpatterns_tree_node_t **tree_nodes);
41.141 -
41.142 -/* calculate the nearest power of radix that is equal to or greater
41.143 - * than size, with the specified radix. The resulting tree is of
41.144 - * depth n_lvls.
41.145 - */
41.146 -OMPI_DECLSPEC int roundup_to_power_radix( int radix, int size, int *n_lvls );
41.147 -
41.148 -END_C_DECLS
41.149 -
41.150 -#endif /* COMM_PATTERNS_H */
42.1 --- a/ompi/mca/common/netpatterns/common_netpatterns_base.c Tue Feb 19 22:36:41 2013 +0000
42.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000
42.3 @@ -1,53 +0,0 @@
42.4 -/*
42.5 - *
42.6 - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
42.7 - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
42.8 - * $COPYRIGHT$
42.9 - *
42.10 - * Additional copyrights may follow
42.11 - *
42.12 - * $HEADER$
42.13 - */
42.14 -#include "opal/mca/base/mca_base_param.h"
42.15 -#include "ompi/include/ompi/constants.h"
42.16 -#include "common_netpatterns.h"
42.17 -
42.18 -int ompi_common_netpatterns_base_verbose = 0; /* disabled by default */
42.19 -
42.20 -int ompi_common_netpatterns_register_mca_params(void)
42.21 -{
42.22 - mca_base_param_reg_int_name("common",
42.23 - "netpatterns_base_verbose",
42.24 - "Verbosity level of the NETPATTERNS framework",
42.25 - false, false,
42.26 - 0,
42.27 - &ompi_common_netpatterns_base_verbose);
42.28 -
42.29 - return OMPI_SUCCESS;
42.30 -}
42.31 -
42.32 -int ompi_common_netpatterns_base_err(const char* fmt, ...)
42.33 -{
42.34 - va_list list;
42.35 - int ret;
42.36 -
42.37 - va_start(list, fmt);
42.38 - ret = vfprintf(stderr, fmt, list);
42.39 - va_end(list);
42.40 - return ret;
42.41 -}
42.42 -
42.43 -int ompi_common_netpatterns_init(void)
42.44 -{
42.45 -/* There is no component for common_netpatterns so every component that uses it
42.46 - should call ompi_common_netpatterns_init, still we want to run it only once */
42.47 -static int was_called = 0;
42.48 -
42.49 - if (0 == was_called) {
42.50 - was_called = 1;
42.51 -
42.52 - return ompi_common_netpatterns_register_mca_params();
42.53 - }
42.54 -
42.55 - return OMPI_SUCCESS;
42.56 -}
43.1 --- a/ompi/mca/common/netpatterns/common_netpatterns_knomial_tree.c Tue Feb 19 22:36:41 2013 +0000
43.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000
43.3 @@ -1,932 +0,0 @@
43.4 -/*
43.5 - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
43.6 - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
43.7 - * $COPYRIGHT$
43.8 - *
43.9 - * Additional copyrights may follow
43.10 - *
43.11 - * $HEADER$
43.12 - */
43.13 -
43.14 -#include "ompi_config.h"
43.15 -#ifdef HAVE_UNISTD_H
43.16 -#include <unistd.h>
43.17 -#endif
43.18 -#include <sys/types.h>
43.19 -#ifdef HAVE_SYS_MMAN_H
43.20 -#include <sys/mman.h>
43.21 -#endif
43.22 -#include <fcntl.h>
43.23 -#include <stdlib.h>
43.24 -#include <assert.h>
43.25 -
43.26 -#include "ompi/constants.h"
43.27 -#include "common_netpatterns.h"
43.28 -
43.29 -/* setup recursive doubleing tree node */
43.30 -
43.31 -OMPI_DECLSPEC int mca_common_netpatterns_setup_recursive_knomial_allgather_tree_node(
43.32 - int num_nodes, int node_rank, int tree_order, int *hier_ranks,
43.33 - mca_common_netpatterns_k_exchange_node_t *exchange_node)
43.34 -{
43.35 - /* local variables */
43.36 - int i, j, cnt, i_temp;
43.37 - int knt,knt2,kk, ex_node, stray;
43.38 - int n_levels,pow_k;
43.39 - int k_temp1;
43.40 - int k_temp2;
43.41 - int myid, reindex_myid = 0;
43.42 - int base, peer_base,base_temp;
43.43 - int peer;
43.44 - int *prev_data = NULL;
43.45 - int *current_data = NULL;
43.46 - int *group_info = NULL;
43.47 -
43.48 -
43.49 - NETPATTERNS_VERBOSE(
43.50 - ("Enter mca_common_netpatterns_setup_recursive_knomial_tree_node(num_nodes=%d, node_rank=%d, tree_order=%d)",
43.51 - num_nodes, node_rank, tree_order));
43.52 -
43.53 - assert(num_nodes > 1);
43.54 - assert(tree_order > 1);
43.55 - if (tree_order > num_nodes) {
43.56 - tree_order = num_nodes;
43.57 - }
43.58 -
43.59 - /* k-nomial radix */
43.60 - exchange_node->tree_order = tree_order;
43.61 -
43.62 - /* Calculate the number of levels in the tree for
43.63 - * the largest power of tree_order less than or
43.64 - * equal to the group size
43.65 - */
43.66 - n_levels = 0;
43.67 - cnt=1;
43.68 - while ( num_nodes > cnt ) {
43.69 - cnt *= tree_order;
43.70 - n_levels++;
43.71 - }
43.72 - /* this is the actual number of recusive k-ing steps
43.73 - * we will perform, the last step may not be a full
43.74 - * step depending on the outcome of the next conditional
43.75 - */
43.76 - pow_k = n_levels;
43.77 -
43.78 - /* figure out the largest power of tree_order that is less than or equal to
43.79 - * num_nodes */
43.80 - if ( cnt > num_nodes) {
43.81 - cnt /= tree_order;
43.82 - n_levels--;
43.83 - }
43.84 -
43.85 - /*exchange_node->log_tree_order = n_levels;*/
43.86 - exchange_node->log_tree_order = pow_k;
43.87 - exchange_node->n_largest_pow_tree_order = cnt;
43.88 -
43.89 -
43.90 - /* find the number of complete groups of size tree_order, tree_order^2, tree_order^3,...,tree_order^pow_k */
43.91 - /* I don't think we need to cache this info this group_info array */
43.92 - group_info = (int *) calloc(pow_k , sizeof(int));
43.93 - group_info[0] = num_nodes/tree_order;
43.94 - /*fprintf(stderr,"Number of complete groups of power 1 is %d\n",group_info[0]);*/
43.95 - for ( i = 1; i < pow_k; i ++) {
43.96 - group_info[i] = group_info[i-1]/tree_order;
43.97 - /*fprintf(stderr,"Number of complete groups of power %d is %d\n",i+1,group_info[i]);*/
43.98 -
43.99 - }
43.100 -
43.101 - /* find number of incomplete groups and number of ranks belonging to those ranks */
43.102 - knt=0;
43.103 - while (knt <= (pow_k - 1) && group_info[knt] > 0) {
43.104 - knt++;
43.105 - }
43.106 - knt--;
43.107 - /*fprintf(stderr,"Maximal power of k is %d and the number of incomplete groups is %d \n", knt+1 ,tree_order - group_info[knt] );*/
43.108 -
43.109 - /* k_temp is a synonym for cnt which is the largest full power of k group */
43.110 - /* now, start the calculation to find the first stray rank aka "extra" rank */
43.111 - stray = 0;
43.112 - /*fprintf(stderr,"Maximal power of k %d, first stragler rank is %d and the number of straglers is %d\n",cnt,
43.113 - cnt*group_info[knt],
43.114 - num_nodes - cnt*group_info[knt]);*/
43.115 -
43.116 -
43.117 - /* cache this info, it's muy importante */
43.118 - stray = cnt*group_info[knt];
43.119 - exchange_node->k_nomial_stray = stray;
43.120 -
43.121 -
43.122 -
43.123 - /* before we do this, we need to first reindex */
43.124 - /* reindexing phase */
43.125 - /* this is the reindex phase */
43.126 - exchange_node->reindex_map = (int *) malloc(num_nodes*sizeof(int));
43.127 - /* this is the inverse map */
43.128 - exchange_node->inv_reindex_map = (int *) malloc(num_nodes*sizeof(int));
43.129 - /*int reindex_myid;*/
43.130 - /* reindex */
43.131 - if( stray < num_nodes ) {
43.132 - /* find the first proxy rank */
43.133 - peer = stray - cnt;
43.134 - /* fix all ranks prior to this rank */
43.135 - for( i = 0; i < peer; i++){
43.136 - exchange_node->reindex_map[i] = i;
43.137 - }
43.138 - /* now, start the swap */
43.139 - exchange_node->reindex_map[peer] = peer;
43.140 - for( i = (peer+1); i < (peer + (num_nodes - stray)+1); i++) {
43.141 - exchange_node->reindex_map[i] = exchange_node->reindex_map[i-1] + 2;
43.142 - }
43.143 - i_temp = i;
43.144 - for( i = i_temp; i < stray; i++) {
43.145 - exchange_node->reindex_map[i] = exchange_node->reindex_map[i-1] + 1;
43.146 - }
43.147 - /* now, finish it off */
43.148 - exchange_node->reindex_map[stray] = peer + 1;
43.149 - for( i = (stray+1); i < num_nodes; i++) {
43.150 - exchange_node->reindex_map[i] = exchange_node->reindex_map[i-1] + 2;
43.151 - }
43.152 - /* debug print */
43.153 - /*
43.154 - for( i = 0; i < np; i++){
43.155 - fprintf(stderr,"%d ",reindex_map[i]);
43.156 - }
43.157 - fprintf(stderr,"\n");
43.158 - */
43.159 - } else {
43.160 - /* we have no extras, trivial reindexing */
43.161 - for( i = 0; i < num_nodes; i++){
43.162 - exchange_node->reindex_map[i] = i;
43.163 - }
43.164 - }
43.165 - /* finished reindexing */
43.166 -
43.167 - /* Now, I need to get my rank in the new indexing */
43.168 - for( i = 0; i < num_nodes; i++ ){
43.169 - if( node_rank == exchange_node->reindex_map[i] ){
43.170 - exchange_node->reindex_myid = i;
43.171 - break;
43.172 - }
43.173 - }
43.174 - /* Now, let's compute the inverse mapping here */
43.175 - for( i = 0; i < num_nodes; i++){
43.176 - j = 0;
43.177 - while(exchange_node->reindex_map[j] != i ){
43.178 - j++;
43.179 - }
43.180 - exchange_node->inv_reindex_map[i] = j;
43.181 - }
43.182 -
43.183 -
43.184 - /* Now we get the data sizes we should expect at each level */
43.185 - /* now get the size of the data I am to receive from each peer */
43.186 - /*int **payload_info;*/
43.187 - prev_data = (int *) malloc( num_nodes*sizeof(int) );
43.188 - if( NULL == prev_data ) {
43.189 - goto Error;
43.190 - }
43.191 -
43.192 - current_data = (int *) malloc( num_nodes*sizeof(int) );
43.193 - if( NULL == current_data ) {
43.194 - goto Error;
43.195 - }
43.196 -
43.197 -
43.198 - exchange_node->payload_info = (mca_common_netpatterns_payload_t **) malloc(sizeof(mca_common_netpatterns_payload_t *)*pow_k);
43.199 - if( NULL == exchange_node->payload_info) {
43.200 - goto Error;
43.201 - }
43.202 -
43.203 - for(i = 0; i < pow_k; i++){
43.204 - exchange_node->payload_info[i] = (mca_common_netpatterns_payload_t *) malloc(sizeof(mca_common_netpatterns_payload_t)*(tree_order-1));
43.205 - if( NULL == exchange_node->payload_info[i]) {
43.206 - goto Error;
43.207 - }
43.208 -
43.209 - }
43.210 - /* intialize the payload array
43.211 - This is the money struct, just need to initialize this with
43.212 - the subgroup information */
43.213 - /*
43.214 - for(i = 0; i < num_nodes; i++){
43.215 - prev_data[i] = 1;
43.216 - current_data[i] = 1;
43.217 - }
43.218 - */
43.219 -
43.220 - for(i = 0; i < num_nodes; i++){
43.221 - prev_data[i] = hier_ranks[i];
43.222 - current_data[i] = hier_ranks[i];
43.223 - }
43.224 -
43.225 - /* everyone will need to do this loop over all ranks
43.226 - * Phase I calculate the contribution from the extra ranks
43.227 - */
43.228 - for( myid = 0; myid < num_nodes; myid++) {
43.229 - /* get my new rank */
43.230 - for( j = 0; j < num_nodes; j++ ){
43.231 - /* this will be satisfied for one of the indices */
43.232 - if( myid == exchange_node->reindex_map[j] ){
43.233 - reindex_myid = j;
43.234 - break;
43.235 - }
43.236 - }
43.237 -
43.238 - for( j = stray; j < num_nodes; j++) {
43.239 - if(reindex_myid == ( j - cnt )) {
43.240 - /* then this is a proxy rank */
43.241 - prev_data[myid] += prev_data[exchange_node->reindex_map[j]];
43.242 - break;
43.243 - }
43.244 -
43.245 - }
43.246 - }
43.247 -
43.248 - /* Phase II calculate the contribution from each recursive k - ing level
43.249 - *
43.250 - */
43.251 - k_temp1 = tree_order; /* k^1 */
43.252 - k_temp2 = 1; /* k^0 */
43.253 - peer_base = 0;
43.254 - base_temp = 0;
43.255 - for( i = 0; i < pow_k; i++) {
43.256 - /* get my new rank */
43.257 - for( myid = 0; myid < num_nodes; myid++){
43.258 - current_data[myid] = prev_data[myid];
43.259 - /*fprintf(stderr,"my current data at level %d is %d\n",i+1,current_data[myid]);*/
43.260 - for( j = 0; j < num_nodes; j++ ){
43.261 - if( myid == exchange_node->reindex_map[j] ){
43.262 - reindex_myid = j;
43.263 - break;
43.264 - }
43.265 - }
43.266 - if( reindex_myid < stray ) {
43.267 - /* now start the actual algorithm */
43.268 - FIND_BASE(base,reindex_myid,i+1,tree_order);
43.269 - for( j = 0; j < ( tree_order - 1 ); j ++ ) {
43.270 - peer = base + (reindex_myid + k_temp2*(j+1))%k_temp1;
43.271 - if( peer < stray ) {
43.272 - /*fprintf(stderr,"getting %d bytes \n",prev_data[reindex_map[peer]]);*/
43.273 - /* then get the data */
43.274 - if( node_rank == myid ){
43.275 - exchange_node->payload_info[i][j].r_len = prev_data[exchange_node->reindex_map[peer]];
43.276 - /*fprintf(stderr,"exchange_node->payload_info[%d][%d].r_len %d\n",i,j,prev_data[exchange_node->reindex_map[peer]]);*/
43.277 - if( i > 0 ) {
43.278 -
43.279 - /* find my len and offset */
43.280 - FIND_BASE(peer_base,peer,i,tree_order);
43.281 - /* I do not want to mess with this, but it seems that I have no choice */
43.282 - ex_node = exchange_node->reindex_map[peer_base];
43.283 - /* now, find out how far down the line this guy really is */
43.284 - knt2 =0;
43.285 - for(kk = 0; kk < ex_node; kk++){
43.286 - knt2 += hier_ranks[kk];
43.287 - }
43.288 - exchange_node->payload_info[i][j].r_offset = knt2;
43.289 - /*fprintf(stderr,"exchange_node->payload_info[%d][%d].r_offset %d\n",i,j,exchange_node->payload_info[i][j].r_offset);*/
43.290 -
43.291 - FIND_BASE(base_temp,reindex_myid,i,tree_order);
43.292 - ex_node = exchange_node->reindex_map[base_temp];
43.293 - knt2 = 0;
43.294 - for( kk = 0; kk < ex_node; kk++){
43.295 - knt2 += hier_ranks[kk];
43.296 - }
43.297 - exchange_node->payload_info[i][j].s_offset =
43.298 - knt2; /* exchange_node->reindex_map[base_temp]; */
43.299 - /*fprintf(stderr,"exchange_node->payload_info[%d][%d].s_offset %d\n",i,j,exchange_node->payload_info[i][j].s_offset);*/
43.300 - } else {
43.301 - ex_node = exchange_node->reindex_map[peer];
43.302 - knt2 =0;
43.303 - for(kk = 0; kk < ex_node; kk++){
43.304 - knt2 += hier_ranks[kk];
43.305 - }
43.306 - exchange_node->payload_info[i][j].r_offset =
43.307 - knt2; /*exchange_node->reindex_map[peer]; */
43.308 - /*fprintf(stderr,"exchange_node->payload_info[%d][%d].r_offset %d\n",i,j,exchange_node->payload_info[i][j].r_offset);*/
43.309 - knt2 = 0;
43.310 - for(kk = 0; kk < myid; kk++){
43.311 - knt2 += hier_ranks[kk];
43.312 - }
43.313 - exchange_node->payload_info[i][j].s_offset = knt2;
43.314 - /*fprintf(stderr,"exchange_node->payload_info[%d][%d].s_offset %d\n",i,j, exchange_node->payload_info[i][j].s_offset);*/
43.315 - }
43.316 - /* how much I am to receive from this peer on this level */
43.317 - /* how much I am to send to this peer on this level */
43.318 - exchange_node->payload_info[i][j].s_len = prev_data[node_rank];
43.319 - /*fprintf(stderr,"exchange_node->payload_info[%d][%d].s_len %d\n",i,j,prev_data[node_rank]);*/
43.320 - /*fprintf(stderr,"I am rank %d receiveing %d bytes from rank %d at level %d\n",node_rank,
43.321 - prev_data[exchange_node->reindex_map[peer]],
43.322 - exchange_node->reindex_map[peer], i+1);*/
43.323 - /*fprintf(stderr,"I am rank %d sending %d bytes to rank %d at level %d\n",node_rank,prev_data[myid],
43.324 - exchange_node->reindex_map[peer],i+1);*/
43.325 - }
43.326 -
43.327 - current_data[myid] += prev_data[exchange_node->reindex_map[peer]];
43.328 - }
43.329 - }
43.330 - }
43.331 -
43.332 -
43.333 - }
43.334 - k_temp1 *= tree_order;
43.335 - k_temp2 *= tree_order;
43.336 - /* debug print */
43.337 - /* fprintf(stderr,"Level %d current data ",i+1);*/
43.338 - for( j = 0; j < num_nodes; j++){
43.339 - /* fprintf(stderr,"%d ",current_data[j]); */
43.340 - prev_data[j] = current_data[j];
43.341 - }
43.342 - /* fprintf(stderr,"\n");*/
43.343 -
43.344 - }
43.345 -
43.346 -
43.347 - /* this is the natural way to do recursive k-ing */
43.348 - /* should never have more than one extra rank per proxy */
43.349 - if( exchange_node->reindex_myid >= stray ){
43.350 - /*fprintf(stderr,"Rank %d is mapped onto proxy rank %d \n",exchange_node->reindex_myid,exchange_node->reindex_myid - cnt);*/
43.351 - exchange_node->node_type = EXTRA_NODE;
43.352 - } else {
43.353 - exchange_node->node_type = EXCHANGE_NODE;
43.354 - }
43.355 -
43.356 - /* set node characteristics - node that is not within the largest
43.357 - * power of tree_order will just send its data to node that will participate
43.358 - * in the recursive k-ing, and get the result back at the end.
43.359 - * set the initial and final data exchanges - those that are not
43.360 - * part of the recursive k-ing.
43.361 - */
43.362 - if (EXCHANGE_NODE == exchange_node->node_type) {
43.363 - exchange_node->n_extra_sources = 0;
43.364 - for( i = stray; i < num_nodes; i++) {
43.365 - if(exchange_node->reindex_myid == ( i - cnt )) {
43.366 - /* then I am a proxy rank and there is only a
43.367 - * single extra source
43.368 - */
43.369 - exchange_node->n_extra_sources = 1;
43.370 - break;
43.371 - }
43.372 - }
43.373 -
43.374 - if (exchange_node->n_extra_sources > 0) {
43.375 - exchange_node->rank_extra_sources_array = (int *) malloc
43.376 - (exchange_node->n_extra_sources * sizeof(int));
43.377 - if( NULL == exchange_node->rank_extra_sources_array ) {
43.378 - goto Error;
43.379 - }
43.380 - /* you broke above */
43.381 - exchange_node->rank_extra_sources_array[0] = exchange_node->reindex_map[i];
43.382 - } else {
43.383 - exchange_node->rank_extra_sources_array = NULL;
43.384 - }
43.385 - } else {
43.386 - /* I am an extra rank, find my proxy rank */
43.387 - exchange_node->n_extra_sources = 1;
43.388 -
43.389 - exchange_node->rank_extra_sources_array = (int *) malloc
43.390 - (exchange_node->n_extra_sources * sizeof(int));
43.391 - if( NULL == exchange_node->rank_extra_sources_array ) {
43.392 - goto Error;
43.393 - }
43.394 - exchange_node->rank_extra_sources_array[0] = exchange_node->reindex_map[exchange_node->reindex_myid - cnt];
43.395 - }
43.396 -
43.397 -
43.398 - /* set the exchange pattern */
43.399 - if (EXCHANGE_NODE == exchange_node->node_type) {
43.400 - /* yep, that's right PLUS 1 */
43.401 - exchange_node->n_exchanges = n_levels + 1;
43.402 - /* initialize this */
43.403 - exchange_node->n_actual_exchanges = 0;
43.404 - /* Allocate 2 dimension array thak keeps
43.405 - rank exchange information for each step*/
43.406 - exchange_node->rank_exchanges = (int **) malloc
43.407 - (exchange_node->n_exchanges * sizeof(int *));
43.408 - if(NULL == exchange_node->rank_exchanges) {
43.409 - goto Error;
43.410 - }
43.411 - for (i = 0; i < exchange_node->n_exchanges; i++) {
43.412 - exchange_node->rank_exchanges[i] = (int *) malloc
43.413 - ((tree_order - 1) * sizeof(int));
43.414 - if( NULL == exchange_node->rank_exchanges ) {
43.415 - goto Error;
43.416 - }
43.417 - }
43.418 - k_temp1 = tree_order;
43.419 - k_temp2 = 1;
43.420 - /* fill in exchange partners */
43.421 - /* Ok, now we start with the actual algorithm */
43.422 - for( i = 0; i < exchange_node->n_exchanges; i ++) {
43.423 - /*fprintf(stderr,"Starting Level %d\n",i+1);*/
43.424 -
43.425 - FIND_BASE(base,exchange_node->reindex_myid,i+1,tree_order);
43.426 - /*fprintf(stderr,"Myid %d base %d\n",node_rank,base);*/
43.427 - for( j = 0; j < (tree_order-1); j ++ ) {
43.428 - peer = base + (exchange_node->reindex_myid + k_temp2*(j+1))%k_temp1;
43.429 - if ( peer < stray ) {
43.430 - exchange_node->rank_exchanges[i][j] = exchange_node->reindex_map[peer];
43.431 - /* an actual exchange occurs, bump the counter */
43.432 -
43.433 - } else {
43.434 - /* out of range, skip it - do not bump the n_actual_exchanges counter */
43.435 - exchange_node->rank_exchanges[i][j] = -1;
43.436 - }
43.437 -
43.438 - }
43.439 - k_temp1 *= tree_order;
43.440 - k_temp2 *= tree_order;
43.441 - }
43.442 - for(i = 0; i < pow_k; i++){
43.443 - for(j = 0; j < (tree_order-1); j++){
43.444 - if(-1 != exchange_node->rank_exchanges[i][j]){
43.445 - /* then bump the counter */
43.446 - exchange_node->n_actual_exchanges++;
43.447 - }
43.448 - }
43.449 - }
43.450 -
43.451 - } else {
43.452 - /* we are extra ranks and we don't participate in the exchange :( */
43.453 - exchange_node->n_exchanges=0;
43.454 - exchange_node->rank_exchanges=NULL;
43.455 - }
43.456 -
43.457 -
43.458 - /* set the number of tags needed per stripe - this must be the
43.459 - * same across all procs in the communicator.
43.460 - */
43.461 - /* do we need this one */
43.462 - exchange_node->n_tags = tree_order * n_levels + 1;
43.463 -
43.464 - free(prev_data);
43.465 - free(current_data);
43.466 - free(group_info);
43.467 -
43.468 - /* successful return */
43.469 - return OMPI_SUCCESS;
43.470 -
43.471 -Error:
43.472 -
43.473 - if (NULL != exchange_node->rank_extra_sources_array) {
43.474 - free(exchange_node->rank_extra_sources_array);
43.475 - }
43.476 -
43.477 - if (NULL != exchange_node->rank_exchanges) {
43.478 - for (i = 0; i < exchange_node->n_exchanges; i++) {
43.479 - if (NULL != exchange_node->rank_exchanges[i]) {
43.480 - free(exchange_node->rank_exchanges[i]);
43.481 - }
43.482 - }
43.483 - free(exchange_node->rank_exchanges);
43.484 - }
43.485 -
43.486 - if (NULL != prev_data ){
43.487 - free(prev_data);
43.488 - }
43.489 -
43.490 - if(NULL != current_data) {
43.491 - free(current_data);
43.492 - }
43.493 -
43.494 - if(NULL != group_info) {
43.495 - free(group_info);
43.496 - }
43.497 -
43.498 - /* error return */
43.499 - return OMPI_ERROR;
43.500 -}
43.501 -
43.502 -
43.503 -OMPI_DECLSPEC int mca_common_netpatterns_setup_recursive_knomial_tree_node(
43.504 - int num_nodes, int node_rank, int tree_order,
43.505 - mca_common_netpatterns_k_exchange_node_t *exchange_node)
43.506 -{
43.507 - /* local variables */
43.508 - int i, j, tmp, cnt;
43.509 - int n_levels;
43.510 - int k_base, kpow_num, peer;
43.511 -
43.512 - NETPATTERNS_VERBOSE(
43.513 - ("Enter mca_common_netpatterns_setup_recursive_knomial_tree_node(num_nodes=%d, node_rank=%d, tree_order=%d)",
43.514 - num_nodes, node_rank, tree_order));
43.515 -
43.516 - assert(num_nodes > 1);
43.517 - assert(tree_order > 1);
43.518 - if (tree_order > num_nodes) {
43.519 - tree_order = num_nodes;
43.520 - }
43.521 -
43.522 - exchange_node->tree_order = tree_order;
43.523 -
43.524 - /* figure out number of levels in the tree */
43.525 - n_levels = 0;
43.526 - /* cnt - number of ranks in given level */
43.527 - cnt=1;
43.528 - while ( num_nodes > cnt ) {
43.529 - cnt *= tree_order;
43.530 - n_levels++;
43.531 - };
43.532 -
43.533 - /* figure out the largest power of tree_order that is less than or equal to
43.534 - * num_nodes */
43.535 - if ( cnt > num_nodes) {
43.536 - cnt /= tree_order;
43.537 - n_levels--;
43.538 - }
43.539 -
43.540 - exchange_node->log_tree_order = n_levels;
43.541 - exchange_node->n_largest_pow_tree_order = cnt;
43.542 -
43.543 - /* set node characteristics - node that is not within the largest
43.544 - * power of tree_order will just send it's data to node that will participate
43.545 - * in the recursive doubling, and get the result back at the end.
43.546 - */
43.547 - if (node_rank + 1 > cnt) {
43.548 - exchange_node->node_type = EXTRA_NODE;
43.549 - } else {
43.550 - exchange_node->node_type = EXCHANGE_NODE;
43.551 - }
43.552 -
43.553 -
43.554 - /* set the initial and final data exchanges - those that are not
43.555 - * part of the recursive doubling.
43.556 - */
43.557 - if (EXCHANGE_NODE == exchange_node->node_type) {
43.558 - exchange_node->n_extra_sources = 0;
43.559 - for (i = 0, tmp = node_rank * (tree_order - 1) + cnt + i;
43.560 - tmp < num_nodes && i < tree_order - 1;
43.561 - ++i, ++tmp) {
43.562 - ++exchange_node->n_extra_sources;
43.563 - }
43.564 -
43.565 - assert(exchange_node->n_extra_sources < tree_order);
43.566 -
43.567 - if (exchange_node->n_extra_sources > 0) {
43.568 - exchange_node->rank_extra_sources_array = (int *) malloc
43.569 - (exchange_node->n_extra_sources * sizeof(int));
43.570 - if( NULL == exchange_node->rank_extra_sources_array ) {
43.571 - goto Error;
43.572 - }
43.573 - for (i = 0, tmp = node_rank * (tree_order - 1) + cnt;
43.574 - i < tree_order - 1 && tmp < num_nodes; ++i, ++tmp) {
43.575 - NETPATTERNS_VERBOSE(("extra_source#%d = %d", i, tmp));
43.576 - exchange_node->rank_extra_sources_array[i] = tmp;
43.577 - }
43.578 - } else {
43.579 - exchange_node->rank_extra_sources_array = NULL;
43.580 - }
43.581 - } else {
43.582 - exchange_node->n_extra_sources = 1;
43.583 - exchange_node->rank_extra_sources_array = (int *) malloc (sizeof(int));
43.584 - if( NULL == exchange_node->rank_extra_sources_array ) {
43.585 - goto Error;
43.586 - }
43.587 - exchange_node->rank_extra_sources_array[0] = (node_rank - cnt) / (tree_order - 1);
43.588 - NETPATTERNS_VERBOSE(("extra_source#%d = %d", 0,
43.589 - exchange_node->rank_extra_sources_array[0] ));
43.590 - }
43.591 -
43.592 - /* set the exchange pattern */
43.593 - if (EXCHANGE_NODE == exchange_node->node_type) {
43.594 - exchange_node->n_exchanges = n_levels;
43.595 - /* Allocate 2 dimension array thak keeps
43.596 - rank exchange information for each step*/
43.597 - exchange_node->rank_exchanges = (int **) malloc
43.598 - (exchange_node->n_exchanges * sizeof(int *));
43.599 - if(NULL == exchange_node->rank_exchanges) {
43.600 - goto Error;
43.601 - }
43.602 - for (i = 0; i < exchange_node->n_exchanges; i++) {
43.603 - exchange_node->rank_exchanges[i] = (int *) malloc
43.604 - ((tree_order - 1) * sizeof(int));
43.605 - if( NULL == exchange_node->rank_exchanges ) {
43.606 - goto Error;
43.607 - }
43.608 - }
43.609 - /* fill in exchange partners */
43.610 - for(i = 0, kpow_num = 1; i < exchange_node->n_exchanges;
43.611 - i++, kpow_num *= tree_order) {
43.612 - k_base = node_rank / (kpow_num * tree_order);
43.613 - for(j = 1; j < tree_order; j++) {
43.614 - peer = node_rank + kpow_num * j;
43.615 - if (k_base != peer/(kpow_num * tree_order)) {
43.616 - /* Wraparound the number */
43.617 - peer = k_base * (kpow_num * tree_order) +
43.618 - peer % (kpow_num * tree_order);
43.619 - }
43.620 - exchange_node->rank_exchanges[i][j - 1] = peer;
43.621 - NETPATTERNS_VERBOSE(("rank_exchanges#(%d,%d)/%d = %d",
43.622 - i, j, tree_order, peer));
43.623 - }
43.624 - }
43.625 - } else {
43.626 - exchange_node->n_exchanges=0;
43.627 - exchange_node->rank_exchanges=NULL;
43.628 - }
43.629 -
43.630 - /* set the number of tags needed per stripe - this must be the
43.631 - * same across all procs in the communicator.
43.632 - */
43.633 - /* do we need this one */
43.634 - exchange_node->n_tags = tree_order * n_levels + 1;
43.635 -
43.636 - /* successful return */
43.637 - return OMPI_SUCCESS;
43.638 -
43.639 -Error:
43.640 -
43.641 - if (NULL != exchange_node->rank_extra_sources_array) {
43.642 - free(exchange_node->rank_extra_sources_array);
43.643 - }
43.644 -
43.645 - if (NULL != exchange_node->rank_exchanges) {
43.646 - for (i = 0; i < exchange_node->n_exchanges; i++) {
43.647 - if (NULL != exchange_node->rank_exchanges[i]) {
43.648 - free(exchange_node->rank_exchanges[i]);
43.649 - }
43.650 - }
43.651 - free(exchange_node->rank_exchanges);
43.652 - }
43.653 -
43.654 - /* error return */
43.655 - return OMPI_ERROR;
43.656 -}
43.657 -
43.658 -#if 1
43.659 -OMPI_DECLSPEC int mca_common_netpatterns_setup_recursive_doubling_n_tree_node(int num_nodes, int node_rank, int tree_order,
43.660 - mca_common_netpatterns_pair_exchange_node_t *exchange_node)
43.661 -{
43.662 - /* local variables */
43.663 - int i, tmp, cnt;
43.664 - int n_levels;
43.665 - int shift, mask;
43.666 -
43.667 - NETPATTERNS_VERBOSE(("Enter mca_common_netpatterns_setup_recursive_doubling_n_tree_node(num_nodes=%d, node_rank=%d, tree_order=%d)", num_nodes, node_rank, tree_order));
43.668 -
43.669 - assert(num_nodes > 1);
43.670 - while (tree_order > num_nodes) {
43.671 - tree_order /= 2;
43.672 - }
43.673 -
43.674 - exchange_node->tree_order = tree_order;
43.675 - /* We support only tree_order that are power of two */
43.676 - assert(0 == (tree_order & (tree_order - 1)));
43.677 -
43.678 - /* figure out number of levels in the tree */
43.679 - n_levels = 0;
43.680 - /* cnt - number of ranks in given level */
43.681 - cnt=1;
43.682 - while ( num_nodes > cnt ) {
43.683 - cnt *= tree_order;
43.684 - n_levels++;
43.685 - };
43.686 -
43.687 - /* figure out the largest power of tree_order that is less than or equal to
43.688 - * num_nodes */
43.689 - if ( cnt > num_nodes) {
43.690 - cnt /= tree_order;
43.691 - n_levels--;
43.692 - }
43.693 - exchange_node->log_tree_order = n_levels;
43.694 - if (2 == tree_order) {
43.695 - exchange_node->log_2 = exchange_node->log_tree_order;
43.696 - }
43.697 -
43.698 - tmp=1;
43.699 - for (i=0 ; i < n_levels ; i++ ) {
43.700 - tmp *= tree_order;
43.701 - }
43.702 - /* Ishai: I see no reason for calculating tmp. Add an assert before deleting it */
43.703 - assert(tmp == cnt);
43.704 -
43.705 - exchange_node->n_largest_pow_tree_order = tmp;
43.706 - if (2 == tree_order) {
43.707 - exchange_node->n_largest_pow_2 = exchange_node->n_largest_pow_tree_order;
43.708 - }
43.709 -
43.710 - /* set node characteristics - node that is not within the largest
43.711 - * power of tree_order will just send it's data to node that will participate
43.712 - * in the recursive doubling, and get the result back at the end.
43.713 - */
43.714 - if ( node_rank + 1 > cnt ) {
43.715 - exchange_node->node_type = EXTRA_NODE;
43.716 - } else {
43.717 - exchange_node->node_type = EXCHANGE_NODE;
43.718 - }
43.719 -
43.720 - /* set the initial and final data exchanges - those that are not
43.721 - * part of the recursive doubling.
43.722 - */
43.723 - if ( EXCHANGE_NODE == exchange_node->node_type ) {
43.724 - exchange_node->n_extra_sources = 0;
43.725 - for (tmp = node_rank + cnt; tmp < num_nodes; tmp += cnt) {
43.726 - ++exchange_node->n_extra_sources;
43.727 - }
43.728 - if (exchange_node->n_extra_sources > 0) {
43.729 - exchange_node->rank_extra_sources_array = (int *) malloc
43.730 - (exchange_node->n_extra_sources * sizeof(int));
43.731 - if( NULL == exchange_node->rank_extra_sources_array ) {
43.732 - goto Error;
43.733 - }
43.734 - for (i = 0, tmp = node_rank + cnt; tmp < num_nodes; ++i, tmp += cnt) {
43.735 - NETPATTERNS_VERBOSE(("extra_source#%d = %d", i, tmp));
43.736 - exchange_node->rank_extra_sources_array[i] = tmp;
43.737 - }
43.738 - } else {
43.739 - exchange_node->rank_extra_sources_array = NULL;
43.740 - }
43.741 - } else {
43.742 - exchange_node->n_extra_sources = 1;
43.743 - exchange_node->rank_extra_sources_array = (int *) malloc (sizeof(int));
43.744 - if( NULL == exchange_node->rank_extra_sources_array ) {
43.745 - goto Error;
43.746 - }
43.747 - exchange_node->rank_extra_sources_array[0] = node_rank & (cnt - 1);
43.748 - NETPATTERNS_VERBOSE(("extra_source#%d = %d", 0, node_rank & (cnt - 1)));
43.749 - }
43.750 -
43.751 - /* Ishai: To be compatable with the old structure - should be remoived later */
43.752 - if (1 == exchange_node->n_extra_sources) {
43.753 - exchange_node->rank_extra_source = exchange_node->rank_extra_sources_array[0];
43.754 - } else {
43.755 - exchange_node->rank_extra_source = -1;
43.756 - }
43.757 -
43.758 - /* set the exchange pattern */
43.759 - if ( EXCHANGE_NODE == exchange_node->node_type ) {
43.760 - exchange_node->n_exchanges = n_levels * (tree_order - 1);
43.761 - exchange_node->rank_exchanges = (int *) malloc
43.762 - (exchange_node->n_exchanges * sizeof(int));
43.763 - if( NULL == exchange_node->rank_exchanges ) {
43.764 - goto Error;
43.765 - }
43.766 -
43.767 - /* fill in exchange partners */
43.768 - for ( i = 0, shift = 1 ; i < exchange_node->n_exchanges ; shift *= tree_order ) {
43.769 - for ( mask = 1 ; mask < tree_order ; ++mask, ++i ) {
43.770 - exchange_node->rank_exchanges[i] = node_rank ^ (mask * shift);
43.771 - NETPATTERNS_VERBOSE(("rank_exchanges#%d/%d = %d", i, tree_order, node_rank ^ (mask * shift)));
43.772 - }
43.773 - }
43.774 -
43.775 - } else {
43.776 -
43.777 - exchange_node->n_exchanges=0;
43.778 - exchange_node->rank_exchanges=NULL;
43.779 -
43.780 - }
43.781 -
43.782 - /* set the number of tags needed per stripe - this must be the
43.783 - * same across all procs in the communicator.
43.784 - */
43.785 - /* Ishai: Need to find out what is n_tags */
43.786 - exchange_node->n_tags = tree_order * n_levels + 1;
43.787 -
43.788 - /* successful return */
43.789 - return OMPI_SUCCESS;
43.790 -
43.791 -Error:
43.792 - if (exchange_node->rank_extra_sources_array != NULL) {
43.793 - free(exchange_node->rank_extra_sources_array);
43.794 - }
43.795 -
43.796 - /* error return */
43.797 - return OMPI_ERROR;
43.798 -}
43.799 -
43.800 -OMPI_DECLSPEC void mca_common_netpatterns_free_recursive_doubling_tree_node(
43.801 - mca_common_netpatterns_pair_exchange_node_t *exchange_node)
43.802 -{
43.803 - NETPATTERNS_VERBOSE(("About to release rank_extra_sources_array and rank_exchanges"));
43.804 - if (exchange_node->rank_extra_sources_array != NULL) {
43.805 - free(exchange_node->rank_extra_sources_array);
43.806 - }
43.807 -
43.808 - if (exchange_node->rank_exchanges != NULL) {
43.809 - free(exchange_node->rank_exchanges);
43.810 - }
43.811 -}
43.812 -#endif
43.813 -
43.814 -OMPI_DECLSPEC int mca_common_netpatterns_setup_recursive_doubling_tree_node(int num_nodes, int node_rank,
43.815 - mca_common_netpatterns_pair_exchange_node_t *exchange_node)
43.816 -{
43.817 - return mca_common_netpatterns_setup_recursive_doubling_n_tree_node(num_nodes, node_rank, 2, exchange_node);
43.818 -}
43.819 -
43.820 -#if 0
43.821 -/*OMPI_DECLSPEC int old_mca_common_netpatterns_setup_recursive_doubling_tree_node(int num_nodes, int node_rank,*/
43.822 -OMPI_DECLSPEC int mca_common_netpatterns_setup_recursive_doubling_n_tree_node(int num_nodes, int node_rank,int tree_order,
43.823 - mca_common_netpatterns_pair_exchange_node_t *exchange_node)
43.824 -{
43.825 - /* local variables */
43.826 - /*int tree_order;*/
43.827 - int i,tmp,cnt,result,n_extra_nodes;
43.828 - int n_exchanges;
43.829 -
43.830 - /* figure out number of levels in the tree */
43.831 -
43.832 - n_exchanges=0;
43.833 - result=num_nodes;
43.834 -/* tree_order=2;*/
43.835 - /* cnt - number of ranks in given level */
43.836 - cnt=1;
43.837 - while( num_nodes > cnt ) {
43.838 - cnt*=tree_order;
43.839 - n_exchanges++;
43.840 - };
43.841 -
43.842 - /* figure out the largest power of 2 that is less than or equal to
43.843 - * num_nodes */
43.844 - if( cnt > num_nodes) {
43.845 - cnt/=tree_order;
43.846 - n_exchanges--;
43.847 - }
43.848 - exchange_node->log_2=n_exchanges;
43.849 -
43.850 - tmp=1;
43.851 - for(i=0 ; i < n_exchanges ; i++ ) {
43.852 - tmp*=2;
43.853 - }
43.854 - exchange_node->n_largest_pow_2=tmp;
43.855 -
43.856 - /* set node characteristics - node that is not within the largest
43.857 - * power of 2 will just send it's data to node that will participate
43.858 - * in the recursive doubling, and get the result back at the end.
43.859 - */
43.860 - if( node_rank+1 > cnt ) {
43.861 - exchange_node->node_type=EXTRA_NODE;
43.862 - } else {
43.863 - exchange_node->node_type=EXCHANGE_NODE;
43.864 - }
43.865 -
43.866 - /* set the initial and final data exchanges - those that are not
43.867 - * part of the recursive doubling.
43.868 - */
43.869 - n_extra_nodes=num_nodes-cnt;
43.870 -
43.871 - if ( EXCHANGE_NODE == exchange_node->node_type ) {
43.872 -
43.873 - if( node_rank < n_extra_nodes ) {
43.874 - exchange_node->n_extra_sources=1;
43.875 - exchange_node->rank_extra_source=cnt+node_rank;
43.876 - } else {
43.877 - exchange_node->n_extra_sources=0;
43.878 - exchange_node->rank_extra_source=-1;
43.879 - }
43.880 -
43.881 - } else {
43.882 - exchange_node->n_extra_sources=1;
43.883 - exchange_node->rank_extra_source=node_rank-cnt;
43.884 - }
43.885 -
43.886 - /* set the exchange pattern */
43.887 - if( EXCHANGE_NODE == exchange_node->node_type ) {
43.888 -
43.889 - exchange_node->n_exchanges=n_exchanges;
43.890 - exchange_node->rank_exchanges=(int *) malloc
43.891 - (n_exchanges*sizeof(int));
43.892 - if( NULL == exchange_node->rank_exchanges ) {
43.893 - goto Error;
43.894 - }
43.895 -
43.896 - /* fill in exchange partners */
43.897 - result=1;
43.898 - tmp=node_rank;
43.899 - for( i=0 ; i < n_exchanges ; i++ ) {
43.900 - if(tmp & 1 ) {
43.901 - exchange_node->rank_exchanges[i]=
43.902 - node_rank-result;
43.903 - } else {
43.904 - exchange_node->rank_exchanges[i]=
43.905 - node_rank+result;
43.906 - }
43.907 - result*=2;
43.908 - tmp/=2;
43.909 - }
43.910 -
43.911 - } else {
43.912 -
43.913 - exchange_node->n_exchanges=0;
43.914 - exchange_node->rank_exchanges=NULL;
43.915 -
43.916 - }
43.917 -
43.918 - /* set the number of tags needed per stripe - this must be the
43.919 - * same across all procs in the communicator.
43.920 - */
43.921 - exchange_node->n_tags=2*n_exchanges+1;
43.922 -
43.923 - /* Ishai: to make sure free will work also for people that call this function */
43.924 - exchange_node->rank_extra_sources_array = NULL;
43.925 -
43.926 - /* successful return */
43.927 - return OMPI_SUCCESS;
43.928 -
43.929 -Error:
43.930 -
43.931 - /* error return */
43.932 - return OMPI_ERROR;
43.933 -}
43.934 -#endif
43.935 -
44.1 --- a/ompi/mca/common/netpatterns/common_netpatterns_knomial_tree.h Tue Feb 19 22:36:41 2013 +0000
44.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000
44.3 @@ -1,253 +0,0 @@
44.4 -/*
44.5 - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
44.6 - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
44.7 - * $COPYRIGHT$
44.8 - *
44.9 - * Additional copyrights may follow
44.10 - *
44.11 - * $HEADER$
44.12 - */
44.13 -
44.14 -#ifndef COMM_PATTERNS_KNOMIAL_TREE_H
44.15 -#define COMM_PATTERNS_KNOMIAL_TREE_H
44.16 -
44.17 -#include "ompi_config.h"
44.18 -#include "orte/runtime/orte_globals.h"
44.19 -
44.20 -BEGIN_C_DECLS
44.21 -
44.22 -
44.23 -/*
44.24 - * Pair-wise data exchange
44.25 - */
44.26 -
44.27 -/* enum for node type */
44.28 -enum {
44.29 - EXCHANGE_NODE,
44.30 - EXTRA_NODE
44.31 -};
44.32 -
44.33 -struct mca_common_netpatterns_pair_exchange_node_t {
44.34 -
44.35 - /* Order of a node in the tree - usually 2 */
44.36 - int tree_order;
44.37 -
44.38 - /* number of nodes this node will exchange data with */
44.39 - int n_exchanges;
44.40 -
44.41 - /* ranks of nodes involved in data exchnge */
44.42 - int *rank_exchanges;
44.43 -
44.44 - /* number of extra sources of data - outside largest power of 2 in
44.45 - * this group */
44.46 - int n_extra_sources;
44.47 -
44.48 - /* rank of the extra source */
44.49 - /* deprecated */ int rank_extra_source;
44.50 - int *rank_extra_sources_array;
44.51 -
44.52 - /* number of tags needed per stripe */
44.53 - int n_tags;
44.54 -
44.55 - /* log 2 of largest full power of 2 for this node set */
44.56 - /* deprecated */ int log_2;
44.57 - int log_tree_order;
44.58 -
44.59 - /* largest power of 2 that fits in this group */
44.60 - /* deprecated */ int n_largest_pow_2;
44.61 - int n_largest_pow_tree_order;
44.62 -
44.63 - /* node type */
44.64 - int node_type;
44.65 -
44.66 -};
44.67 -typedef struct mca_common_netpatterns_pair_exchange_node_t mca_common_netpatterns_pair_exchange_node_t;
44.68 -
44.69 -struct mca_common_netpatterns_payload_t {
44.70 - int s_len;
44.71 - int r_len;
44.72 - int s_offset;
44.73 - int r_offset;
44.74 -};
44.75 -typedef struct mca_common_netpatterns_payload_t mca_common_netpatterns_payload_t;
44.76 -
44.77 -struct mca_common_netpatterns_k_exchange_node_t {
44.78 - /* Order of a node in the tree - usually 2 */
44.79 - int tree_order;
44.80 - /* number of nodes this node will exchange data with */
44.81 - int n_exchanges;
44.82 - /* total number of exchanges that I actually participate in */
44.83 - int n_actual_exchanges;
44.84 - /* ranks of nodes involved in data exchnge */
44.85 - int **rank_exchanges;
44.86 - /* number of extra sources of data - outside largest power of 2 in
44.87 - * this group */
44.88 - int n_extra_sources;
44.89 - /* rank/s of the extra source */
44.90 - int *rank_extra_sources_array;
44.91 - /* number of tags needed per stripe */
44.92 - int n_tags;
44.93 - /* log k of largest full power of k for this node set */
44.94 - int log_tree_order;
44.95 - /* largest power of k that fits in this group */
44.96 - int n_largest_pow_tree_order;
44.97 - /* node type */
44.98 - int node_type;
44.99 - /* start of extra ranks k_nomial */
44.100 - int k_nomial_stray;
44.101 - /* reindex map */
44.102 - int *reindex_map;
44.103 - /* inverse of reindex map, i.e. given a reindexed id find out its actual rank */
44.104 - int *inv_reindex_map;
44.105 - /* reindexed node_rank */
44.106 - int reindex_myid;
44.107 - /* 2-d array that hold payload info for each level of recursive k-ing */
44.108 - mca_common_netpatterns_payload_t **payload_info;
44.109 -};
44.110 -typedef struct mca_common_netpatterns_k_exchange_node_t
44.111 - mca_common_netpatterns_k_exchange_node_t;
44.112 -
44.113 -OMPI_DECLSPEC int mca_common_netpatterns_setup_recursive_doubling_n_tree_node(int num_nodes, int node_rank, int tree_order,
44.114 - mca_common_netpatterns_pair_exchange_node_t *exchange_node);
44.115 -
44.116 -OMPI_DECLSPEC void mca_common_netpatterns_free_recursive_doubling_tree_node(
44.117 - mca_common_netpatterns_pair_exchange_node_t *exchange_node);
44.118 -
44.119 -OMPI_DECLSPEC int mca_common_netpatterns_setup_recursive_doubling_tree_node(int num_nodes, int node_rank,
44.120 - mca_common_netpatterns_pair_exchange_node_t *exchange_node);
44.121 -
44.122 -OMPI_DECLSPEC int mca_common_netpatterns_setup_recursive_knomial_tree_node(
44.123 - int num_nodes, int node_rank, int tree_order,
44.124 - mca_common_netpatterns_k_exchange_node_t *exchange_node);
44.125 -
44.126 -OMPI_DECLSPEC int mca_common_netpatterns_setup_recursive_knomial_allgather_tree_node(
44.127 - int num_nodes, int node_rank, int tree_order, int *hier_ranks,
44.128 - mca_common_netpatterns_k_exchange_node_t *exchange_node);
44.129 -
44.130 -
44.131 -/* Input: k_exchange_node structure
44.132 - Output: index in rank_exchanges array that points
44.133 - to the "start_point" for outgoing send.
44.134 -
44.135 - Please see below example of usage:
44.136 - for (i = start_point ; i > 0; i--)
44.137 - for (k = 0; k < tree_radix; k++)
44.138 - send messages to exchange_node->rank_exchanges[i][k];
44.139 -*/
44.140 -
44.141 -static inline __opal_attribute_always_inline__
44.142 -int mca_common_netpatterns_get_knomial_level(
44.143 - int my_rank, int src_rank,
44.144 - int radix, int size,
44.145 - int *k_level)
44.146 -{
44.147 - int distance,
44.148 - pow_k;
44.149 - int logk_level = 0;
44.150 -
44.151 - /* Calculate disctance from source of data */
44.152 - distance = src_rank - my_rank;
44.153 -
44.154 - /* Wrap around */
44.155 - if (0 > distance) {
44.156 - distance += size;
44.157 - }
44.158 -
44.159 - pow_k = 1;
44.160 - while(distance / (pow_k * radix)) {
44.161 - pow_k *= radix;
44.162 - ++logk_level;
44.163 - }
44.164 - --logk_level;
44.165 -
44.166 - *k_level = pow_k;
44.167 - return logk_level;
44.168 -}
44.169 -
44.170 -/* Input: my_rank, root, radix, size
44.171 - * Output: source of the data, offset in power of K
44.172 - */
44.173 -static inline __opal_attribute_always_inline__
44.174 -int mca_common_netpatterns_get_knomial_data_source(
44.175 - int my_rank, int root, int radix, int size,
44.176 - int *k_level, int *logk_level)
44.177 -{
44.178 - int level = radix;
44.179 - int step = 0;
44.180 -
44.181 - /* Calculate source of the data */
44.182 - while((0 == (root - my_rank) % level)
44.183 - && (level <= size)) {
44.184 - level *= radix;
44.185 - ++step;
44.186 - }
44.187 -
44.188 - *k_level = level/radix;
44.189 - *logk_level = step;
44.190 - return my_rank - (my_rank % level - root % level);
44.191 -}
44.192 -
44.193 -/* Input: my_rank, radix,
44.194 - * k_level - that you get from mca_common_netpatterns_get_knomial_data_source
44.195 - * k_step - some integer
44.196 - * Output: peer - next children in the tree
44.197 - * Usage:
44.198 - * src = mca_common_netpatterns_get_knomial_data_source(
44.199 - * my_rank, root, radix, size,
44.200 - * &k_level, &logk_level)
44.201 - * recv_from(src......);
44.202 - *
44.203 - * MCA_COMMON_NETPATTERNS_GET_NEXT_KNOMIAL_INIT(step_info, k_level, my_rank);
44.204 - * while(MCA_COMMON_NETPATTERNS_GET_NEXT_KNOMIAL_PEER_CHECK_LEVEL(step_info)) {
44.205 - * MCA_COMMON_NETPATTERNS_GET_NEXT_KNOMIAL_PEER(my_rank, radix, step_info, peer);
44.206 - * send_to(peer....);
44.207 - * }
44.208 - * for more example please grep in ptpcoll bcol bcast files
44.209 - */
44.210 -
44.211 -typedef struct mca_common_netpatter_knomial_step_info_t {
44.212 - int k_step;
44.213 - int k_level;
44.214 - int k_tmp_peer;
44.215 -} mca_common_netpatter_knomial_step_info_t;
44.216 -
44.217 -#define MCA_COMMON_NETPATTERNS_GET_NEXT_KNOMIAL_UPDATE_LEVEL_FOR_BCAST(step_info, radix)\
44.218 -do { \
44.219 - if (1 != step_info.k_step) { \
44.220 - step_info.k_level /= radix; \
44.221 - } \
44.222 -} while (0) \
44.223 -
44.224 -#define MCA_COMMON_NETPATTERNS_GET_NEXT_KNOMIAL_INIT(step_info, in_k_level, in_peer)\
44.225 -do { \
44.226 - step_info.k_step = 1; \
44.227 - step_info.k_level = in_k_level; \
44.228 - step_info.k_tmp_peer = in_peer; \
44.229 -} while (0)
44.230 -
44.231 -#define MCA_COMMON_NETPATTERNS_GET_NEXT_KNOMIAL_PEER_CHECK_LEVEL(step_info) \
44.232 - (step_info.k_level > 1)
44.233 -
44.234 -#define MCA_COMMON_NETPATTERNS_GET_NEXT_KNOMIAL_PEER(my_rank, radix, step_info, peer) \
44.235 -do { \
44.236 - int rank_radix_base = my_rank/step_info.k_level; \
44.237 - \
44.238 - peer = step_info.k_tmp_peer + step_info.k_level/radix; \
44.239 - if (rank_radix_base != peer/step_info.k_level) { \
44.240 - /* Wraparound the number */ \
44.241 - peer -= step_info.k_level; \
44.242 - assert(peer >=0); \
44.243 - } \
44.244 - ++step_info.k_step; \
44.245 - if (radix == step_info.k_step) { \
44.246 - step_info.k_level /= radix; \
44.247 - step_info.k_step = 1; \
44.248 - step_info.k_tmp_peer = my_rank; \
44.249 - } else { \
44.250 - step_info.k_tmp_peer = peer; \
44.251 - } \
44.252 - \
44.253 -} while (0)
44.254 -
44.255 -END_C_DECLS
44.256 -#endif
45.1 --- a/ompi/mca/common/netpatterns/common_netpatterns_multinomial_tree.c Tue Feb 19 22:36:41 2013 +0000
45.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000
45.3 @@ -1,190 +0,0 @@
45.4 -/*
45.5 - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
45.6 - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
45.7 - * $COPYRIGHT$
45.8 - *
45.9 - * Additional copyrights may follow
45.10 - *
45.11 - * $HEADER$
45.12 - */
45.13 -
45.14 -#include "ompi_config.h"
45.15 -#ifdef HAVE_UNISTD_H
45.16 -#include <unistd.h>
45.17 -#endif
45.18 -#include <sys/types.h>
45.19 -#ifdef HAVE_SYS_MMAN_H
45.20 -#include <sys/mman.h>
45.21 -#endif
45.22 -#include <fcntl.h>
45.23 -#include <stdlib.h>
45.24 -
45.25 -#include "ompi/constants.h"
45.26 -#include "common_netpatterns.h"
45.27 -
45.28 -
45.29 -/* setup an multi-nomial tree - for each node in the tree
45.30 - * this returns it's parent, and it's children */
45.31 -
45.32 -OMPI_DECLSPEC int mca_common_netpatterns_setup_multinomial_tree(int tree_order, int num_nodes,
45.33 - mca_common_netpatterns_tree_node_t *tree_nodes)
45.34 -{
45.35 - /* local variables */
45.36 - int i,result;
45.37 - int cnt, n_nodes_in_this_level,node_index;
45.38 - int n_cum_nodes,current_level,node,n_nodes_prev_level,rank,parent_rank;
45.39 - int n_nodes_in_last_level,n_full_stripes,n_in_partial_stipe,n_children;
45.40 - int n_lvls_in_tree;
45.41 -
45.42 - /* sanity check */
45.43 - if( 1 >= tree_order ) {
45.44 - goto Error;
45.45 - }
45.46 -
45.47 -
45.48 - /* figure out number of levels in the tree */
45.49 -
45.50 - n_lvls_in_tree=0;
45.51 - result=num_nodes;
45.52 - /* cnt - number of ranks in given level */
45.53 - cnt=1;
45.54 - /* cummulative count of ranks */
45.55 - while( 0 < result ) {
45.56 - result-=cnt;
45.57 - cnt*=tree_order;
45.58 - n_lvls_in_tree++;
45.59 - };
45.60 -
45.61 - /* loop over tree levels */
45.62 - n_nodes_in_this_level=1;
45.63 - node_index=-1;
45.64 - n_cum_nodes=0;
45.65 - for( current_level = 0 ; current_level < n_lvls_in_tree ; current_level++) {
45.66 -
45.67 - /* loop over nodes in current level */
45.68 - for ( node=0 ; node < n_nodes_in_this_level ; node++ ) {
45.69 - /* get node index */
45.70 - node_index++;
45.71 -
45.72 - /* break if reach group size */
45.73 - if( node_index == num_nodes) {
45.74 - break;
45.75 - }
45.76 -
45.77 - tree_nodes[node_index].my_rank=node_index;
45.78 - tree_nodes[node_index].children_ranks=NULL;
45.79 -
45.80 - /*
45.81 - * Parents
45.82 - */
45.83 - if( 0 == current_level ) {
45.84 - tree_nodes[node_index].n_parents=0;
45.85 - /* get parent index */
45.86 - tree_nodes[node_index].parent_rank=-1;
45.87 - } else {
45.88 - tree_nodes[node_index].n_parents=1;
45.89 - /* get parent index */
45.90 - n_nodes_prev_level=n_nodes_in_this_level/tree_order;
45.91 - if( current_level == n_lvls_in_tree -1 ) {
45.92 - /* load balance the lowest level */
45.93 - parent_rank=node-
45.94 - (node/n_nodes_prev_level)*n_nodes_prev_level;
45.95 - parent_rank=n_cum_nodes-n_nodes_prev_level+
45.96 - parent_rank;
45.97 - tree_nodes[node_index].parent_rank=parent_rank;
45.98 - } else {
45.99 - tree_nodes[node_index].parent_rank=
45.100 - (n_cum_nodes-n_nodes_prev_level)+node/tree_order;
45.101 - }
45.102 - }
45.103 -
45.104 - /*
45.105 - * Children
45.106 - */
45.107 -
45.108 - /* get number of children */
45.109 - if( (n_lvls_in_tree-1) == current_level ) {
45.110 - /* leaves have no nodes */
45.111 - tree_nodes[node_index].n_children=0;
45.112 - tree_nodes[node_index].children_ranks=NULL;
45.113 - } else {
45.114 - /* take into account last level being incomplete */
45.115 - if( (n_lvls_in_tree-2) == current_level ) {
45.116 - /* last level is load balanced */
45.117 - n_nodes_in_last_level=num_nodes-
45.118 - (n_cum_nodes+n_nodes_in_this_level);
45.119 - n_full_stripes=n_nodes_in_last_level/n_nodes_in_this_level;
45.120 - n_in_partial_stipe=n_nodes_in_last_level-
45.121 - n_full_stripes*n_nodes_in_this_level;
45.122 - n_children=n_full_stripes;
45.123 - if( n_full_stripes < tree_order ) {
45.124 - if( node <= n_in_partial_stipe-1 ) {
45.125 - n_children++;
45.126 - }
45.127 - }
45.128 - tree_nodes[node_index].n_children=n_children;
45.129 - if( 0 < n_children ) {
45.130 - tree_nodes[node_index].children_ranks=(int *)
45.131 - malloc(sizeof(int)*n_children);
45.132 - if( NULL == tree_nodes[node_index].children_ranks) {
45.133 - goto Error;
45.134 - }
45.135 - } else {
45.136 - tree_nodes[node_index].children_ranks=NULL;
45.137 - }
45.138 - /* fill in list */
45.139 - for( rank=0 ; rank < n_children ; rank++ ) {
45.140 - tree_nodes[node_index].children_ranks[rank]=
45.141 - node+rank*n_nodes_in_this_level;
45.142 - tree_nodes[node_index].children_ranks[rank]+=
45.143 - (n_cum_nodes+n_nodes_in_this_level);
45.144 - }
45.145 - } else {
45.146 - n_children=tree_order;
45.147 - tree_nodes[node_index].n_children=tree_order;
45.148 - tree_nodes[node_index].children_ranks=(int *)
45.149 - malloc(sizeof(int)*n_children);
45.150 - if( NULL == tree_nodes[node_index].children_ranks) {
45.151 - goto Error;
45.152 - }
45.153 - for( rank=0 ; rank < n_children ; rank++ ) {
45.154 - tree_nodes[node_index].children_ranks[rank]=
45.155 - rank+tree_order*node;
45.156 - tree_nodes[node_index].children_ranks[rank]+=
45.157 - (n_cum_nodes+n_nodes_in_this_level);
45.158 - }
45.159 - }
45.160 - }
45.161 -
45.162 - } /* end node loop */
45.163 -
45.164 - /* update helper counters */
45.165 - n_cum_nodes+=n_nodes_in_this_level;
45.166 - n_nodes_in_this_level*=tree_order;
45.167 - }
45.168 -
45.169 - /* set node type */
45.170 - for(i=0 ; i < num_nodes ; i++ ) {
45.171 - if( 0 == tree_nodes[i].n_parents ) {
45.172 - tree_nodes[i].my_node_type=ROOT_NODE;
45.173 - } else if ( 0 == tree_nodes[i].n_children ) {
45.174 - tree_nodes[i].my_node_type=LEAF_NODE;
45.175 - } else {
45.176 - tree_nodes[i].my_node_type=INTERIOR_NODE;
45.177 - }
45.178 - }
45.179 -
45.180 - /* successful return */
45.181 - return OMPI_SUCCESS;
45.182 -
45.183 -Error:
45.184 - /* free allocated memory */
45.185 - for( i=0 ; i < num_nodes ; i++ ) {
45.186 - if( NULL != tree_nodes[i].children_ranks ) {
45.187 - free(tree_nodes[i].children_ranks);
45.188 - }
45.189 - }
45.190 -
45.191 - /* error return */
45.192 - return OMPI_ERROR;
45.193 -}
46.1 --- a/ompi/mca/common/netpatterns/common_netpatterns_nary_tree.c Tue Feb 19 22:36:41 2013 +0000
46.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000
46.3 @@ -1,442 +0,0 @@
46.4 -/*
46.5 - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
46.6 - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
46.7 - * $COPYRIGHT$
46.8 - *
46.9 - * Additional copyrights may follow
46.10 - *
46.11 - * $HEADER$
46.12 - */
46.13 -
46.14 -#include "ompi_config.h"
46.15 -#ifdef HAVE_UNISTD_H
46.16 -#include <unistd.h>
46.17 -#endif
46.18 -#include <sys/types.h>
46.19 -#ifdef HAVE_SYS_MMAN_H
46.20 -#include <sys/mman.h>
46.21 -#endif
46.22 -#include <fcntl.h>
46.23 -#include <errno.h>
46.24 -#include <stdlib.h>
46.25 -
46.26 -#include "ompi/constants.h"
46.27 -#include "common_netpatterns.h"
46.28 -
46.29 -/*
46.30 - * Create mmaped shared file
46.31 - */
46.32 -
46.33 -/* setup an n-array tree */
46.34 -
46.35 -int mca_common_netpatterns_setup_narray_tree(int tree_order, int my_rank, int num_nodes,
46.36 - mca_common_netpatterns_tree_node_t *my_node)
46.37 -{
46.38 - /* local variables */
46.39 - int n_levels, result;
46.40 - int my_level_in_tree, cnt;
46.41 - int lvl,cum_cnt, my_rank_in_my_level,n_lvls_in_tree;
46.42 - int start_index,end_index;
46.43 -
46.44 - /* sanity check */
46.45 - if( 1 >= tree_order ) {
46.46 - goto Error;
46.47 - }
46.48 -
46.49 - my_node->my_rank=my_rank;
46.50 - my_node->tree_size=num_nodes;
46.51 -
46.52 - /* figure out number of levels in tree */
46.53 - n_levels=0;
46.54 - result=num_nodes-1;
46.55 - while (0 < result ) {
46.56 - result/=tree_order;
46.57 - n_levels++;
46.58 - };
46.59 -
46.60 - /* figure out who my children and parents are */
46.61 - my_level_in_tree=-1;
46.62 - result=my_rank;
46.63 - /* cnt - number of ranks in given level */
46.64 - cnt=1;
46.65 - /* cummulative count of ranks */
46.66 - while( 0 <= result ) {
46.67 - result-=cnt;
46.68 - cnt*=tree_order;
46.69 - my_level_in_tree++;
46.70 - };
46.71 - /* int my_level_in_tree, n_children, n_parents; */
46.72 -
46.73 - if( 0 == my_rank ) {
46.74 - my_node->n_parents=0;
46.75 - my_node->parent_rank=-1;
46.76 - my_rank_in_my_level=0;
46.77 - } else {
46.78 - my_node->n_parents=1;
46.79 - cnt=1;
46.80 - cum_cnt=0;
46.81 - for (lvl = 0 ; lvl < my_level_in_tree ; lvl ++ ) {
46.82 - /* cummulative count up to this level */
46.83 - cum_cnt+=cnt;
46.84 - /* number of ranks in this level */
46.85 - cnt*=tree_order;
46.86 - }
46.87 - my_rank_in_my_level=my_rank-cum_cnt;
46.88 - /* tree_order consecutive ranks have the same parent */
46.89 - my_node->parent_rank=cum_cnt-cnt/tree_order+my_rank_in_my_level/tree_order;
46.90 - }
46.91 -
46.92 - /* figure out number of levels in the tree */
46.93 - n_lvls_in_tree=0;
46.94 - result=num_nodes;
46.95 - /* cnt - number of ranks in given level */
46.96 - cnt=1;
46.97 - /* cummulative count of ranks */
46.98 - while( 0 < result ) {
46.99 - result-=cnt;
46.100 - cnt*=tree_order;
46.101 - n_lvls_in_tree++;
46.102 - };
46.103 -
46.104 - my_node->children_ranks=(int *)NULL;
46.105 -
46.106 - /* get list of children */
46.107 - if( my_level_in_tree == (n_lvls_in_tree -1 ) ) {
46.108 - /* last level has no children */
46.109 - my_node->n_children=0;
46.110 - } else {
46.111 - cum_cnt=0;
46.112 - cnt=1;
46.113 - for( lvl=0 ; lvl <= my_level_in_tree ; lvl++ ) {
46.114 - cum_cnt+=cnt;
46.115 - cnt*=tree_order;
46.116 - }
46.117 - start_index=cum_cnt+my_rank_in_my_level*tree_order;
46.118 - end_index=start_index+tree_order-1;
46.119 -
46.120 - /* don't go out of bounds at the end of the list */
46.121 - if( end_index >= num_nodes ) {
46.122 - end_index = num_nodes-1;
46.123 - }
46.124 -
46.125 - if( start_index <= (num_nodes-1) ) {
46.126 - my_node->n_children=end_index-start_index+1;
46.127 - } else {
46.128 - my_node->n_children=0;
46.129 - }
46.130 -
46.131 - my_node->children_ranks=NULL;
46.132 - if( 0 < my_node->n_children ) {
46.133 - my_node->children_ranks=
46.134 - (int *)malloc( sizeof(int)*my_node->n_children);
46.135 - if( NULL == my_node->children_ranks) {
46.136 - goto Error;
46.137 - }
46.138 - for (lvl= start_index ; lvl <= end_index ; lvl++ ) {
46.139 - my_node->children_ranks[lvl-start_index]=lvl;
46.140 - }
46.141 - }
46.142 - }
46.143 - /* set node type */
46.144 - if( 0 == my_node->n_parents ) {
46.145 - my_node->my_node_type=ROOT_NODE;
46.146 - } else if ( 0 == my_node->n_children ) {
46.147 - my_node->my_node_type=LEAF_NODE;
46.148 - } else {
46.149 - my_node->my_node_type=INTERIOR_NODE;
46.150 - }
46.151 -
46.152 -
46.153 - /* successful return */
46.154 - return OMPI_SUCCESS;
46.155 -
46.156 -Error:
46.157 -
46.158 - /* error return */
46.159 - return OMPI_ERROR;
46.160 -}
46.161 -
46.162 -int mca_common_netpatterns_setup_narray_knomial_tree(
46.163 - int tree_order, int my_rank, int num_nodes,
46.164 - mca_common_netpatterns_narray_knomial_tree_node_t *my_node)
46.165 -{
46.166 - /* local variables */
46.167 - int n_levels, result;
46.168 - int my_level_in_tree, cnt ;
46.169 - int lvl,cum_cnt, my_rank_in_my_level,n_lvls_in_tree;
46.170 - int start_index,end_index;
46.171 - int rc;
46.172 -
46.173 - /* sanity check */
46.174 - if( 1 >= tree_order ) {
46.175 - goto Error;
46.176 - }
46.177 -
46.178 - my_node->my_rank=my_rank;
46.179 - my_node->tree_size=num_nodes;
46.180 -
46.181 - /* figure out number of levels in tree */
46.182 - n_levels=0;
46.183 - result=num_nodes-1;
46.184 - while (0 < result ) {
46.185 - result/=tree_order;
46.186 - n_levels++;
46.187 - };
46.188 -
46.189 - /* figure out who my children and parents are */
46.190 - my_level_in_tree=-1;
46.191 - result=my_rank;
46.192 - /* cnt - number of ranks in given level */
46.193 - cnt=1;
46.194 - /* cummulative count of ranks */
46.195 - while( 0 <= result ) {
46.196 - result-=cnt;
46.197 - cnt*=tree_order;
46.198 - my_level_in_tree++;
46.199 - };
46.200 - /* int my_level_in_tree, n_children, n_parents; */
46.201 -
46.202 - if( 0 == my_rank ) {
46.203 - my_node->n_parents=0;
46.204 - my_node->parent_rank=-1;
46.205 - my_rank_in_my_level=0;
46.206 - } else {
46.207 - my_node->n_parents=1;
46.208 - cnt=1;
46.209 - cum_cnt=0;
46.210 - for (lvl = 0 ; lvl < my_level_in_tree ; lvl ++ ) {
46.211 - /* cummulative count up to this level */
46.212 - cum_cnt+=cnt;
46.213 - /* number of ranks in this level */
46.214 - cnt*=tree_order;
46.215 - }
46.216 -
46.217 - my_node->rank_on_level =
46.218 - my_rank_in_my_level =
46.219 - my_rank-cum_cnt;
46.220 - my_node->level_size = cnt;
46.221 -
46.222 - rc = mca_common_netpatterns_setup_recursive_knomial_tree_node(
46.223 - my_node->level_size, my_node->rank_on_level,
46.224 - tree_order, &my_node->k_node);
46.225 - if (OMPI_SUCCESS != rc) {
46.226 - goto Error;
46.227 - }
46.228 -
46.229 - /* tree_order consecutive ranks have the same parent */
46.230 - my_node->parent_rank=cum_cnt-cnt/tree_order+my_rank_in_my_level/tree_order;
46.231 - }
46.232 -
46.233 - /* figure out number of levels in the tree */
46.234 - n_lvls_in_tree=0;
46.235 - result=num_nodes;
46.236 - /* cnt - number of ranks in given level */
46.237 - cnt=1;
46.238 - /* cummulative count of ranks */
46.239 - while( 0 < result ) {
46.240 - result-=cnt;
46.241 - cnt*=tree_order;
46.242 - n_lvls_in_tree++;
46.243 - };
46.244 -
46.245 - if(result < 0) {
46.246 - /* reset the size on group */
46.247 - num_nodes = cnt / tree_order;
46.248 - }
46.249 -
46.250 - my_node->children_ranks=(int *)NULL;
46.251 -
46.252 - /* get list of children */
46.253 - if( my_level_in_tree == (n_lvls_in_tree -1 ) ) {
46.254 - /* last level has no children */
46.255 - my_node->n_children=0;
46.256 - } else {
46.257 - cum_cnt=0;
46.258 - cnt=1;
46.259 - for( lvl=0 ; lvl <= my_level_in_tree ; lvl++ ) {
46.260 - cum_cnt+=cnt;
46.261 - cnt*=tree_order;
46.262 - }
46.263 - start_index=cum_cnt+my_rank_in_my_level*tree_order;
46.264 - end_index=start_index+tree_order-1;
46.265 -
46.266 - /* don't go out of bounds at the end of the list */
46.267 - if( end_index >= num_nodes ) {
46.268 - end_index = num_nodes-1;
46.269 - }
46.270 -
46.271 - if( start_index <= (num_nodes-1) ) {
46.272 - my_node->n_children=end_index-start_index+1;
46.273 - } else {
46.274 - my_node->n_children=0;
46.275 - }
46.276 -
46.277 - my_node->children_ranks=NULL;
46.278 - if( 0 < my_node->n_children ) {
46.279 - my_node->children_ranks=
46.280 - (int *)malloc( sizeof(int)*my_node->n_children);
46.281 - if( NULL == my_node->children_ranks) {
46.282 - goto Error;
46.283 - }
46.284 - for (lvl= start_index ; lvl <= end_index ; lvl++ ) {
46.285 - my_node->children_ranks[lvl-start_index]=lvl;
46.286 - }
46.287 - }
46.288 - }
46.289 - /* set node type */
46.290 - if( 0 == my_node->n_parents ) {
46.291 - my_node->my_node_type=ROOT_NODE;
46.292 - } else if ( 0 == my_node->n_children ) {
46.293 - my_node->my_node_type=LEAF_NODE;
46.294 - } else {
46.295 - my_node->my_node_type=INTERIOR_NODE;
46.296 - }
46.297 -
46.298 -
46.299 - /* successful return */
46.300 - return OMPI_SUCCESS;
46.301 -
46.302 -Error:
46.303 -
46.304 - /* error return */
46.305 - return OMPI_ERROR;
46.306 -}
46.307 -
46.308 -/* calculate the nearest power of radix that is equal to or greater
46.309 - * than size, with the specified radix. The resulting tree is of
46.310 - * depth n_lvls.
46.311 - */
46.312 -OMPI_DECLSPEC int roundup_to_power_radix ( int radix, int size, int *n_lvls )
46.313 -{
46.314 - int n_levels=0, return_value=1;
46.315 - int result;
46.316 - if( 1 > size ) {
46.317 - return 0;
46.318 - }
46.319 -
46.320 - result=size-1;
46.321 - while (0 < result ) {
46.322 - result/=radix;
46.323 - n_levels++;
46.324 - return_value*=radix;
46.325 - };
46.326 - *n_lvls=n_levels;
46.327 - return return_value;
46.328 -}
46.329 -
46.330 -static int fill_in_node_data(int tree_order, int num_nodes, int my_node,
46.331 - mca_common_netpatterns_tree_node_t *nodes_data)
46.332 -{
46.333 - /* local variables */
46.334 - int rc, num_ranks_per_child, num_children, n_extra;
46.335 - int child, rank, n_to_offset, n_ranks_to_child;
46.336 -
46.337 - /* figure out who are my children */
46.338 - num_ranks_per_child=num_nodes/tree_order;
46.339 - if( num_ranks_per_child ) {
46.340 - num_children=tree_order;
46.341 - n_extra=num_nodes-num_ranks_per_child*tree_order;
46.342 - } else {
46.343 - num_children=num_nodes;
46.344 - /* each child has the same number of descendents - 1 */
46.345 - n_extra=0;
46.346 - /* when there is a child, there is at least one
46.347 - * descendent */
46.348 - num_ranks_per_child=1;
46.349 - }
46.350 -
46.351 - nodes_data[my_node].n_children=num_children;
46.352 - if( num_children ) {
46.353 - nodes_data[my_node].children_ranks=(int *)
46.354 - malloc(sizeof(int)*num_children);
46.355 - if(!nodes_data[my_node].children_ranks) {
46.356 -
46.357 - if ( NULL == nodes_data[my_node].children_ranks )
46.358 - {
46.359 - fprintf(stderr, "Cannot allocate memory for children_ranks.\n");
46.360 - rc = OMPI_ERR_OUT_OF_RESOURCE;
46.361 - goto error;
46.362 - }
46.363 - }
46.364 - }
46.365 -
46.366 - rank = my_node;
46.367 - for( child=0 ; child < num_children ; child ++ ) {
46.368 -
46.369 - /* set parent information */
46.370 - nodes_data[rank].n_parents=1;
46.371 - nodes_data[rank].parent_rank=my_node;
46.372 - if( n_extra ) {
46.373 - n_to_offset=child;
46.374 - if( n_to_offset > n_extra){
46.375 - n_to_offset=n_extra;
46.376 - }
46.377 - } else {
46.378 - n_to_offset=0;
46.379 - }
46.380 -
46.381 - rank=my_node+1+child*num_ranks_per_child;
46.382 - rank+=n_to_offset;
46.383 -
46.384 - /* set parent information */
46.385 - nodes_data[rank].n_parents=1;
46.386 - nodes_data[rank].parent_rank=my_node;
46.387 -
46.388 - n_ranks_to_child=num_ranks_per_child;
46.389 - if(n_extra && (child < n_extra) ) {
46.390 - n_ranks_to_child++;
46.391 - }
46.392 -
46.393 - /* set child information */
46.394 - nodes_data[my_node].children_ranks[child]=rank;
46.395 -
46.396 - /* remove the child from the list of ranks */
46.397 - n_ranks_to_child--;
46.398 - rc=fill_in_node_data(tree_order, n_ranks_to_child, rank, nodes_data);
46.399 - if( OMPI_SUCCESS != rc ) {
46.400 - goto error;
46.401 - }
46.402 -
46.403 - }
46.404 -
46.405 - /* return */
46.406 - return OMPI_SUCCESS;
46.407 -
46.408 - /* Error */
46.409 -error:
46.410 - return rc;
46.411 -
46.412 -}
46.413 -
46.414 -/*
46.415 - * This routine sets up the array describing the communication tree for
46.416 - * a k-ary tree where the children form a contiguous range of ranks at
46.417 - * each level. The assumption here is that rank 0 is always the root -
46.418 - * ranks may be rotated based on who the actual root is, to obtain the
46.419 - * appropriate communication pattern for such roots.
46.420 - */
46.421 -OMPI_DECLSPEC int mca_common_netpatterns_setup_narray_tree_contigous_ranks(
46.422 - int tree_order, int num_nodes,
46.423 - mca_common_netpatterns_tree_node_t **tree_nodes)
46.424 -{
46.425 - /* local variables */
46.426 - int num_descendent_ranks=num_nodes-1;
46.427 - int rc=OMPI_SUCCESS;
46.428 -
46.429 - *tree_nodes=(mca_common_netpatterns_tree_node_t *)malloc(
46.430 - sizeof(mca_common_netpatterns_tree_node_t)*
46.431 - num_nodes);
46.432 - if(!(*tree_nodes) ) {
46.433 - fprintf(stderr, "Cannot allocate memory for tree_nodes.\n");
46.434 - rc = OMPI_ERR_OUT_OF_RESOURCE;
46.435 - return rc;
46.436 - }
46.437 -
46.438 - (*tree_nodes)[0].n_parents=0;
46.439 - rc=fill_in_node_data(tree_order,
46.440 - num_descendent_ranks, 0, *tree_nodes);
46.441 -
46.442 - /* successful return */
46.443 - return rc;
46.444 -
46.445 -}
47.1 --- a/ompi/mca/sbgp/basesmsocket/Makefile.am Tue Feb 19 22:36:41 2013 +0000
47.2 +++ b/ompi/mca/sbgp/basesmsocket/Makefile.am Tue Feb 19 22:50:56 2013 +0000
47.3 @@ -35,8 +35,7 @@
47.4 mcacomponent_LTLIBRARIES = $(component_install)
47.5 mca_sbgp_basesmsocket_la_SOURCES = $(sources)
47.6 mca_sbgp_basesmsocket_la_LDFLAGS = -module -avoid-version
47.7 -mca_sbgp_basesmsocket_la_LIBADD = \
47.8 - $(top_ompi_builddir)/ompi/mca/common/commpatterns/libmca_common_commpatterns.la
47.9 +mca_sbgp_basesmsocket_la_LIBADD =
47.10
47.11 noinst_LTLIBRARIES = $(component_noinst)
47.12 libmca_sbgp_basesmsocket_la_SOURCES =$(sources)
48.1 --- a/ompi/mca/sbgp/basesmsocket/sbgp_basesmsocket_component.c Tue Feb 19 22:36:41 2013 +0000
48.2 +++ b/ompi/mca/sbgp/basesmsocket/sbgp_basesmsocket_component.c Tue Feb 19 22:50:56 2013 +0000
48.3 @@ -39,7 +39,7 @@
48.4 #include "ompi/communicator/communicator.h"
48.5 #include "sbgp_basesmsocket.h"
48.6
48.7 -#include "ompi/mca/common/commpatterns/common_coll_ops.h"
48.8 +#include "ompi/patterns/comm/coll_ops.h"
48.9
48.10
48.11 /*
49.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
49.2 +++ b/ompi/patterns/comm/Makefile.am Tue Feb 19 22:50:56 2013 +0000
49.3 @@ -0,0 +1,16 @@
49.4 +# Copyright (c) 2013 Oak Ridge National Laboratory. All rights reserved.
49.5 +# $COPYRIGHT$
49.6 +#
49.7 +# Additional copyrights may follow
49.8 +#
49.9 +# $HEADER$
49.10 +#
49.11 +
49.12 +headers += \
49.13 + patterns/comm/coll_ops.h \
49.14 + patterns/comm/commpatterns.h
49.15 +
49.16 +libmpi_la_SOURCES += \
49.17 + patterns/comm/allreduce.c \
49.18 + patterns/comm/allgather.c \
49.19 + patterns/comm/bcast.c
50.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
50.2 +++ b/ompi/patterns/comm/allgather.c Tue Feb 19 22:50:56 2013 +0000
50.3 @@ -0,0 +1,288 @@
50.4 +/*
50.5 + * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
50.6 + * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
50.7 + * Copyright (c) 2012 Los Alamos National Security, LLC.
50.8 + * All rights reserved.
50.9 + * $COPYRIGHT$
50.10 + *
50.11 + * Additional copyrights may follow
50.12 + *
50.13 + * $HEADER$
50.14 + */
50.15 +/** @file */
50.16 +
50.17 +#include "ompi_config.h"
50.18 +
50.19 +#include "ompi/constants.h"
50.20 +#include "ompi/op/op.h"
50.21 +#include "ompi/datatype/ompi_datatype.h"
50.22 +#include "ompi/communicator/communicator.h"
50.23 +#include "opal/include/opal/sys/atomic.h"
50.24 +#include "ompi/mca/pml/pml.h"
50.25 +#include "ompi/patterns/net/netpatterns.h"
50.26 +#include "coll_ops.h"
50.27 +
50.28 +/**
50.29 + * All-gather - subgroup in communicator
50.30 + */
50.31 +OMPI_DECLSPEC int comm_allgather_pml(void *src_buf, void *dest_buf, int count,
50.32 + ompi_datatype_t *dtype, int my_rank_in_group,
50.33 + int n_peers, int *ranks_in_comm,ompi_communicator_t *comm)
50.34 +{
50.35 + /* local variables */
50.36 + int rc=OMPI_SUCCESS,msg_cnt;
50.37 + int pair_rank,exchange,extra_rank, n_extra_nodes,n_extra;
50.38 + int proc_block,extra_start,extra_end,iovec_len;
50.39 + int remote_data_start_rank,remote_data_end_rank;
50.40 + int local_data_start_rank;
50.41 + netpatterns_pair_exchange_node_t my_exchange_node;
50.42 + size_t message_extent,current_data_extent,current_data_count;
50.43 + size_t dt_size;
50.44 + OPAL_PTRDIFF_TYPE dt_extent;
50.45 + char *src_buf_current;
50.46 + char *dest_buf_current;
50.47 + struct iovec send_iov[2] = {{0,0},{0,0}},
50.48 + recv_iov[2] = {{0,0},{0,0}};
50.49 + ompi_request_t *requests[4];
50.50 +
50.51 + /* get size of data needed - same layout as user data, so that
50.52 + * we can apply the reduction routines directly on these buffers
50.53 + */
50.54 + rc = ompi_datatype_type_size(dtype, &dt_size);
50.55 + if( OMPI_SUCCESS != rc ) {
50.56 + goto Error;
50.57 + }
50.58 +
50.59 + rc = ompi_datatype_type_extent(dtype, &dt_extent);
50.60 + if( OMPI_SUCCESS != rc ) {
50.61 + goto Error;
50.62 + }
50.63 + message_extent = dt_extent*count;
50.64 +
50.65 + /* place my data in the correct destination buffer */
50.66 + rc=ompi_datatype_copy_content_same_ddt(dtype,count,
50.67 + (char *)dest_buf+my_rank_in_group*message_extent,
50.68 + (char *)src_buf);
50.69 + if( OMPI_SUCCESS != rc ) {
50.70 + goto Error;
50.71 + }
50.72 +
50.73 + /* 1 process special case */
50.74 + if(1 == n_peers) {
50.75 + return OMPI_SUCCESS;
50.76 + }
50.77 +
50.78 + /* get my reduction communication pattern */
50.79 + rc = netpatterns_setup_recursive_doubling_tree_node(n_peers,
50.80 + my_rank_in_group, &my_exchange_node);
50.81 + if(OMPI_SUCCESS != rc){
50.82 + return rc;
50.83 + }
50.84 +
50.85 + n_extra_nodes=n_peers-my_exchange_node.n_largest_pow_2;
50.86 +
50.87 + /* get the data from the extra sources */
50.88 + if(0 < my_exchange_node.n_extra_sources) {
50.89 +
50.90 + if ( EXCHANGE_NODE == my_exchange_node.node_type ) {
50.91 +
50.92 + /*
50.93 + ** Receive data from extra node
50.94 + */
50.95 +
50.96 + extra_rank=my_exchange_node.rank_extra_source;
50.97 + /* receive the data into the correct location - will use 2
50.98 + * messages in the recursive doubling phase */
50.99 + dest_buf_current=(char *)dest_buf+message_extent*extra_rank;
50.100 + rc=MCA_PML_CALL(recv(dest_buf_current,
50.101 + count,dtype,ranks_in_comm[extra_rank],
50.102 + -OMPI_COMMON_TAG_ALLREDUCE,
50.103 + comm, MPI_STATUSES_IGNORE));
50.104 + if( 0 > rc ) {
50.105 + goto Error;
50.106 + }
50.107 +
50.108 + } else {
50.109 +
50.110 + /*
50.111 + ** Send data to "partner" node
50.112 + */
50.113 + extra_rank=my_exchange_node.rank_extra_source;
50.114 + src_buf_current=(char *)src_buf;
50.115 + rc=MCA_PML_CALL(send(src_buf_current,
50.116 + count,dtype,ranks_in_comm[extra_rank],
50.117 + -OMPI_COMMON_TAG_ALLREDUCE,
50.118 + MCA_PML_BASE_SEND_STANDARD,
50.119 + comm));
50.120 + if( 0 > rc ) {
50.121 + goto Error;
50.122 + }
50.123 + }
50.124 + }
50.125 +
50.126 + current_data_extent=message_extent;
50.127 + current_data_count=count;
50.128 + src_buf_current=(char *)dest_buf+my_rank_in_group*message_extent;
50.129 + proc_block=1;
50.130 + local_data_start_rank=my_rank_in_group;
50.131 + /* loop over data exchanges */
50.132 + for(exchange=0 ; exchange < my_exchange_node.n_exchanges ; exchange++) {
50.133 +
50.134 + /* is the remote data read */
50.135 + pair_rank=my_exchange_node.rank_exchanges[exchange];
50.136 + msg_cnt=0;
50.137 +
50.138 + /*
50.139 + * Power of 2 data segment
50.140 + */
50.141 + /* post non-blocking receive */
50.142 + if(pair_rank > my_rank_in_group ){
50.143 + recv_iov[0].iov_base=src_buf_current+current_data_extent;
50.144 + recv_iov[0].iov_len=current_data_extent;
50.145 + iovec_len=1;
50.146 + remote_data_start_rank=local_data_start_rank+proc_block;
50.147 + remote_data_end_rank=remote_data_start_rank+proc_block-1;
50.148 + } else {
50.149 + recv_iov[0].iov_base=src_buf_current-current_data_extent;
50.150 + recv_iov[0].iov_len=current_data_extent;
50.151 + iovec_len=1;
50.152 + remote_data_start_rank=local_data_start_rank-proc_block;
50.153 + remote_data_end_rank=remote_data_start_rank+proc_block-1;
50.154 + }
50.155 + /* the data from the non power of 2 ranks */
50.156 + if(remote_data_start_rank<n_extra_nodes) {
50.157 + /* figure out how much data is at the remote rank */
50.158 + /* last rank with data */
50.159 + extra_start=remote_data_start_rank;
50.160 + extra_end=remote_data_end_rank;
50.161 + if(extra_end >= n_extra_nodes ) {
50.162 + /* if last rank exceeds the ranks with extra data,
50.163 + * adjust this.
50.164 + */
50.165 + extra_end=n_extra_nodes-1;
50.166 + }
50.167 + /* get the number of ranks whos data is to be grabbed */
50.168 + n_extra=extra_end-extra_start+1;
50.169 +
50.170 + recv_iov[1].iov_base=(char *)dest_buf+
50.171 + (extra_start+my_exchange_node.n_largest_pow_2)*message_extent;
50.172 + recv_iov[1].iov_len=n_extra*count;
50.173 + iovec_len=2;
50.174 + }
50.175 +
50.176 + rc=MCA_PML_CALL(irecv(recv_iov[0].iov_base,
50.177 + current_data_count,dtype,ranks_in_comm[pair_rank],
50.178 + -OMPI_COMMON_TAG_ALLREDUCE,
50.179 + comm,&(requests[msg_cnt])));
50.180 + if( 0 > rc ) {
50.181 + goto Error;
50.182 + }
50.183 + msg_cnt++;
50.184 +
50.185 + if(iovec_len > 1 ) {
50.186 + rc=MCA_PML_CALL(irecv(recv_iov[1].iov_base,
50.187 + recv_iov[1].iov_len,dtype,ranks_in_comm[pair_rank],
50.188 + -OMPI_COMMON_TAG_ALLREDUCE,
50.189 + comm,&(requests[msg_cnt])));
50.190 + if( 0 > rc ) {
50.191 + goto Error;
50.192 + }
50.193 + msg_cnt++;
50.194 + }
50.195 +
50.196 + /* post non-blocking send */
50.197 + send_iov[0].iov_base=src_buf_current;
50.198 + send_iov[0].iov_len=current_data_extent;
50.199 + iovec_len=1;
50.200 + /* the data from the non power of 2 ranks */
50.201 + if(local_data_start_rank<n_extra_nodes) {
50.202 + /* figure out how much data is at the remote rank */
50.203 + /* last rank with data */
50.204 + extra_start=local_data_start_rank;
50.205 + extra_end=extra_start+proc_block-1;
50.206 + if(extra_end >= n_extra_nodes ) {
50.207 + /* if last rank exceeds the ranks with extra data,
50.208 + * adjust this.
50.209 + */
50.210 + extra_end=n_extra_nodes-1;
50.211 + }
50.212 + /* get the number of ranks whos data is to be grabbed */
50.213 + n_extra=extra_end-extra_start+1;
50.214 +
50.215 + send_iov[1].iov_base=(char *)dest_buf+
50.216 + (extra_start+my_exchange_node.n_largest_pow_2)*message_extent;
50.217 + send_iov[1].iov_len=n_extra*count;
50.218 + iovec_len=2;
50.219 + }
50.220 +
50.221 + rc=MCA_PML_CALL(isend(send_iov[0].iov_base,
50.222 + current_data_count,dtype,ranks_in_comm[pair_rank],
50.223 + -OMPI_COMMON_TAG_ALLREDUCE,MCA_PML_BASE_SEND_STANDARD,
50.224 + comm,&(requests[msg_cnt])));
50.225 + if( 0 > rc ) {
50.226 + goto Error;
50.227 + }
50.228 + msg_cnt++;
50.229 + if( iovec_len > 1 ) {
50.230 + rc=MCA_PML_CALL(isend(send_iov[1].iov_base,
50.231 + send_iov[1].iov_len,dtype,ranks_in_comm[pair_rank],
50.232 + -OMPI_COMMON_TAG_ALLREDUCE,MCA_PML_BASE_SEND_STANDARD,
50.233 + comm,&(requests[msg_cnt])));
50.234 + if( 0 > rc ) {
50.235 + goto Error;
50.236 + }
50.237 + msg_cnt++;
50.238 + }
50.239 +
50.240 + /* prepare the source buffer for the next iteration */
50.241 + if(pair_rank < my_rank_in_group ){
50.242 + src_buf_current-=current_data_extent;
50.243 + local_data_start_rank-=proc_block;
50.244 + }
50.245 + proc_block*=2;
50.246 + current_data_extent*=2;
50.247 + current_data_count*=2;
50.248 +
50.249 + /* wait on send and receive completion */
50.250 + ompi_request_wait_all(msg_cnt,requests,MPI_STATUSES_IGNORE);
50.251 + }
50.252 +
50.253 + /* copy data in from the "extra" source, if need be */
50.254 + if(0 < my_exchange_node.n_extra_sources) {
50.255 +
50.256 + if ( EXTRA_NODE == my_exchange_node.node_type ) {
50.257 + /*
50.258 + ** receive the data
50.259 + ** */
50.260 + extra_rank=my_exchange_node.rank_extra_source;
50.261 +
50.262 + rc=MCA_PML_CALL(recv(dest_buf,
50.263 + count*n_peers,dtype,ranks_in_comm[extra_rank],
50.264 + -OMPI_COMMON_TAG_ALLREDUCE,
50.265 + comm,MPI_STATUSES_IGNORE));
50.266 + if(0 > rc ) {
50.267 + goto Error;
50.268 + }
50.269 + } else {
50.270 + /* send the data to the pair-rank outside of the power of 2 set
50.271 + ** of ranks
50.272 + */
50.273 +
50.274 + extra_rank=my_exchange_node.rank_extra_source;
50.275 + rc=MCA_PML_CALL(send(dest_buf,
50.276 + count*n_peers,dtype,ranks_in_comm[extra_rank],
50.277 + -OMPI_COMMON_TAG_ALLREDUCE,
50.278 + MCA_PML_BASE_SEND_STANDARD,
50.279 + comm));
50.280 + if( 0 > rc ) {
50.281 + goto Error;
50.282 + }
50.283 + }
50.284 + }
50.285 +
50.286 + /* return */
50.287 + return OMPI_SUCCESS;
50.288 +
50.289 +Error:
50.290 + return rc;
50.291 +}
51.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
51.2 +++ b/ompi/patterns/comm/allreduce.c Tue Feb 19 22:50:56 2013 +0000
51.3 @@ -0,0 +1,255 @@
51.4 +/*
51.5 + * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
51.6 + * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
51.7 + * Copyright (c) 2012 Los Alamos National Security, LLC.
51.8 + * All rights reserved.
51.9 + * $COPYRIGHT$
51.10 + *
51.11 + * Additional copyrights may follow
51.12 + *
51.13 + * $HEADER$
51.14 + */
51.15 +/** @file */
51.16 +
51.17 +#include "ompi_config.h"
51.18 +
51.19 +#include "ompi/constants.h"
51.20 +#include "ompi/op/op.h"
51.21 +#include "ompi/datatype/ompi_datatype.h"
51.22 +#include "ompi/communicator/communicator.h"
51.23 +#include "opal/include/opal/sys/atomic.h"
51.24 +#include "ompi/mca/pml/pml.h"
51.25 +#include "ompi/patterns/net/netpatterns.h"
51.26 +#include "coll_ops.h"
51.27 +#include "commpatterns.h"
51.28 +
51.29 +/**
51.30 + * All-reduce for contiguous primitive types - recursive-doubling exchange over the PML, one scratch-buffer-sized stripe at a time
51.31 + */
51.32 +OMPI_DECLSPEC int comm_allreduce_pml(void *sbuf, void *rbuf, int count,
51.33 + ompi_datatype_t *dtype, int my_rank_in_group,
51.34 + struct ompi_op_t *op, int n_peers,int *ranks_in_comm,
51.35 + ompi_communicator_t *comm)
51.36 +{
51.37 + /* local variables - group ranks are turned into PML peers through ranks_in_comm[] */
51.38 + int rc=OMPI_SUCCESS,n_dts_per_buffer,n_data_segments,stripe_number;
51.39 + int pair_rank,exchange,extra_rank;
51.40 + netpatterns_pair_exchange_node_t my_exchange_node;
51.41 + int count_processed,count_this_stripe;
51.42 + size_t dt_size,dt_extent;
51.43 + char scratch_bufers[2][MAX_TMP_BUFFER];
51.44 + int send_buffer=0,recv_buffer=1;
51.45 + char *sbuf_current, *rbuf_current;
51.46 + ompi_request_t *requests[2];
51.47 +
51.48 + /* get size of data needed - same layout as user data, so that
51.49 + * we can apply the reduction routines directly on these buffers
51.50 + */
51.51 + rc = opal_datatype_type_size((opal_datatype_t *)dtype, &dt_size);
51.52 + if( OMPI_SUCCESS != rc ) {
51.53 + goto Error;
51.54 + }
51.55 + rc = ompi_datatype_type_extent(dtype, (OPAL_PTRDIFF_TYPE *)&dt_extent);
51.56 + if( OMPI_SUCCESS != rc ) {
51.57 + goto Error;
51.58 + }
51.59 +
51.60 + /* 1 process special case */
51.61 + if(1 == n_peers) {
51.62 + /* place my data in the correct destination buffer */
51.63 + rc=ompi_datatype_copy_content_same_ddt(dtype,count,
51.64 + (char *)rbuf, (char *)sbuf);
51.65 + if( OMPI_SUCCESS != rc ) {
51.66 + goto Error;
51.67 + }
51.68 + return OMPI_SUCCESS;
51.69 + }
51.70 +
51.71 + /* number of datatype elements that one scratch buffer can hold; a type wider than the buffer is an error */
51.72 + n_dts_per_buffer=((int) MAX_TMP_BUFFER)/dt_extent;
51.73 + if ( 0 == n_dts_per_buffer ) {
51.74 + rc=OMPI_ERROR;
51.75 + goto Error;
51.76 + }
51.77 +
51.78 + /* compute number of stripes needed to process this collective */
51.79 + n_data_segments=(count+n_dts_per_buffer -1 ) / n_dts_per_buffer ;
51.80 +
51.81 + /* get my reduction communication pattern */
51.82 + rc = netpatterns_setup_recursive_doubling_tree_node(n_peers,
51.83 + my_rank_in_group, &my_exchange_node);
51.84 + if(OMPI_SUCCESS != rc){
51.85 + return rc;
51.86 + }
51.87 +
51.88 + count_processed=0;
51.89 +
51.90 + /* process the input one stripe of at most n_dts_per_buffer elements at a time */
51.91 + /* NOTE: starting with a rather synchronous approach */
51.92 + for( stripe_number=0 ; stripe_number < n_data_segments ; stripe_number++ ) {
51.93 +
51.94 + /* get number of elements to process in this stripe */
51.95 + count_this_stripe=n_dts_per_buffer;
51.96 + if( count_processed + count_this_stripe > count )
51.97 + count_this_stripe=count-count_processed;
51.98 +
51.99 + /* copy data from the input buffer into the temp buffer */
51.100 + sbuf_current=(char *)sbuf+count_processed*dt_extent;
51.101 + rc=ompi_datatype_copy_content_same_ddt(dtype,count_this_stripe,
51.102 + scratch_bufers[send_buffer], sbuf_current);
51.103 + if( OMPI_SUCCESS != rc ) {
51.104 + goto Error;
51.105 + }
51.106 +
51.107 + /* copy data in from the "extra" source, if need be */
51.108 + if(0 < my_exchange_node.n_extra_sources) {
51.109 +
51.110 + if ( EXCHANGE_NODE == my_exchange_node.node_type ) {
51.111 +
51.112 + /*
51.113 + ** Receive data from extra node
51.114 + */
51.115 + extra_rank=my_exchange_node.rank_extra_source;
51.116 + rc=MCA_PML_CALL(recv(scratch_bufers[recv_buffer],
51.117 + count_this_stripe,dtype,ranks_in_comm[extra_rank],
51.118 + -OMPI_COMMON_TAG_ALLREDUCE, comm,
51.119 + MPI_STATUSES_IGNORE));
51.120 + if( 0 > rc ) {
51.121 + fprintf(stderr," first recv failed in comm_allreduce_pml \n");
51.122 + fflush(stderr);
51.123 + goto Error;
51.124 + }
51.125 +
51.126 +
51.127 + /* combine my contribution with the data received from the extra rank */
51.128 + if( 0 < count_this_stripe ) {
51.129 + ompi_op_reduce(op,
51.130 + (void *)scratch_bufers[send_buffer],
51.131 + (void *)scratch_bufers[recv_buffer],
51.132 + count_this_stripe,dtype);
51.133 + }
51.134 +
51.135 +
51.136 + } else {
51.137 +
51.138 + /*
51.139 + ** Send data to "partner" node
51.140 + */
51.141 + extra_rank=my_exchange_node.rank_extra_source;
51.142 + rc=MCA_PML_CALL(send(scratch_bufers[send_buffer],
51.143 + count_this_stripe,dtype,ranks_in_comm[extra_rank],
51.144 + -OMPI_COMMON_TAG_ALLREDUCE, MCA_PML_BASE_SEND_STANDARD,
51.145 + comm));
51.146 + if( 0 > rc ) {
51.147 + fprintf(stderr," first send failed in comm_allreduce_pml \n");
51.148 + fflush(stderr);
51.149 + goto Error;
51.150 + }
51.151 + }
51.152 +
51.153 + /* swap the roles of the scratch buffers - this way we can send data
51.154 + ** that we have reduced w/o a memory copy, and receive data into the
51.155 + ** other buffer, w/o fear of overwriting data that has not yet
51.156 + ** completed being sent
51.157 + */
51.158 + recv_buffer^=1;
51.159 + send_buffer^=1;
51.160 + }
51.161 +
51.162 + /* loop over data exchanges */
51.163 + for(exchange=0 ; exchange < my_exchange_node.n_exchanges ; exchange++) {
51.164 +
51.165 + /* partner (rank in the group) for this step of the exchange */
51.166 + pair_rank=my_exchange_node.rank_exchanges[exchange];
51.167 +
51.168 + /* post non-blocking receive */
51.169 + rc=MCA_PML_CALL(irecv(scratch_bufers[recv_buffer],
51.170 + count_this_stripe,dtype,ranks_in_comm[pair_rank],
51.171 + -OMPI_COMMON_TAG_ALLREDUCE,
51.172 + comm,&(requests[0])));
51.173 + if( 0 > rc ) {
51.174 + fprintf(stderr," irecv failed in comm_allreduce_pml at iterations %d \n",
51.175 + exchange);
51.176 + fflush(stderr);
51.177 + goto Error;
51.178 + }
51.179 +
51.180 + /* post non-blocking send */
51.181 + rc=MCA_PML_CALL(isend(scratch_bufers[send_buffer],
51.182 + count_this_stripe,dtype, ranks_in_comm[pair_rank],
51.183 + -OMPI_COMMON_TAG_ALLREDUCE,MCA_PML_BASE_SEND_STANDARD,
51.184 + comm,&(requests[1])));
51.185 + if( 0 > rc ) {
51.186 + fprintf(stderr," isend failed in comm_allreduce_pml at iterations %d \n",
51.187 + exchange);
51.188 + fflush(stderr);
51.189 + goto Error;
51.190 + }
51.191 + /* wait on send and receive completion (the return value is not checked) */
51.192 + ompi_request_wait_all(2,requests,MPI_STATUSES_IGNORE);
51.193 +
51.194 + /* reduce the data */
51.195 + if( 0 < count_this_stripe ) {
51.196 + ompi_op_reduce(op,
51.197 + (void *)scratch_bufers[send_buffer],
51.198 + (void *)scratch_bufers[recv_buffer],
51.199 + count_this_stripe,dtype);
51.200 + }
51.201 + /* get ready for next step - swap the scratch buffers again */
51.202 + recv_buffer^=1;
51.203 + send_buffer^=1;
51.204 +
51.205 + }
51.206 +
51.207 + /* hand the final data to / take it from the "extra" source, if need be */
51.208 + if(0 < my_exchange_node.n_extra_sources) {
51.209 +
51.210 + if ( EXTRA_NODE == my_exchange_node.node_type ) {
51.211 + /*
51.212 + ** receive the data
51.213 + ** */
51.214 + extra_rank=my_exchange_node.rank_extra_source;
51.215 + rc=MCA_PML_CALL(recv(scratch_bufers[recv_buffer],
51.216 + count_this_stripe,dtype,ranks_in_comm[extra_rank],
51.217 + -OMPI_COMMON_TAG_ALLREDUCE, comm,
51.218 + MPI_STATUSES_IGNORE));
51.219 + if( 0 > rc ) {
51.220 + fprintf(stderr," last recv failed in comm_allreduce_pml \n");
51.221 + fflush(stderr);
51.222 + goto Error;
51.223 + }
51.224 +
51.225 + recv_buffer^=1;
51.226 + send_buffer^=1;
51.227 + } else {
51.228 + /* send the data to the pair-rank outside of the power of 2 set
51.229 + ** of ranks
51.230 + */
51.231 +
51.232 + extra_rank=my_exchange_node.rank_extra_source;
51.233 + rc=MCA_PML_CALL(send((char *)scratch_bufers[send_buffer],
51.234 + count_this_stripe,dtype,ranks_in_comm[extra_rank],
51.235 + -OMPI_COMMON_TAG_ALLREDUCE, MCA_PML_BASE_SEND_STANDARD,
51.236 + comm));
51.237 + if( 0 > rc ) {
51.238 + fprintf(stderr," last send failed in comm_allreduce_pml \n");
51.239 + fflush(stderr);
51.240 + goto Error;
51.241 + }
51.242 + }
51.243 + }
51.244 +
51.245 + /* copy data from the temp buffer into the output buffer - NOTE(review): this side uses dt_size while the input side uses dt_extent; equivalent only when size == extent, confirm */
51.246 + rbuf_current = (char *) rbuf + count_processed * dt_size;
51.247 + memcpy(rbuf_current,scratch_bufers[send_buffer], count_this_stripe*dt_size);
51.248 +
51.249 + /* update the count of elements processed */
51.250 + count_processed += count_this_stripe;
51.251 + }
51.252 +
51.253 + /* return */
51.254 + return OMPI_SUCCESS;
51.255 +
51.256 +Error:
51.257 + return rc;
51.258 +}
52.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
52.2 +++ b/ompi/patterns/comm/bcast.c Tue Feb 19 22:50:56 2013 +0000
52.3 @@ -0,0 +1,97 @@
52.4 +/*
52.5 + * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
52.6 + * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
52.7 + * Copyright (c) 2012 Los Alamos National Security, LLC.
52.8 + * All rights reserved.
52.9 + * $COPYRIGHT$
52.10 + *
52.11 + * Additional copyrights may follow
52.12 + *
52.13 + * $HEADER$
52.14 + */
52.15 +/** @file */
52.16 +
52.17 +#include "ompi_config.h"
52.18 +
52.19 +#include "ompi/constants.h"
52.20 +#include "ompi/op/op.h"
52.21 +#include "ompi/datatype/ompi_datatype.h"
52.22 +#include "ompi/communicator/communicator.h"
52.23 +#include "opal/include/opal/sys/atomic.h"
52.24 +#include "ompi/mca/pml/pml.h"
52.25 +#include "ompi/patterns/net/netpatterns.h"
52.26 +#include "coll_ops.h"
52.27 +
52.28 +/**
52.29 + * Bcast - subgroup in communicator. root and my_rank_in_group are ranks in
52.30 + * the group of n_peers; ranks_in_comm[] (may be NULL) maps them to comm ranks.
52.31 + * Very simple algorithm - binary tree, transmitting the full message per step.
52.32 + */
52.33 +OMPI_DECLSPEC int comm_bcast_pml(void *buffer, int root, int count,
52.34 + ompi_datatype_t *dtype, int my_rank_in_group,
52.35 + int n_peers, int *ranks_in_comm,ompi_communicator_t *comm)
52.36 +{
52.37 + /* local variables */
52.38 + int rc=OMPI_SUCCESS,msg_cnt,i;
52.39 + ompi_request_t *requests[2];
52.40 + int node_rank, peer_rank;
52.41 + netpatterns_tree_node_t node_data;
52.42 +
52.43 + /*
52.44 + * shift rank to root==0 tree
52.45 + */
52.46 + node_rank=(my_rank_in_group-root+n_peers)%n_peers;
52.47 +
52.48 + /*
52.49 + * compute my communication pattern - binary tree
52.50 + */
52.51 + rc=netpatterns_setup_narray_tree(2, node_rank, n_peers,
52.52 + &node_data);
52.53 + if( OMPI_SUCCESS != rc ) {
52.54 + goto Error;
52.55 + }
52.56 +
52.57 + /* 1 process special case */
52.58 + if(1 == n_peers) {
52.59 + return OMPI_SUCCESS;
52.60 + }
52.61 +
52.62 + /* if I have parents - wait on the data to arrive */
52.63 + if(node_data.n_parents) {
52.64 + /* I will have only 1 parent - undo the root==0 shift first */
52.65 + peer_rank=(node_data.parent_rank+root)%n_peers;
52.66 + /* translate the group rank back to the actual rank in comm */
52.67 + if( NULL != ranks_in_comm ) { peer_rank=ranks_in_comm[peer_rank]; }
52.68 + rc=MCA_PML_CALL(recv(buffer, count,dtype,peer_rank,
52.69 + -OMPI_COMMON_TAG_BCAST, comm, MPI_STATUSES_IGNORE));
52.70 + if( 0 > rc ) {
52.71 + goto Error;
52.72 + }
52.73 + }
52.74 +
52.75 + /* send the data to my children (at most 2, matching requests[2]) */
52.76 + msg_cnt=0;
52.77 + for(i=0 ; i < node_data.n_children ; i++ ) {
52.78 + peer_rank=(node_data.children_ranks[i]+root)%n_peers;
52.79 + if( NULL != ranks_in_comm ) { peer_rank=ranks_in_comm[peer_rank]; }
52.80 + rc=MCA_PML_CALL(isend(buffer,
52.81 + count,dtype,peer_rank,
52.82 + -OMPI_COMMON_TAG_BCAST,MCA_PML_BASE_SEND_STANDARD,
52.83 + comm,&(requests[msg_cnt])));
52.84 + if( 0 > rc ) {
52.85 + goto Error;
52.86 + }
52.87 + msg_cnt++;
52.88 + }
52.89 + /* wait for send completion */
52.90 + if(msg_cnt) {
52.91 + /* only sends are outstanding here - the receive above was blocking */
52.92 + ompi_request_wait_all(msg_cnt,requests,MPI_STATUSES_IGNORE);
52.93 + }
52.94 +
52.95 + /* return - NOTE(review): confirm whether node_data.children_ranks must be released by the caller of the tree setup */
52.96 + return OMPI_SUCCESS;
52.97 +
52.98 +Error:
52.99 + return rc;
52.100 +}
53.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
53.2 +++ b/ompi/patterns/comm/coll_ops.h Tue Feb 19 22:50:56 2013 +0000
53.3 @@ -0,0 +1,51 @@
53.4 +/*
53.5 + * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
53.6 + * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
53.7 + * Copyright (c) 2012 Los Alamos National Security, LLC.
53.8 + * All rights reserved.
53.9 + * $COPYRIGHT$
53.10 + *
53.11 + * Additional copyrights may follow
53.12 + *
53.13 + * $HEADER$
53.14 + */
53.15 +
53.16 +#ifndef COMM_COLL_OP_TYPES_H
53.17 +#define COMM_COLL_OP_TYPES_H
53.18 +
53.19 +#include "ompi_config.h"
53.20 +#include "ompi/communicator/communicator.h"
53.21 +#include "ompi/datatype/ompi_datatype.h"
53.22 +#include "ompi/proc/proc.h"
53.23 +
53.24 +BEGIN_C_DECLS
53.25 +/* message tags used by the PML-based helper collectives; the implementations post them negated */
53.26 +#define OMPI_COMMON_TAG_ALLREDUCE 99
53.27 +#define OMPI_COMMON_TAG_BCAST 98
53.28 +
53.29 +/* Helper collectives over a subgroup of comm: my_rank_in_group and n_peers
53.30 + * describe the subgroup, ranks_in_comm[] holds the comm rank of each member.
53.31 + * Each returns OMPI_SUCCESS or the failing call's error code. */
53.32 +OMPI_DECLSPEC int comm_allgather_pml(void *src_buf, void *dest_buf, int count,
53.33 + ompi_datatype_t *dtype, int my_rank_in_group, int n_peers,
53.34 + int *ranks_in_comm,ompi_communicator_t *comm);
53.35 +OMPI_DECLSPEC int comm_allreduce_pml(void *sbuf, void *rbuf, int count,
53.36 + ompi_datatype_t *dtype, int my_rank_in_group,
53.37 + struct ompi_op_t *op, int n_peers,int *ranks_in_comm,
53.38 + ompi_communicator_t *comm);
53.39 +OMPI_DECLSPEC int comm_bcast_pml(void *buffer, int root, int count,
53.40 + ompi_datatype_t *dtype, int my_rank_in_group,
53.41 + int n_peers, int *ranks_in_comm,ompi_communicator_t
53.42 + *comm);
53.43 +
53.44 +/* reduction operations supported */
53.45 +#define OP_SUM 1
53.46 +#define OP_MAX 2
53.47 +#define OP_MIN 3
53.48 +/* element type codes that accompany the OP_* selectors */
53.49 +#define TYPE_INT4 1
53.50 +
53.51 +
53.52 +END_C_DECLS
53.53 +
53.54 +#endif /* COMM_COLL_OP_TYPES_H */
54.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
54.2 +++ b/ompi/patterns/comm/commpatterns.h Tue Feb 19 22:50:56 2013 +0000
54.3 @@ -0,0 +1,22 @@
54.4 +/*
54.5 + * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
54.6 + * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
54.7 + * $COPYRIGHT$
54.8 + *
54.9 + * Additional copyrights may follow
54.10 + *
54.11 + * $HEADER$
54.12 + */
54.13 +
54.14 +#ifndef COMM_NETPATTERNS_H
54.15 +#define COMM_NETPATTERNS_H
54.16 +
54.17 +#include "ompi_config.h"
54.18 +
54.19 +BEGIN_C_DECLS
54.20 +/* size in bytes of each of the two on-stack scratch buffers used by comm_allreduce_pml */
54.21 +#define MAX_TMP_BUFFER 8192
54.22 +
54.23 +END_C_DECLS
54.24 +
54.25 +#endif /* COMM_NETPATTERNS_H */
55.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
55.2 +++ b/ompi/patterns/net/Makefile.am Tue Feb 19 22:50:56 2013 +0000
55.3 @@ -0,0 +1,18 @@
55.4 +# Copyright (c) 2013 Oak Ridge National Laboratory. All rights reserved.
55.5 +# $COPYRIGHT$
55.6 +#
55.7 +# Additional copyrights may follow
55.8 +#
55.9 +# $HEADER$
55.10 +#
55.11 +
55.12 +headers += \
55.13 + patterns/net/netpatterns.h \
55.14 + patterns/net/netpatterns_knomial_tree.h \
55.15 + patterns/net/coll_ops.h
55.16 +# NOTE(review): patterns/net/allreduce.c is added by this change but is not listed below - presumably intentional, confirm
55.17 +libmpi_la_SOURCES += \
55.18 + patterns/net/netpatterns_base.c \
55.19 + patterns/net/netpatterns_multinomial_tree.c \
55.20 + patterns/net/netpatterns_nary_tree.c \
55.21 + patterns/net/netpatterns_knomial_tree.c
56.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
56.2 +++ b/ompi/patterns/net/allreduce.c Tue Feb 19 22:50:56 2013 +0000
56.3 @@ -0,0 +1,347 @@
56.4 +/*
56.5 + * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
56.6 + * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
56.7 + * Copyright (c) 2012 Los Alamos National Security, LLC.
56.8 + * All rights reserved.
56.9 + * $COPYRIGHT$
56.10 + *
56.11 + * Additional copyrights may follow
56.12 + *
56.13 + * $HEADER$
56.14 + */
56.15 +/** @file */
56.16 +
56.17 +#include "ompi_config.h"
56.18 +
56.19 +#include "ompi/constants.h"
56.20 +#include "coll_sm2.h"
56.21 +#include "ompi/op/op.h"
56.22 +#include "ompi/datatype/ompi_datatype.h"
56.23 +#include "ompi/communicator/communicator.h"
56.24 +#include "ompi/mca/rte/rte.h"
56.25 +/* Send-completion callback: raises the int flag that cbdata points to; every other argument is ignored. */
56.26 +void send_completion(int status, struct ompi_process_name_t* peer, struct iovec* msg,
56.27 + int count, ompi_rml_tag_t tag, void* cbdata)
56.28 +{
56.29 + /* set send completion flag - the poster spins on it */
56.30 + *(int *)cbdata=1;
56.31 +}
56.32 +
56.33 +/* Receive-completion callback: raises the int flag that cbdata points to; every other argument is ignored. */
56.34 +void recv_completion(int status, struct ompi_process_name_t* peer, struct iovec* msg,
56.35 + int count, ompi_rml_tag_t tag, void* cbdata)
56.36 +{
56.37 + /* MB() (presumably a memory barrier) runs before the flag store, so the poster sees the data before the flag */
56.38 + MB();
56.39 + *(int *)cbdata=1;
56.40 +}
56.41 +
56.42 +/* In-place reduction src_dest_buf[i] = src_dest_buf[i] op src_buf[i] for count elements; only OP_SUM on TYPE_INT4 is handled. */
56.43 +static int op_reduce(int op_type, void *src_dest_buf, void *src_buf, int count,
56.44 + int data_type)
56.45 +{
56.46 + /* local variables */
56.47 + int ret=OMPI_SUCCESS;
56.48 +
56.49 + /* op type */
56.50 + switch (op_type) {
56.51 +
56.52 + case OP_SUM:
56.53 +
56.54 + /* element type */
56.55 + switch (data_type) {
56.56 + case TYPE_INT4: {
56.57 + int *int_src_ptr=(int *)src_buf;
56.58 + int *int_src_dst_ptr=(int *)src_dest_buf;
56.59 + int cnt;
56.60 + for(cnt=0 ; cnt < count ; cnt++) {
56.61 + int_src_dst_ptr[cnt]+=int_src_ptr[cnt];
56.62 + }
56.63 + break;
56.64 + }
56.65 + default:
56.66 + ret=OMPI_ERROR;
56.67 + }
56.68 + break;
56.69 +
56.70 + default:
56.71 + ret=OMPI_ERROR;
56.72 + }
56.73 +
56.74 + /* OMPI_SUCCESS, or OMPI_ERROR for an unsupported op/type combination */
56.75 + return ret;
56.76 +}
56.77 +
56.78 +/**
56.79 + * All-reduce for contiguous primitive types among the procs on the peers list, using ompi_rte send/recv; returns an OMPI status code
56.80 + */
56.81 +static int
56.82 +comm_allreduce(void *sbuf, void *rbuf, int count, opal_datatype_t *dtype,
56.83 + int op_type, opal_list_t *peers)
56.84 +{
56.85 + /* local variables */
56.86 + int rc=OMPI_SUCCESS,n_dts_per_buffer,n_data_segments,stripe_number;
56.87 + int pair_rank,exchange,extra_rank;
56.88 + int index_read,index_write;
56.89 + netpatterns_pair_exchange_node_t my_exchange_node;
56.90 + int my_rank,count_processed,count_this_stripe;
56.91 + size_t n_peers,message_extent,len_data_buffer;
56.92 + size_t dt_size;
56.93 + long long tag, base_tag;
56.94 + sm_work_buffer_t *sm_buffer_desc;
56.95 + opal_list_item_t *item;
56.96 + char scratch_bufers[2][MAX_TMP_BUFFER];
56.97 + int send_buffer=0,recv_buffer=1;
56.98 + char *sbuf_current,*rbuf_current;
56.99 + ompi_proc_t **proc_array=NULL;
56.100 + struct iovec send_iov, recv_iov;
56.101 + volatile int *recv_done, *send_done;
56.102 + int recv_completion_flag, send_completion_flag;
56.103 + int data_type, cnt;
56.104 + /* NOTE(review): dt_extent, sm_module, my_ctl_pointer, my_tmp_data_buffer, my_write_pointer and my_read_pointer are used below without a declaration here, and base_tag is read uninitialized - left as found, resolve before building this file */
56.105 + /* get size of data needed - same layout as user data, so that
56.106 + * we can apply the reduction routines directly on these buffers
56.107 + */
56.108 + rc=opal_datatype_type_size(dtype, &dt_size);
56.109 + if( OMPI_SUCCESS != rc ) {
56.110 + goto Error;
56.111 + }
56.112 + message_extent=dt_extent*count;
56.113 +
56.114 + /* length of control and data regions */
56.115 + len_data_buffer=sm_module->data_memory_per_proc_per_segment;
56.116 +
56.117 + /* number of datatype elements that the scratch buffer can hold */
56.118 + n_dts_per_buffer=((int) MAX_TMP_BUFFER)/dt_size;
56.119 + if ( 0 == n_dts_per_buffer ) {
56.120 + rc=OMPI_ERROR;
56.121 + goto Error;
56.122 + }
56.123 +
56.124 + /* need a read and a write buffer for a pair-wise exchange of data */
56.125 + n_dts_per_buffer/=2;
56.126 + len_data_buffer=n_dts_per_buffer*dt_size;
56.127 +
56.128 + /* compute number of stripes needed to process this collective */
56.129 + n_data_segments=(count+n_dts_per_buffer -1 ) / n_dts_per_buffer ;
56.130 +
56.131 + /* number of procs taking part */
56.132 + n_peers=opal_list_get_size(peers);
56.133 +
56.134 + /* get my rank in the list */
56.135 + my_rank=0;
56.136 + for (item = opal_list_get_first(peers) ;
56.137 + item != opal_list_get_end(peers) ;
56.138 + item = opal_list_get_next(item)) {
56.139 + if(ompi_proc_local()==(ompi_proc_t *)item){
56.140 + /* this is the pointer to my proc structure */
56.141 + break;
56.142 + }
56.143 + my_rank++;
56.144 + }
56.145 + proc_array=(ompi_proc_t **)malloc(sizeof(ompi_proc_t *)*n_peers);
56.146 + if( NULL == proc_array) {
56.147 + rc=OMPI_ERR_OUT_OF_RESOURCE; goto Error;
56.148 + }
56.149 + cnt=0;
56.150 + for (item = opal_list_get_first(peers) ;
56.151 + item != opal_list_get_end(peers) ;
56.152 + item = opal_list_get_next(item)) {
56.153 + proc_array[cnt]=(ompi_proc_t *)item;
56.154 + cnt++;
56.155 + }
56.156 +
56.157 + /* get my reduction communication pattern */
56.158 + rc=netpatterns_setup_recursive_doubling_tree_node(n_peers,my_rank,&my_exchange_node);
56.159 + if(OMPI_SUCCESS != rc){
56.160 + goto Error;
56.161 + }
56.162 +
56.163 + /* setup flags for non-blocking communications */
56.164 + recv_done=&recv_completion_flag;
56.165 + send_done=&send_completion_flag;
56.166 +
56.167 + /* set data type - NOTE(review): data_type stays unset for any other dtype, and the reductions below pass TYPE_INT4 directly */
56.168 + if(&opal_datatype_int4==dtype) {
56.169 + data_type=TYPE_INT4;
56.170 + }
56.171 +
56.172 + count_processed=0;
56.173 +
56.174 + /* process the input one stripe of at most n_dts_per_buffer elements at a time */
56.175 + /* NOTE: starting with a rather synchronous approach */
56.176 + for( stripe_number=0 ; stripe_number < n_data_segments ; stripe_number++ ) {
56.177 +
56.178 + /* get number of elements to process in this stripe */
56.179 + count_this_stripe=n_dts_per_buffer;
56.180 + if( count_processed + count_this_stripe > count )
56.181 + count_this_stripe=count-count_processed;
56.182 +
56.183 + /* copy data from the input buffer into the temp buffer */
56.184 + sbuf_current=(char *)sbuf+count_processed*dt_size;
56.185 + memcpy(scratch_bufers[send_buffer],sbuf_current,count_this_stripe*dt_size);
56.186 +
56.187 + /* copy data in from the "extra" source, if need be */
56.188 + if(0 < my_exchange_node.n_extra_sources) {
56.189 +
56.190 + if ( EXCHANGE_NODE == my_exchange_node.node_type ) {
56.191 +
56.192 + /*
56.193 + ** Receive data from extra node
56.194 + */
56.195 +
56.196 + extra_rank=my_exchange_node.rank_extra_source;
56.197 + recv_iov.iov_base=scratch_bufers[recv_buffer];
56.198 + recv_iov.iov_len=count_this_stripe*dt_size;
56.199 + rc = ompi_rte_recv(&(proc_array[extra_rank]->proc_name), &recv_iov, 1,
56.200 + OMPI_RML_TAG_ALLREDUCE , 0);
56.201 + if(OMPI_SUCCESS != rc ) {
56.202 + goto Error;
56.203 + }
56.204 +
56.205 + /* fold my contribution into the data received from the extra rank */
56.206 + if( 0 < count_this_stripe ) {
56.207 + op_reduce(op_type,(void *)scratch_bufers[recv_buffer],
56.208 + (void *)scratch_bufers[send_buffer], count_this_stripe,TYPE_INT4);
56.209 + }
56.210 +
56.211 +
56.212 + } else {
56.213 +
56.214 + /*
56.215 + ** Send data to "partner" node
56.216 + */
56.217 + extra_rank=my_exchange_node.rank_extra_source;
56.218 + send_iov.iov_base=scratch_bufers[send_buffer];
56.219 + send_iov.iov_len=count_this_stripe*dt_size;
56.220 + rc = ompi_rte_send(&(proc_array[extra_rank]->proc_name), &send_iov, 1,
56.221 + OMPI_RML_TAG_ALLREDUCE , 0);
56.222 + if(OMPI_SUCCESS != rc ) {
56.223 + goto Error;
56.224 + }
56.225 + }
56.226 +
56.227 + /* swap the roles of the scratch buffers - this way we can send data
56.228 + ** that we have summed w/o a memory copy, and receive data into the
56.229 + ** other buffer, w/o fear of overwriting data that has not yet
56.230 + ** completed being sent
56.231 + */
56.232 + recv_buffer^=1;
56.233 + send_buffer^=1;
56.234 + }
56.235 +
56.236 + MB();
56.237 + /*
56.238 + * Signal parent that data is ready
56.239 + */
56.240 + tag=base_tag+1;
56.241 + my_ctl_pointer->flag=tag;
56.242 +
56.243 + /* loop over data exchanges */
56.244 + for(exchange=0 ; exchange < my_exchange_node.n_exchanges ; exchange++) {
56.245 +
56.246 + /* debug
56.247 + t4=opal_sys_timer_get_cycles();
56.248 + end debug */
56.249 +
56.250 +
56.251 + my_write_pointer=my_tmp_data_buffer[index_write];
56.252 + my_read_pointer=my_tmp_data_buffer[index_read];
56.253 +
56.254 + /* partner (rank in the peers list) for this step of the exchange */
56.255 + pair_rank=my_exchange_node.rank_exchanges[exchange];
56.256 +
56.257 + *recv_done=0;
56.258 + *send_done=0;
56.259 + MB();
56.260 + /* NOTE(review): the receive below lands in scratch_bufers[send_buffer], the buffer that is also being sent, the buffers are never swapped inside this loop, both _nb calls take the iovec by value unlike the blocking calls, and neither rc is checked - confirm */
56.261 + /* post non-blocking receive */
56.262 + recv_iov.iov_base=scratch_bufers[send_buffer];
56.263 + recv_iov.iov_len=count_this_stripe*dt_size;
56.264 + rc = ompi_rte_recv_nb(&(proc_array[pair_rank]->proc_name), recv_iov, 1,
56.265 + OMPI_RML_TAG_ALLREDUCE , 0, recv_completion, recv_done);
56.266 +
56.267 + /* post non-blocking send */
56.268 + send_iov.iov_base=scratch_bufers[send_buffer];
56.269 + send_iov.iov_len=count_this_stripe*dt_size;
56.270 + rc = ompi_rte_send_nb(&(proc_array[pair_rank]->proc_name), send_iov, 1,
56.271 + OMPI_RML_TAG_ALLREDUCE , 0, send_completion, send_done);
56.272 +
56.273 + /* wait on receive completion */
56.274 + while(!(*recv_done) ) {
56.275 + opal_progress();
56.276 + }
56.277 +
56.278 + /* reduce the data */
56.279 + if( 0 < count_this_stripe ) {
56.280 + op_reduce(op_type,(void *)scratch_bufers[recv_buffer],
56.281 + (void *)scratch_bufers[send_buffer], count_this_stripe,TYPE_INT4);
56.282 + }
56.283 +
56.284 +
56.285 + /* get ready for next step */
56.286 + index_read=(exchange&1);
56.287 + index_write=((exchange+1)&1);
56.288 +
56.289 + /* wait on send completion */
56.290 + while(!(*send_done) ) {
56.291 + opal_progress();
56.292 + }
56.293 +
56.294 + }
56.295 +
56.296 + /* hand the final data to / take it from the "extra" source, if need be */
56.297 + if(0 < my_exchange_node.n_extra_sources) {
56.298 +
56.299 + if ( EXTRA_NODE == my_exchange_node.node_type ) {
56.300 + /*
56.301 + ** receive the data
56.302 + ** */
56.303 + extra_rank=my_exchange_node.rank_extra_source;
56.304 +
56.305 + recv_iov.iov_base=scratch_bufers[recv_buffer];
56.306 + recv_iov.iov_len=count_this_stripe*dt_size;
56.307 + rc = ompi_rte_recv(&(proc_array[extra_rank]->proc_name), &recv_iov, 1,
56.308 + OMPI_RML_TAG_ALLREDUCE , 0);
56.309 + if(OMPI_SUCCESS != rc ) {
56.310 + goto Error;
56.311 + }
56.312 +
56.313 + } else {
56.314 + /* send the data to the pair-rank outside of the power of 2 set
56.315 + ** of ranks
56.316 + */
56.317 +
56.318 + extra_rank=my_exchange_node.rank_extra_source;
56.319 + send_iov.iov_base=scratch_bufers[recv_buffer];
56.320 + send_iov.iov_len=count_this_stripe*dt_size;
56.321 + rc = ompi_rte_send(&(proc_array[extra_rank]->proc_name), &send_iov, 1,
56.322 + OMPI_RML_TAG_ALLREDUCE , 0);
56.323 + if(OMPI_SUCCESS != rc ) {
56.324 + goto Error;
56.325 + }
56.326 + }
56.327 + }
56.328 +
56.329 + /* copy data into the destination buffer - NOTE(review): duplicates the copy below and reads the undeclared my_write_pointer; confirm which copy is intended */
56.330 + rc=ompi_datatype_copy_content_same_ddt(dtype, count_this_stripe,
56.331 + (char *)((char *)rbuf+dt_extent*count_processed),
56.332 + (char *)my_write_pointer);
56.333 + if( 0 != rc ) {
56.334 + rc=OMPI_ERROR; goto Error;
56.335 + }
56.336 +
56.337 + /* copy data from the temp buffer into the output buffer */
56.338 + rbuf_current=(char *)rbuf+count_processed*dt_size;
56.339 + memcpy(rbuf_current,scratch_bufers[recv_buffer],count_this_stripe*dt_size);
56.340 +
56.341 + /* update the count of elements processed */
56.342 + count_processed+=count_this_stripe;
56.343 + }
56.344 +
56.345 + /* success and failure share one exit so that proc_array is always released */
56.346 +Error:
56.347 + free(proc_array);
56.348 +
56.349 + return rc;
56.350 +}
57.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
57.2 +++ b/ompi/patterns/net/coll_ops.h Tue Feb 19 22:50:56 2013 +0000
57.3 @@ -0,0 +1,29 @@
57.4 +/*
57.5 + * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
57.6 + * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
57.7 + * $COPYRIGHT$
57.8 + *
57.9 + * Additional copyrights may follow
57.10 + *
57.11 + * $HEADER$
57.12 + */
57.13 +
57.14 +#ifndef COMM_OP_TYPES_H
57.15 +#define COMM_OP_TYPES_H
57.16 +
57.17 +#include "ompi_config.h"
57.18 +
57.19 +BEGIN_C_DECLS
57.20 +
57.21 +int comm_allreduce(void *sbuf, void *rbuf, int count, opal_datatype_t *dtype,
57.22 + int op, opal_list_t *peers);
57.23 +
57.24 +/* reduction operations supported */
57.25 +#define OP_SUM 1
57.26 +/* element type codes (TYPE_INT4 is what the allreduce code hands to op_reduce) */
57.27 +#define TYPE_INT4 1
57.28 +
57.29 +
57.30 +END_C_DECLS
57.31 +
57.32 +#endif /* COMM_OP_TYPES_H */
58.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
58.2 +++ b/ompi/patterns/net/netpatterns.h Tue Feb 19 22:50:56 2013 +0000
58.3 @@ -0,0 +1,152 @@
58.4 +/*
58.5 + * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
58.6 + * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
58.7 + * Copyright (c) 2012 Los Alamos National Security, LLC.
58.8 + * All rights reserved.
58.9 + * $COPYRIGHT$
58.10 + *
58.11 + * Additional copyrights may follow
58.12 + *
58.13 + * $HEADER$
58.14 + */
58.15 +
58.16 +#ifndef COMM_PATTERNS_H
58.17 +#define COMM_PATTERNS_H
58.18 +
58.19 +#include "ompi_config.h"
58.20 +#include "orte/util/proc_info.h"
58.21 +#include "orte/runtime/orte_globals.h"
58.22 +#include "orte/util/name_fns.h"
58.23 +
58.24 +#include "netpatterns_knomial_tree.h"
58.25 +
58.26 +BEGIN_C_DECLS
58.27 +
58.28 +int netpatterns_base_err(const char* fmt, ...);
58.29 +int netpatterns_register_mca_params(void);
58.30 +
58.31 +#if OPAL_ENABLE_DEBUG
58.32 +extern int netpatterns_base_verbose; /* disabled by default */
58.33 +OMPI_DECLSPEC extern int netpatterns_base_err(const char*, ...) __opal_attribute_format__(__printf__, 1, 2);
58.34 +#define NETPATTERNS_VERBOSE(args) /* args: a parenthesized printf-style argument list */ \
58.35 + do { \
58.36 + if(netpatterns_base_verbose > 0) { \
58.37 + netpatterns_base_err("[%s]%s[%s:%d:%s] ",\
58.38 + orte_process_info.nodename, \
58.39 + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), \
58.40 + __FILE__, __LINE__, __func__); \
58.41 + netpatterns_base_err args; \
58.42 + netpatterns_base_err("\n"); \
58.43 + } \
58.44 + } while(0) /* no trailing ';': the caller supplies it, keeping the macro a single statement */
58.45 +#else
58.46 +#define NETPATTERNS_VERBOSE(args)
58.47 +#endif
58.48 +/* FIND_BASE: sets base to the largest multiple of k^level that is <= myid (int arguments, myid >= 0) */
58.49 +#define FIND_BASE(base,myid,level,k) \
58.50 + do { \
58.51 + int fb_pow_ = 1; /* becomes k^level */ \
58.52 + int fb_i_; \
58.53 + int fb_mult_ = 1; \
58.54 + \
58.55 + (base) = 0; \
58.56 + for( fb_i_ = 0; fb_i_ < (level); fb_i_++) { \
58.57 + fb_pow_ *= (k); \
58.58 + } \
58.59 + /* find the smallest multiplier whose multiple of k^level exceeds myid */ \
58.60 + while((myid) >= fb_mult_*fb_pow_){ \
58.61 + fb_mult_++; \
58.62 + } \
58.63 + (base) = fb_mult_*fb_pow_ - fb_pow_; \
58.64 + } while(0)
58.65 +
58.66 +
58.67 +
58.68 +
58.69 +/* node roles in a tree (root, leaf, or interior), as recorded in the my_node_type fields */
58.70 +enum {
58.71 + ROOT_NODE,
58.72 + LEAF_NODE,
58.73 + INTERIOR_NODE
58.74 +};
58.75 +
58.76 +
58.77 +/*
58.78 + * N-order tree node description
58.79 + */
58.80 +struct netpatterns_tree_node_t {
58.81 + /* my rank within the group */
58.82 + int my_rank;
58.83 + /* my node type - ROOT_NODE, LEAF_NODE, or INTERIOR_NODE */
58.84 + int my_node_type;
58.85 + /* number of nodes in the tree */
58.86 + int tree_size;
58.87 + /* number of parents (0/1) */
58.88 + int n_parents;
58.89 + /* number of children */
58.90 + int n_children;
58.91 + /* parent rank within the group */
58.92 + int parent_rank;
58.93 + /* children ranks within the group (presumably n_children entries - confirm in the setup code) */
58.94 + int *children_ranks;
58.95 +};
58.96 +typedef struct netpatterns_tree_node_t netpatterns_tree_node_t;
58.97 +
58.98 +struct netpatterns_k_exchange_node_t;
58.99 +/*
58.100 + * N-order + k-nomial tree node description
58.101 + */
58.102 +struct netpatterns_narray_knomial_tree_node_t {
58.103 + /* my rank within the group */
58.104 + int my_rank;
58.105 + /* my node type - ROOT_NODE, LEAF_NODE, or INTERIOR_NODE */
58.106 + int my_node_type;
58.107 + /* number of nodes in the tree */
58.108 + int tree_size;
58.109 + /* number of parents (0/1) */
58.110 + int n_parents;
58.111 + /* number of children */
58.112 + int n_children;
58.113 + /* parent rank within the group */
58.114 + int parent_rank;
58.115 + /* children ranks within the group (presumably n_children entries - confirm in the setup code) */
58.116 + int *children_ranks;
58.117 + /* Total number of ranks on this specific level */
58.118 + int level_size;
58.119 + /* Rank of this node inside of its level */
58.120 + int rank_on_level;
58.121 + /* K-nomial recursive gather information (embedded by value) */
58.122 + struct netpatterns_k_exchange_node_t k_node;
58.123 +};
58.124 +typedef struct netpatterns_narray_knomial_tree_node_t
58.125 +netpatterns_narray_knomial_tree_node_t;
58.126 +
58.127 +
58.128 +/* Init code for netpatterns; callable by every user, only the first call registers parameters */
58.129 +OMPI_DECLSPEC int netpatterns_init(void);
58.130 +
58.131 +/* set up an n-array tree */
58.132 +OMPI_DECLSPEC int netpatterns_setup_narray_tree(int tree_order, int my_rank, int num_nodes,
58.133 + netpatterns_tree_node_t *my_node);
58.134 +/* set up an n-array tree with k-nomial levels */
58.135 +OMPI_DECLSPEC int netpatterns_setup_narray_knomial_tree( int tree_order, int my_rank, int num_nodes,
58.136 + netpatterns_narray_knomial_tree_node_t *my_node);
58.137 +
58.138 +/* set up a multinomial tree - for each node in the tree
58.139 + * this returns its parent and its children
58.140 + */
58.141 +OMPI_DECLSPEC int netpatterns_setup_multinomial_tree(int tree_order, int num_nodes,
58.142 + netpatterns_tree_node_t *tree_nodes);
58.143 +
58.144 +OMPI_DECLSPEC int netpatterns_setup_narray_tree_contigous_ranks(int tree_order,
58.145 + int num_nodes, netpatterns_tree_node_t **tree_nodes);
58.146 +
58.147 +/* calculate the nearest power of radix that is equal to or greater
58.148 + * than size, with the specified radix. The resulting tree is of
58.149 + * depth n_lvls.
58.150 + */
58.151 +OMPI_DECLSPEC int roundup_to_power_radix( int radix, int size, int *n_lvls );
58.152 +
58.153 +END_C_DECLS
58.154 +
58.155 +#endif /* COMM_PATTERNS_H */
59.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
59.2 +++ b/ompi/patterns/net/netpatterns_base.c Tue Feb 19 22:50:56 2013 +0000
59.3 @@ -0,0 +1,53 @@
59.4 +/*
59.5 + *
59.6 + * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
59.7 + * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
59.8 + * $COPYRIGHT$
59.9 + *
59.10 + * Additional copyrights may follow
59.11 + *
59.12 + * $HEADER$
59.13 + */
59.14 +#include "opal/mca/base/mca_base_param.h"
59.15 +#include "ompi/include/ompi/constants.h"
59.16 +#include "netpatterns.h"
59.17 +
59.18 +int netpatterns_base_verbose = 0; /* disabled by default */
59.19 +
59.20 +int netpatterns_register_mca_params(void)
59.21 +{
59.22 + /* Register the verbosity knob through mca_base_param_reg_int_name(); it is
59.23 + * stored in netpatterns_base_verbose (default 0). The call's result is not inspected. */
59.24 + (void) mca_base_param_reg_int_name("common", "netpatterns_base_verbose",
59.25 + "Verbosity level of the NETPATTERNS framework",
59.26 + false, false, 0,
59.27 + &netpatterns_base_verbose);
59.28 +
59.29 + return OMPI_SUCCESS;
59.30 +}
59.31 +
59.32 +int netpatterns_base_err(const char* fmt, ...)
59.33 +{
59.34 + /* printf-style output to stderr; hands back vfprintf()'s return value */
59.35 + va_list ap;
59.36 + int n_written;
59.37 + va_start(ap, fmt);
59.38 + n_written = vfprintf(stderr, fmt, ap);
59.39 + va_end(ap);
59.40 + return n_written;
59.41 +}
59.42 +
59.43 +int netpatterns_init(void)
59.44 +{
59.45 + /* netpatterns has no component of its own, so every component that uses it
59.46 + calls this function; the static flag makes only the first call do work */
59.47 + static int already_initialized = 0;
59.48 +
59.49 + if (0 != already_initialized) {
59.50 + /* a previous call already registered the parameters */
59.51 + return OMPI_SUCCESS;
59.52 + }
59.53 +
59.54 + already_initialized = 1;
59.55 + return netpatterns_register_mca_params();
59.56 +}
60.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
60.2 +++ b/ompi/patterns/net/netpatterns_knomial_tree.c Tue Feb 19 22:50:56 2013 +0000
60.3 @@ -0,0 +1,932 @@
60.4 +/*
60.5 + * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
60.6 + * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
60.7 +* $COPYRIGHT$
60.8 + *
60.9 + * Additional copyrights may follow
60.10 + *
60.11 + * $HEADER$
60.12 + */
60.13 +
60.14 +#include "ompi_config.h"
60.15 +#ifdef HAVE_UNISTD_H
60.16 +#include <unistd.h>
60.17 +#endif
60.18 +#include <sys/types.h>
60.19 +#ifdef HAVE_SYS_MMAN_H
60.20 +#include <sys/mman.h>
60.21 +#endif
60.22 +#include <fcntl.h>
60.23 +#include <stdlib.h>
60.24 +#include <assert.h>
60.25 +
60.26 +#include "ompi/constants.h"
60.27 +#include "netpatterns.h"
60.28 +
60.29 +/* Set up the recursive k-nomial allgather exchange pattern for rank node_rank: reindex maps,
60.30 + * per-level payload lengths/offsets, extra-source rank, exchange partners. Returns OMPI_SUCCESS or OMPI_ERROR. */
60.31 +OMPI_DECLSPEC int netpatterns_setup_recursive_knomial_allgather_tree_node(
60.32 + int num_nodes, int node_rank, int tree_order, int *hier_ranks,
60.33 + netpatterns_k_exchange_node_t *exchange_node)
60.34 +{
60.35 + /* local variables */
60.36 + int i, j, cnt, i_temp;
60.37 + int knt,knt2,kk, ex_node, stray;
60.38 + int n_levels,pow_k;
60.39 + int k_temp1;
60.40 + int k_temp2;
60.41 + int myid, reindex_myid = 0;
60.42 + int base, peer_base,base_temp;
60.43 + int peer;
60.44 + int *prev_data = NULL;
60.45 + int *current_data = NULL;
60.46 + int *group_info = NULL;
60.47 + exchange_node->rank_extra_sources_array = NULL; /* the Error path releases these two, */
60.48 + exchange_node->rank_exchanges = NULL; /* so they must be NULL until allocated */
60.49 + NETPATTERNS_VERBOSE(
60.50 + ("Enter netpatterns_setup_recursive_knomial_allgather_tree_node(num_nodes=%d, node_rank=%d, tree_order=%d)",
60.51 + num_nodes, node_rank, tree_order));
60.52 +
60.53 + assert(num_nodes > 1);
60.54 + assert(tree_order > 1);
60.55 + if (tree_order > num_nodes) {
60.56 + tree_order = num_nodes;
60.57 + }
60.58 +
60.59 + /* k-nomial radix */
60.60 + exchange_node->tree_order = tree_order;
60.61 +
60.62 + /* Calculate the number of levels in the tree for
60.63 + * the largest power of tree_order less than or
60.64 + * equal to the group size
60.65 + */
60.66 + n_levels = 0;
60.67 + cnt=1;
60.68 + while ( num_nodes > cnt ) {
60.69 + cnt *= tree_order;
60.70 + n_levels++;
60.71 + }
60.72 + /* this is the actual number of recursive k-ing steps
60.73 + * we will perform, the last step may not be a full
60.74 + * step depending on the outcome of the next conditional
60.75 + */
60.76 + pow_k = n_levels;
60.77 +
60.78 + /* figure out the largest power of tree_order that is less than or equal to
60.79 + * num_nodes */
60.80 + if ( cnt > num_nodes) {
60.81 + cnt /= tree_order;
60.82 + n_levels--;
60.83 + }
60.84 +
60.85 + /*exchange_node->log_tree_order = n_levels;*/
60.86 + exchange_node->log_tree_order = pow_k;
60.87 + exchange_node->n_largest_pow_tree_order = cnt;
60.88 +
60.89 +
60.90 + /* find the number of complete groups of size tree_order, tree_order^2, tree_order^3,...,tree_order^pow_k */
60.91 + /* I don't think we need to cache this info this group_info array */
60.92 + group_info = (int *) calloc(pow_k , sizeof(int));
60.93 + if( NULL == group_info ) { goto Error; }
60.94 + group_info[0] = num_nodes/tree_order;
60.95 + for ( i = 1; i < pow_k; i ++) {
60.96 + group_info[i] = group_info[i-1]/tree_order;
60.97 + /*fprintf(stderr,"Number of complete groups of power %d is %d\n",i+1,group_info[i]);*/
60.98 +
60.99 + }
60.100 +
60.101 + /* find number of incomplete groups and number of ranks belonging to those ranks */
60.102 + knt=0;
60.103 + while (knt <= (pow_k - 1) && group_info[knt] > 0) {
60.104 + knt++;
60.105 + }
60.106 + knt--;
60.107 + /*fprintf(stderr,"Maximal power of k is %d and the number of incomplete groups is %d \n", knt+1 ,tree_order - group_info[knt] );*/
60.108 +
60.109 + /* k_temp is a synonym for cnt which is the largest full power of k group */
60.110 + /* now, start the calculation to find the first stray rank aka "extra" rank */
60.111 + stray = 0;
60.112 + /*fprintf(stderr,"Maximal power of k %d, first stragler rank is %d and the number of straglers is %d\n",cnt,
60.113 + cnt*group_info[knt],
60.114 + num_nodes - cnt*group_info[knt]);*/
60.115 +
60.116 +
60.117 + /* cache this info, it's muy importante */
60.118 + stray = cnt*group_info[knt];
60.119 + exchange_node->k_nomial_stray = stray;
60.120 +
60.121 +
60.122 +
60.123 + /* before we do this, we need to first reindex */
60.124 + /* reindexing phase */
60.125 + /* this is the reindex phase */
60.126 + exchange_node->reindex_map = (int *) malloc(num_nodes*sizeof(int));
60.127 + /* this is the inverse map */
60.128 + exchange_node->inv_reindex_map = (int *) malloc(num_nodes*sizeof(int));
60.129 + if( NULL == exchange_node->reindex_map || NULL == exchange_node->inv_reindex_map ) { goto Error; }
60.130 + /* reindex */
60.131 + if( stray < num_nodes ) {
60.132 + /* find the first proxy rank */
60.133 + peer = stray - cnt;
60.134 + /* fix all ranks prior to this rank */
60.135 + for( i = 0; i < peer; i++){
60.136 + exchange_node->reindex_map[i] = i;
60.137 + }
60.138 + /* now, start the swap */
60.139 + exchange_node->reindex_map[peer] = peer;
60.140 + for( i = (peer+1); i < (peer + (num_nodes - stray)+1); i++) {
60.141 + exchange_node->reindex_map[i] = exchange_node->reindex_map[i-1] + 2;
60.142 + }
60.143 + i_temp = i;
60.144 + for( i = i_temp; i < stray; i++) {
60.145 + exchange_node->reindex_map[i] = exchange_node->reindex_map[i-1] + 1;
60.146 + }
60.147 + /* now, finish it off */
60.148 + exchange_node->reindex_map[stray] = peer + 1;
60.149 + for( i = (stray+1); i < num_nodes; i++) {
60.150 + exchange_node->reindex_map[i] = exchange_node->reindex_map[i-1] + 2;
60.151 + }
60.152 + /* debug print */
60.153 + /*
60.154 + for( i = 0; i < np; i++){
60.155 + fprintf(stderr,"%d ",reindex_map[i]);
60.156 + }
60.157 + fprintf(stderr,"\n");
60.158 + */
60.159 + } else {
60.160 + /* we have no extras, trivial reindexing */
60.161 + for( i = 0; i < num_nodes; i++){
60.162 + exchange_node->reindex_map[i] = i;
60.163 + }
60.164 + }
60.165 + /* finished reindexing */
60.166 +
60.167 + /* Now, I need to get my rank in the new indexing */
60.168 + for( i = 0; i < num_nodes; i++ ){
60.169 + if( node_rank == exchange_node->reindex_map[i] ){
60.170 + exchange_node->reindex_myid = i;
60.171 + break;
60.172 + }
60.173 + }
60.174 + /* Now, let's compute the inverse mapping here */
60.175 + for( i = 0; i < num_nodes; i++){
60.176 + j = 0;
60.177 + while(exchange_node->reindex_map[j] != i ){
60.178 + j++;
60.179 + }
60.180 + exchange_node->inv_reindex_map[i] = j;
60.181 + }
60.182 +
60.183 +
60.184 + /* Now we get the data sizes we should expect at each level */
60.185 + /* now get the size of the data I am to receive from each peer */
60.186 + /*int **payload_info;*/
60.187 + prev_data = (int *) malloc( num_nodes*sizeof(int) );
60.188 + if( NULL == prev_data ) {
60.189 + goto Error;
60.190 + }
60.191 +
60.192 + current_data = (int *) malloc( num_nodes*sizeof(int) );
60.193 + if( NULL == current_data ) {
60.194 + goto Error;
60.195 + }
60.196 +
60.197 +
60.198 + exchange_node->payload_info = (netpatterns_payload_t **) malloc(sizeof(netpatterns_payload_t *)*pow_k);
60.199 + if( NULL == exchange_node->payload_info) {
60.200 + goto Error;
60.201 + }
60.202 +
60.203 + for(i = 0; i < pow_k; i++){
60.204 + exchange_node->payload_info[i] = (netpatterns_payload_t *) malloc(sizeof(netpatterns_payload_t)*(tree_order-1));
60.205 + if( NULL == exchange_node->payload_info[i]) {
60.206 + goto Error;
60.207 + }
60.208 +
60.209 + }
60.210 + /* initialize the payload array
60.211 + This is the money struct, just need to initialize this with
60.212 + the subgroup information */
60.213 + /*
60.214 + for(i = 0; i < num_nodes; i++){
60.215 + prev_data[i] = 1;
60.216 + current_data[i] = 1;
60.217 + }
60.218 + */
60.219 +
60.220 + for(i = 0; i < num_nodes; i++){
60.221 + prev_data[i] = hier_ranks[i];
60.222 + current_data[i] = hier_ranks[i];
60.223 + }
60.224 +
60.225 + /* everyone will need to do this loop over all ranks
60.226 + * Phase I calculate the contribution from the extra ranks
60.227 + */
60.228 + for( myid = 0; myid < num_nodes; myid++) {
60.229 + /* get my new rank */
60.230 + for( j = 0; j < num_nodes; j++ ){
60.231 + /* this will be satisfied for one of the indices */
60.232 + if( myid == exchange_node->reindex_map[j] ){
60.233 + reindex_myid = j;
60.234 + break;
60.235 + }
60.236 + }
60.237 +
60.238 + for( j = stray; j < num_nodes; j++) {
60.239 + if(reindex_myid == ( j - cnt )) {
60.240 + /* then this is a proxy rank */
60.241 + prev_data[myid] += prev_data[exchange_node->reindex_map[j]];
60.242 + break;
60.243 + }
60.244 +
60.245 + }
60.246 + }
60.247 +
60.248 + /* Phase II calculate the contribution from each recursive k - ing level
60.249 + *
60.250 + */
60.251 + k_temp1 = tree_order; /* k^1 */
60.252 + k_temp2 = 1; /* k^0 */
60.253 + peer_base = 0;
60.254 + base_temp = 0;
60.255 + for( i = 0; i < pow_k; i++) {
60.256 + /* get my new rank */
60.257 + for( myid = 0; myid < num_nodes; myid++){
60.258 + current_data[myid] = prev_data[myid];
60.259 + /*fprintf(stderr,"my current data at level %d is %d\n",i+1,current_data[myid]);*/
60.260 + for( j = 0; j < num_nodes; j++ ){
60.261 + if( myid == exchange_node->reindex_map[j] ){
60.262 + reindex_myid = j;
60.263 + break;
60.264 + }
60.265 + }
60.266 + if( reindex_myid < stray ) {
60.267 + /* now start the actual algorithm */
60.268 + FIND_BASE(base,reindex_myid,i+1,tree_order);
60.269 + for( j = 0; j < ( tree_order - 1 ); j ++ ) {
60.270 + peer = base + (reindex_myid + k_temp2*(j+1))%k_temp1;
60.271 + if( peer < stray ) {
60.272 + /*fprintf(stderr,"getting %d bytes \n",prev_data[reindex_map[peer]]);*/
60.273 + /* then get the data */
60.274 + if( node_rank == myid ){
60.275 + exchange_node->payload_info[i][j].r_len = prev_data[exchange_node->reindex_map[peer]];
60.276 + /*fprintf(stderr,"exchange_node->payload_info[%d][%d].r_len %d\n",i,j,prev_data[exchange_node->reindex_map[peer]]);*/
60.277 + if( i > 0 ) {
60.278 +
60.279 + /* find my len and offset */
60.280 + FIND_BASE(peer_base,peer,i,tree_order);
60.281 + /* I do not want to mess with this, but it seems that I have no choice */
60.282 + ex_node = exchange_node->reindex_map[peer_base];
60.283 + /* now, find out how far down the line this guy really is */
60.284 + knt2 =0;
60.285 + for(kk = 0; kk < ex_node; kk++){
60.286 + knt2 += hier_ranks[kk];
60.287 + }
60.288 + exchange_node->payload_info[i][j].r_offset = knt2;
60.289 + /*fprintf(stderr,"exchange_node->payload_info[%d][%d].r_offset %d\n",i,j,exchange_node->payload_info[i][j].r_offset);*/
60.290 +
60.291 + FIND_BASE(base_temp,reindex_myid,i,tree_order);
60.292 + ex_node = exchange_node->reindex_map[base_temp];
60.293 + knt2 = 0;
60.294 + for( kk = 0; kk < ex_node; kk++){
60.295 + knt2 += hier_ranks[kk];
60.296 + }
60.297 + exchange_node->payload_info[i][j].s_offset =
60.298 + knt2; /* exchange_node->reindex_map[base_temp]; */
60.299 + /*fprintf(stderr,"exchange_node->payload_info[%d][%d].s_offset %d\n",i,j,exchange_node->payload_info[i][j].s_offset);*/
60.300 + } else {
60.301 + ex_node = exchange_node->reindex_map[peer];
60.302 + knt2 =0;
60.303 + for(kk = 0; kk < ex_node; kk++){
60.304 + knt2 += hier_ranks[kk];
60.305 + }
60.306 + exchange_node->payload_info[i][j].r_offset =
60.307 + knt2; /*exchange_node->reindex_map[peer]; */
60.308 + /*fprintf(stderr,"exchange_node->payload_info[%d][%d].r_offset %d\n",i,j,exchange_node->payload_info[i][j].r_offset);*/
60.309 + knt2 = 0;
60.310 + for(kk = 0; kk < myid; kk++){
60.311 + knt2 += hier_ranks[kk];
60.312 + }
60.313 + exchange_node->payload_info[i][j].s_offset = knt2;
60.314 + /*fprintf(stderr,"exchange_node->payload_info[%d][%d].s_offset %d\n",i,j, exchange_node->payload_info[i][j].s_offset);*/
60.315 + }
60.316 + /* how much I am to receive from this peer on this level */
60.317 + /* how much I am to send to this peer on this level */
60.318 + exchange_node->payload_info[i][j].s_len = prev_data[node_rank];
60.319 + /*fprintf(stderr,"exchange_node->payload_info[%d][%d].s_len %d\n",i,j,prev_data[node_rank]);*/
60.320 + /*fprintf(stderr,"I am rank %d receiveing %d bytes from rank %d at level %d\n",node_rank,
60.321 + prev_data[exchange_node->reindex_map[peer]],
60.322 + exchange_node->reindex_map[peer], i+1);*/
60.323 + /*fprintf(stderr,"I am rank %d sending %d bytes to rank %d at level %d\n",node_rank,prev_data[myid],
60.324 + exchange_node->reindex_map[peer],i+1);*/
60.325 + }
60.326 +
60.327 + current_data[myid] += prev_data[exchange_node->reindex_map[peer]];
60.328 + }
60.329 + }
60.330 + }
60.331 +
60.332 +
60.333 + }
60.334 + k_temp1 *= tree_order;
60.335 + k_temp2 *= tree_order;
60.336 + /* debug print */
60.337 + /* fprintf(stderr,"Level %d current data ",i+1);*/
60.338 + for( j = 0; j < num_nodes; j++){
60.339 + /* fprintf(stderr,"%d ",current_data[j]); */
60.340 + prev_data[j] = current_data[j];
60.341 + }
60.342 + /* fprintf(stderr,"\n");*/
60.343 +
60.344 + }
60.345 +
60.346 +
60.347 + /* this is the natural way to do recursive k-ing */
60.348 + /* should never have more than one extra rank per proxy */
60.349 + if( exchange_node->reindex_myid >= stray ){
60.350 + /*fprintf(stderr,"Rank %d is mapped onto proxy rank %d \n",exchange_node->reindex_myid,exchange_node->reindex_myid - cnt);*/
60.351 + exchange_node->node_type = EXTRA_NODE;
60.352 + } else {
60.353 + exchange_node->node_type = EXCHANGE_NODE;
60.354 + }
60.355 +
60.356 + /* set node characteristics - node that is not within the largest
60.357 + * power of tree_order will just send its data to node that will participate
60.358 + * in the recursive k-ing, and get the result back at the end.
60.359 + * set the initial and final data exchanges - those that are not
60.360 + * part of the recursive k-ing.
60.361 + */
60.362 + if (EXCHANGE_NODE == exchange_node->node_type) {
60.363 + exchange_node->n_extra_sources = 0;
60.364 + for( i = stray; i < num_nodes; i++) {
60.365 + if(exchange_node->reindex_myid == ( i - cnt )) {
60.366 + /* then I am a proxy rank and there is only a
60.367 + * single extra source
60.368 + */
60.369 + exchange_node->n_extra_sources = 1;
60.370 + break;
60.371 + }
60.372 + }
60.373 +
60.374 + if (exchange_node->n_extra_sources > 0) {
60.375 + exchange_node->rank_extra_sources_array = (int *) malloc
60.376 + (exchange_node->n_extra_sources * sizeof(int));
60.377 + if( NULL == exchange_node->rank_extra_sources_array ) {
60.378 + goto Error;
60.379 + }
60.380 + /* you broke above */
60.381 + exchange_node->rank_extra_sources_array[0] = exchange_node->reindex_map[i];
60.382 + } else {
60.383 + exchange_node->rank_extra_sources_array = NULL;
60.384 + }
60.385 + } else {
60.386 + /* I am an extra rank, find my proxy rank */
60.387 + exchange_node->n_extra_sources = 1;
60.388 +
60.389 + exchange_node->rank_extra_sources_array = (int *) malloc
60.390 + (exchange_node->n_extra_sources * sizeof(int));
60.391 + if( NULL == exchange_node->rank_extra_sources_array ) {
60.392 + goto Error;
60.393 + }
60.394 + exchange_node->rank_extra_sources_array[0] = exchange_node->reindex_map[exchange_node->reindex_myid - cnt];
60.395 + }
60.396 +
60.397 +
60.398 + /* set the exchange pattern */
60.399 + if (EXCHANGE_NODE == exchange_node->node_type) {
60.400 + /* yep, that's right PLUS 1 */
60.401 + exchange_node->n_exchanges = n_levels + 1;
60.402 + /* initialize this */
60.403 + exchange_node->n_actual_exchanges = 0;
60.404 + /* Allocate the zero-filled row table of the 2 dimension array that keeps
60.405 + rank exchange information for each step (unallocated rows stay NULL) */
60.406 + exchange_node->rank_exchanges = (int **) calloc
60.407 + (exchange_node->n_exchanges, sizeof(int *));
60.408 + if(NULL == exchange_node->rank_exchanges) {
60.409 + goto Error;
60.410 + }
60.411 + for (i = 0; i < exchange_node->n_exchanges; i++) {
60.412 + exchange_node->rank_exchanges[i] = (int *) malloc
60.413 + ((tree_order - 1) * sizeof(int));
60.414 + if( NULL == exchange_node->rank_exchanges[i] ) {
60.415 + goto Error;
60.416 + }
60.417 + }
60.418 + k_temp1 = tree_order;
60.419 + k_temp2 = 1;
60.420 + /* fill in exchange partners */
60.421 + /* Ok, now we start with the actual algorithm */
60.422 + for( i = 0; i < exchange_node->n_exchanges; i ++) {
60.423 + /*fprintf(stderr,"Starting Level %d\n",i+1);*/
60.424 +
60.425 + FIND_BASE(base,exchange_node->reindex_myid,i+1,tree_order);
60.426 + /*fprintf(stderr,"Myid %d base %d\n",node_rank,base);*/
60.427 + for( j = 0; j < (tree_order-1); j ++ ) {
60.428 + peer = base + (exchange_node->reindex_myid + k_temp2*(j+1))%k_temp1;
60.429 + if ( peer < stray ) {
60.430 + exchange_node->rank_exchanges[i][j] = exchange_node->reindex_map[peer];
60.431 + /* an actual exchange occurs, bump the counter */
60.432 +
60.433 + } else {
60.434 + /* out of range, skip it - do not bump the n_actual_exchanges counter */
60.435 + exchange_node->rank_exchanges[i][j] = -1;
60.436 + }
60.437 +
60.438 + }
60.439 + k_temp1 *= tree_order;
60.440 + k_temp2 *= tree_order;
60.441 + }
60.442 + for(i = 0; i < pow_k; i++){
60.443 + for(j = 0; j < (tree_order-1); j++){
60.444 + if(-1 != exchange_node->rank_exchanges[i][j]){
60.445 + /* then bump the counter */
60.446 + exchange_node->n_actual_exchanges++;
60.447 + }
60.448 + }
60.449 + }
60.450 +
60.451 + } else {
60.452 + /* we are extra ranks and we don't participate in the exchange :( */
60.453 + exchange_node->n_exchanges=0;
60.454 + exchange_node->rank_exchanges=NULL;
60.455 + }
60.456 +
60.457 +
60.458 + /* set the number of tags needed per stripe - this must be the
60.459 + * same across all procs in the communicator.
60.460 + */
60.461 + /* do we need this one */
60.462 + exchange_node->n_tags = tree_order * n_levels + 1;
60.463 +
60.464 + free(prev_data);
60.465 + free(current_data);
60.466 + free(group_info);
60.467 +
60.468 + /* successful return */
60.469 + return OMPI_SUCCESS;
60.470 +
60.471 +Error:
60.472 + /* undo the partial setup; the pointers freed here are reset so they are not left dangling */
60.473 + if (NULL != exchange_node->rank_extra_sources_array) {
60.474 + free(exchange_node->rank_extra_sources_array);
60.475 + exchange_node->rank_extra_sources_array = NULL;
60.476 + }
60.477 + if (NULL != exchange_node->rank_exchanges) {
60.478 + for (i = 0; i < exchange_node->n_exchanges; i++) {
60.479 + /* rows that were never allocated are NULL (zeroed row table) */
60.480 + free(exchange_node->rank_exchanges[i]);
60.481 + }
60.482 + free(exchange_node->rank_exchanges);
60.483 + exchange_node->rank_exchanges = NULL;
60.484 + }
60.485 +
60.486 + if (NULL != prev_data ){
60.487 + free(prev_data);
60.488 + }
60.489 +
60.490 + if(NULL != current_data) {
60.491 + free(current_data);
60.492 + }
60.493 +
60.494 + if(NULL != group_info) {
60.495 + free(group_info);
60.496 + }
60.497 +
60.498 + /* error return */
60.499 + return OMPI_ERROR;
60.500 +}
60.501 +
60.502 +/* Fill exchange_node with the recursive k-nomial pattern of rank node_rank; returns OMPI_SUCCESS or OMPI_ERROR */
60.503 +OMPI_DECLSPEC int netpatterns_setup_recursive_knomial_tree_node(
60.504 + int num_nodes, int node_rank, int tree_order,
60.505 + netpatterns_k_exchange_node_t *exchange_node)
60.506 +{
60.507 + /* local variables */
60.508 + int i, j, tmp, cnt;
60.509 + int n_levels;
60.510 + int k_base, kpow_num, peer;
60.511 + exchange_node->rank_exchanges = NULL; /* read by the Error path, so NULL until allocated */
60.512 + NETPATTERNS_VERBOSE(
60.513 + ("Enter netpatterns_setup_recursive_knomial_tree_node(num_nodes=%d, node_rank=%d, tree_order=%d)",
60.514 + num_nodes, node_rank, tree_order));
60.515 +
60.516 + assert(num_nodes > 1);
60.517 + assert(tree_order > 1);
60.518 + if (tree_order > num_nodes) {
60.519 + tree_order = num_nodes;
60.520 + }
60.521 +
60.522 + exchange_node->tree_order = tree_order;
60.523 +
60.524 + /* figure out number of levels in the tree */
60.525 + n_levels = 0;
60.526 + /* cnt - number of ranks in given level */
60.527 + cnt=1;
60.528 + while ( num_nodes > cnt ) {
60.529 + cnt *= tree_order;
60.530 + n_levels++;
60.531 + }
60.532 +
60.533 + /* figure out the largest power of tree_order that is less than or equal to
60.534 + * num_nodes */
60.535 + if ( cnt > num_nodes) {
60.536 + cnt /= tree_order;
60.537 + n_levels--;
60.538 + }
60.539 +
60.540 + exchange_node->log_tree_order = n_levels;
60.541 + exchange_node->n_largest_pow_tree_order = cnt;
60.542 +
60.543 + /* set node characteristics - node that is not within the largest
60.544 + * power of tree_order will just send its data to node that will participate
60.545 + * in the recursive doubling, and get the result back at the end.
60.546 + */
60.547 + if (node_rank + 1 > cnt) {
60.548 + exchange_node->node_type = EXTRA_NODE;
60.549 + } else {
60.550 + exchange_node->node_type = EXCHANGE_NODE;
60.551 + }
60.552 +
60.553 +
60.554 + /* set the initial and final data exchanges - those that are not
60.555 + * part of the recursive doubling.
60.556 + */
60.557 + if (EXCHANGE_NODE == exchange_node->node_type) {
60.558 + exchange_node->n_extra_sources = 0;
60.559 + for (i = 0, tmp = node_rank * (tree_order - 1) + cnt + i;
60.560 + tmp < num_nodes && i < tree_order - 1;
60.561 + ++i, ++tmp) {
60.562 + ++exchange_node->n_extra_sources;
60.563 + }
60.564 +
60.565 + assert(exchange_node->n_extra_sources < tree_order);
60.566 +
60.567 + if (exchange_node->n_extra_sources > 0) {
60.568 + exchange_node->rank_extra_sources_array = (int *) malloc
60.569 + (exchange_node->n_extra_sources * sizeof(int));
60.570 + if( NULL == exchange_node->rank_extra_sources_array ) {
60.571 + goto Error;
60.572 + }
60.573 + for (i = 0, tmp = node_rank * (tree_order - 1) + cnt;
60.574 + i < tree_order - 1 && tmp < num_nodes; ++i, ++tmp) {
60.575 + NETPATTERNS_VERBOSE(("extra_source#%d = %d", i, tmp));
60.576 + exchange_node->rank_extra_sources_array[i] = tmp;
60.577 + }
60.578 + } else {
60.579 + exchange_node->rank_extra_sources_array = NULL;
60.580 + }
60.581 + } else {
60.582 + exchange_node->n_extra_sources = 1;
60.583 + exchange_node->rank_extra_sources_array = (int *) malloc (sizeof(int));
60.584 + if( NULL == exchange_node->rank_extra_sources_array ) {
60.585 + goto Error;
60.586 + }
60.587 + exchange_node->rank_extra_sources_array[0] = (node_rank - cnt) / (tree_order - 1);
60.588 + NETPATTERNS_VERBOSE(("extra_source#%d = %d", 0,
60.589 + exchange_node->rank_extra_sources_array[0] ));
60.590 + }
60.591 +
60.592 + /* set the exchange pattern */
60.593 + if (EXCHANGE_NODE == exchange_node->node_type) {
60.594 + exchange_node->n_exchanges = n_levels;
60.595 + /* Allocate the zero-filled row table of the 2 dimension array that keeps
60.596 + rank exchange information for each step (unallocated rows stay NULL) */
60.597 + exchange_node->rank_exchanges = (int **) calloc
60.598 + (exchange_node->n_exchanges, sizeof(int *));
60.599 + if(NULL == exchange_node->rank_exchanges) {
60.600 + goto Error;
60.601 + }
60.602 + for (i = 0; i < exchange_node->n_exchanges; i++) {
60.603 + exchange_node->rank_exchanges[i] = (int *) malloc
60.604 + ((tree_order - 1) * sizeof(int));
60.605 + if( NULL == exchange_node->rank_exchanges[i] ) {
60.606 + goto Error;
60.607 + }
60.608 + }
60.609 + /* fill in exchange partners */
60.610 + for(i = 0, kpow_num = 1; i < exchange_node->n_exchanges;
60.611 + i++, kpow_num *= tree_order) {
60.612 + k_base = node_rank / (kpow_num * tree_order);
60.613 + for(j = 1; j < tree_order; j++) {
60.614 + peer = node_rank + kpow_num * j;
60.615 + if (k_base != peer/(kpow_num * tree_order)) {
60.616 + /* Wraparound the number */
60.617 + peer = k_base * (kpow_num * tree_order) +
60.618 + peer % (kpow_num * tree_order);
60.619 + }
60.620 + exchange_node->rank_exchanges[i][j - 1] = peer;
60.621 + NETPATTERNS_VERBOSE(("rank_exchanges#(%d,%d)/%d = %d",
60.622 + i, j, tree_order, peer));
60.623 + }
60.624 + }
60.625 + } else {
60.626 + exchange_node->n_exchanges=0;
60.627 + exchange_node->rank_exchanges=NULL;
60.628 + }
60.629 +
60.630 + /* set the number of tags needed per stripe - this must be the
60.631 + * same across all procs in the communicator.
60.632 + */
60.633 + /* do we need this one */
60.634 + exchange_node->n_tags = tree_order * n_levels + 1;
60.635 +
60.636 + /* successful return */
60.637 + return OMPI_SUCCESS;
60.638 +
60.639 +Error:
60.640 + /* undo the partial setup; the pointers freed here are reset so they are not left dangling */
60.641 + if (NULL != exchange_node->rank_extra_sources_array) {
60.642 + free(exchange_node->rank_extra_sources_array);
60.643 + exchange_node->rank_extra_sources_array = NULL;
60.644 + }
60.645 + if (NULL != exchange_node->rank_exchanges) {
60.646 + for (i = 0; i < exchange_node->n_exchanges; i++) {
60.647 + /* rows that were never allocated are NULL (zeroed row table) */
60.648 + free(exchange_node->rank_exchanges[i]);
60.649 + }
60.650 + free(exchange_node->rank_exchanges);
60.651 + exchange_node->rank_exchanges = NULL;
60.652 + }
60.653 +
60.654 + /* error return */
60.655 + return OMPI_ERROR;
60.656 +}
60.657 +
60.658 +#if 1
60.659 +OMPI_DECLSPEC int netpatterns_setup_recursive_doubling_n_tree_node(int num_nodes, int node_rank, int tree_order,
60.660 + netpatterns_pair_exchange_node_t *exchange_node)
60.661 +{
60.662 + /* Describe node_rank's role in a radix-tree_order recursive-doubling exchange over num_nodes ranks. */
60.663 + int i, tmp, cnt;
60.664 + int n_levels;
60.665 + int shift, mask;
60.666 + /* Returns OMPI_SUCCESS, or OMPI_ERROR when an allocation fails (both arrays are then left NULL). */
60.667 + NETPATTERNS_VERBOSE(("Enter netpatterns_setup_recursive_doubling_n_tree_node(num_nodes=%d, node_rank=%d, tree_order=%d)", num_nodes, node_rank, tree_order));
60.668 +
60.669 + assert(num_nodes > 1);
60.670 + while (tree_order > num_nodes) {
60.671 + tree_order /= 2;
60.672 + }
60.673 +
60.674 + exchange_node->tree_order = tree_order;
60.675 + /* Only power-of-two orders >= 2 are supported; an order below 2 would never end the level count below */
60.676 + assert(tree_order > 1 && 0 == (tree_order & (tree_order - 1)));
60.677 +
60.678 + /* figure out number of levels in the tree */
60.679 + n_levels = 0;
60.680 + /* cnt - number of ranks in given level */
60.681 + cnt=1;
60.682 + while ( num_nodes > cnt ) {
60.683 + cnt *= tree_order;
60.684 + n_levels++;
60.685 + };
60.686 +
60.687 + /* figure out the largest power of tree_order that is less than or equal to
60.688 + * num_nodes */
60.689 + if ( cnt > num_nodes) {
60.690 + cnt /= tree_order;
60.691 + n_levels--;
60.692 + }
60.693 + exchange_node->log_tree_order = n_levels;
60.694 + if (2 == tree_order) {
60.695 + exchange_node->log_2 = exchange_node->log_tree_order;
60.696 + }
60.697 +
60.698 + tmp=1;
60.699 + for (i=0 ; i < n_levels ; i++ ) {
60.700 + tmp *= tree_order;
60.701 + }
60.702 + /* Ishai: I see no reason for calculating tmp. Add an assert before deleting it */
60.703 + assert(tmp == cnt);
60.704 +
60.705 + exchange_node->n_largest_pow_tree_order = tmp;
60.706 + if (2 == tree_order) {
60.707 + exchange_node->n_largest_pow_2 = exchange_node->n_largest_pow_tree_order;
60.708 + }
60.709 +
60.710 + /* set node characteristics - node that is not within the largest
60.711 + * power of tree_order will just send its data to node that will participate
60.712 + * in the recursive doubling, and get the result back at the end.
60.713 + */
60.714 + if ( node_rank + 1 > cnt ) {
60.715 + exchange_node->node_type = EXTRA_NODE;
60.716 + } else {
60.717 + exchange_node->node_type = EXCHANGE_NODE;
60.718 + }
60.719 +
60.720 + /* set the initial and final data exchanges - those that are not
60.721 + * part of the recursive doubling.
60.722 + */
60.723 + if ( EXCHANGE_NODE == exchange_node->node_type ) {
60.724 + exchange_node->n_extra_sources = 0;
60.725 + for (tmp = node_rank + cnt; tmp < num_nodes; tmp += cnt) {
60.726 + ++exchange_node->n_extra_sources;
60.727 + }
60.728 + if (exchange_node->n_extra_sources > 0) {
60.729 + exchange_node->rank_extra_sources_array = (int *) malloc
60.730 + (exchange_node->n_extra_sources * sizeof(int));
60.731 + if( NULL == exchange_node->rank_extra_sources_array ) {
60.732 + goto Error;
60.733 + }
60.734 + for (i = 0, tmp = node_rank + cnt; tmp < num_nodes; ++i, tmp += cnt) {
60.735 + NETPATTERNS_VERBOSE(("extra_source#%d = %d", i, tmp));
60.736 + exchange_node->rank_extra_sources_array[i] = tmp;
60.737 + }
60.738 + } else {
60.739 + exchange_node->rank_extra_sources_array = NULL;
60.740 + }
60.741 + } else {
60.742 + exchange_node->n_extra_sources = 1;
60.743 + exchange_node->rank_extra_sources_array = (int *) malloc (sizeof(int));
60.744 + if( NULL == exchange_node->rank_extra_sources_array ) {
60.745 + goto Error;
60.746 + }
60.747 + exchange_node->rank_extra_sources_array[0] = node_rank & (cnt - 1);
60.748 + NETPATTERNS_VERBOSE(("extra_source#%d = %d", 0, node_rank & (cnt - 1)));
60.749 + }
60.750 +
60.751 + /* Ishai: To be compatible with the old structure - should be removed later */
60.752 + if (1 == exchange_node->n_extra_sources) {
60.753 + exchange_node->rank_extra_source = exchange_node->rank_extra_sources_array[0];
60.754 + } else {
60.755 + exchange_node->rank_extra_source = -1;
60.756 + }
60.757 +
60.758 + /* set the exchange pattern */
60.759 + if ( EXCHANGE_NODE == exchange_node->node_type ) {
60.760 + exchange_node->n_exchanges = n_levels * (tree_order - 1);
60.761 + exchange_node->rank_exchanges = (int *) malloc
60.762 + (exchange_node->n_exchanges * sizeof(int));
60.763 + if( NULL == exchange_node->rank_exchanges ) {
60.764 + goto Error;
60.765 + }
60.766 +
60.767 + /* fill in exchange partners: (tree_order - 1) partners per level, shift is tree_order^level */
60.768 + for ( i = 0, shift = 1 ; i < exchange_node->n_exchanges ; shift *= tree_order ) {
60.769 + for ( mask = 1 ; mask < tree_order ; ++mask, ++i ) {
60.770 + exchange_node->rank_exchanges[i] = node_rank ^ (mask * shift);
60.771 + NETPATTERNS_VERBOSE(("rank_exchanges#%d/%d = %d", i, tree_order, node_rank ^ (mask * shift)));
60.772 + }
60.773 + }
60.774 +
60.775 + } else {
60.776 +
60.777 + exchange_node->n_exchanges=0;
60.778 + exchange_node->rank_exchanges=NULL;
60.779 +
60.780 + }
60.781 +
60.782 + /* set the number of tags needed per stripe - this must be the
60.783 + * same across all procs in the communicator.
60.784 + */
60.785 + /* Ishai: Need to find out what is n_tags */
60.786 + exchange_node->n_tags = tree_order * n_levels + 1;
60.787 +
60.788 + /* successful return */
60.789 + return OMPI_SUCCESS;
60.790 +
60.791 +Error:
60.792 + /* free(NULL) is a no-op; leave both pointers NULL so a later free call cannot double-free or free garbage */
60.793 + free(exchange_node->rank_extra_sources_array);
60.794 + exchange_node->rank_extra_sources_array = NULL;
60.795 + exchange_node->rank_exchanges = NULL;
60.796 + /* error return */
60.797 + return OMPI_ERROR;
60.798 +}
60.799 +
60.800 +OMPI_DECLSPEC void netpatterns_free_recursive_doubling_tree_node(
60.801 + netpatterns_pair_exchange_node_t *exchange_node)
60.802 +{
60.803 + NETPATTERNS_VERBOSE(("About to release rank_extra_sources_array and rank_exchanges"));
60.804 + /* Release the two arrays owned by the node. free(NULL) is a no-op, so no guards are needed;
60.805 + * each pointer is reset afterwards so a second call on the same node cannot double-free. */
60.806 + free(exchange_node->rank_extra_sources_array);
60.807 + exchange_node->rank_extra_sources_array = NULL;
60.808 +
60.809 + free(exchange_node->rank_exchanges);
60.810 + exchange_node->rank_exchanges = NULL;
60.811 +}
60.812 +#endif
60.813 +
60.814 +OMPI_DECLSPEC int netpatterns_setup_recursive_doubling_tree_node(int num_nodes, int node_rank,
60.815 + netpatterns_pair_exchange_node_t *exchange_node)
60.816 +{ /* radix-2 case: forwards to the n-ary setup with tree_order fixed at 2 and returns its status */
60.817 + return netpatterns_setup_recursive_doubling_n_tree_node(num_nodes, node_rank, 2, exchange_node);
60.818 +}
60.819 +
60.820 +#if 0
60.821 +/*OMPI_DECLSPEC int old_netpatterns_setup_recursive_doubling_tree_node(int num_nodes, int node_rank,*/
60.822 +OMPI_DECLSPEC int netpatterns_setup_recursive_doubling_n_tree_node(int num_nodes, int node_rank,int tree_order,
60.823 + netpatterns_pair_exchange_node_t *exchange_node)
60.824 +{
60.825 + /* NOTE: this definition is compiled out by the enclosing #if 0. Local variables: */
60.826 + /*int tree_order;*/
60.827 + int i,tmp,cnt,result,n_extra_nodes;
60.828 + int n_exchanges;
60.829 +
60.830 + /* figure out number of levels in the tree */
60.831 +
60.832 + n_exchanges=0;
60.833 + result=num_nodes;
60.834 +/* tree_order=2;*/
60.835 + /* cnt - number of ranks in given level */
60.836 + cnt=1;
60.837 + while( num_nodes > cnt ) {
60.838 + cnt*=tree_order;
60.839 + n_exchanges++;
60.840 + };
60.841 +
60.842 + /* figure out the largest power of tree_order that is less than or equal to
60.843 + * num_nodes */
60.844 + if( cnt > num_nodes) {
60.845 + cnt/=tree_order;
60.846 + n_exchanges--;
60.847 + }
60.848 + exchange_node->log_2=n_exchanges;
60.849 +
60.850 + tmp=1;
60.851 + for(i=0 ; i < n_exchanges ; i++ ) {
60.852 + tmp*=2; /* NOTE(review): base 2 here, while cnt above is a power of tree_order */
60.853 + }
60.854 + exchange_node->n_largest_pow_2=tmp;
60.855 +
60.856 + /* set node characteristics - node that is not within the largest
60.857 + * power of tree_order (cnt) will just send its data to node that will participate
60.858 + * in the recursive doubling, and get the result back at the end.
60.859 + */
60.860 + if( node_rank+1 > cnt ) {
60.861 + exchange_node->node_type=EXTRA_NODE;
60.862 + } else {
60.863 + exchange_node->node_type=EXCHANGE_NODE;
60.864 + }
60.865 +
60.866 + /* set the initial and final data exchanges - those that are not
60.867 + * part of the recursive doubling.
60.868 + */
60.869 + n_extra_nodes=num_nodes-cnt;
60.870 +
60.871 + if ( EXCHANGE_NODE == exchange_node->node_type ) {
60.872 + /* the first n_extra_nodes exchange ranks are each paired with rank cnt+node_rank */
60.873 + if( node_rank < n_extra_nodes ) {
60.874 + exchange_node->n_extra_sources=1;
60.875 + exchange_node->rank_extra_source=cnt+node_rank;
60.876 + } else {
60.877 + exchange_node->n_extra_sources=0;
60.878 + exchange_node->rank_extra_source=-1;
60.879 + }
60.880 +
60.881 + } else {
60.882 + exchange_node->n_extra_sources=1;
60.883 + exchange_node->rank_extra_source=node_rank-cnt;
60.884 + }
60.885 +
60.886 + /* set the exchange pattern */
60.887 + if( EXCHANGE_NODE == exchange_node->node_type ) {
60.888 +
60.889 + exchange_node->n_exchanges=n_exchanges;
60.890 + exchange_node->rank_exchanges=(int *) malloc
60.891 + (n_exchanges*sizeof(int));
60.892 + if( NULL == exchange_node->rank_exchanges ) {
60.893 + goto Error;
60.894 + }
60.895 +
60.896 + /* fill in exchange partners: at step i the partner differs from node_rank by 2^i, */
60.897 + result=1; /* subtracted when bit i of node_rank is set, added otherwise */
60.898 + tmp=node_rank;
60.899 + for( i=0 ; i < n_exchanges ; i++ ) {
60.900 + if(tmp & 1 ) {
60.901 + exchange_node->rank_exchanges[i]=
60.902 + node_rank-result;
60.903 + } else {
60.904 + exchange_node->rank_exchanges[i]=
60.905 + node_rank+result;
60.906 + }
60.907 + result*=2;
60.908 + tmp/=2;
60.909 + }
60.910 +
60.911 + } else {
60.912 +
60.913 + exchange_node->n_exchanges=0;
60.914 + exchange_node->rank_exchanges=NULL;
60.915 +
60.916 + }
60.917 +
60.918 + /* set the number of tags needed per stripe - this must be the
60.919 + * same across all procs in the communicator.
60.920 + */
60.921 + exchange_node->n_tags=2*n_exchanges+1;
60.922 +
60.923 + /* Ishai: to make sure free will work also for people that call this function */
60.924 + exchange_node->rank_extra_sources_array = NULL;
60.925 +
60.926 + /* successful return */
60.927 + return OMPI_SUCCESS;
60.928 +
60.929 +Error:
60.930 + /* reached only when the rank_exchanges allocation fails; nothing else to release */
60.931 + /* error return */
60.932 + return OMPI_ERROR;
60.933 +}
60.934 +#endif
60.935 +
61.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
61.2 +++ b/ompi/patterns/net/netpatterns_knomial_tree.h Tue Feb 19 22:50:56 2013 +0000
61.3 @@ -0,0 +1,254 @@
61.4 +/*
61.5 + * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
61.6 + * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
61.7 + * Copyright (c) 2012 Los Alamos National Security, LLC.
61.8 + * All rights reserved.
61.9 + * $COPYRIGHT$
61.10 + *
61.11 + * Additional copyrights may follow
61.12 + *
61.13 + * $HEADER$
61.14 + */
61.15 +
61.16 +#ifndef COMM_PATTERNS_KNOMIAL_TREE_H
61.17 +#define COMM_PATTERNS_KNOMIAL_TREE_H
61.18 +
61.19 +#include "ompi_config.h"
61.20 +
61.21 +BEGIN_C_DECLS
61.22 +
61.23 +
61.24 +/*
61.25 + * Pair-wise data exchange
61.26 + */
61.27 +
61.28 +/* Node role in an exchange pattern; stored in the node_type field of the node structures */
61.29 +enum {
61.30 + EXCHANGE_NODE, /* rank within the largest power of tree_order: takes part in the exchange steps */
61.31 + EXTRA_NODE /* rank beyond that power: has no exchange steps, only trades data with its extra source */
61.32 +};
61.33 +
61.34 +struct netpatterns_pair_exchange_node_t {
61.35 + /* Per-rank description of a recursive-doubling (pair-wise) exchange pattern */
61.36 + /* Order of a node in the tree - usually 2 */
61.37 + int tree_order;
61.38 +
61.39 + /* number of nodes this node will exchange data with */
61.40 + int n_exchanges;
61.41 +
61.42 + /* ranks of nodes involved in data exchange (n_exchanges entries, NULL when there are none) */
61.43 + int *rank_exchanges;
61.44 +
61.45 + /* number of extra sources of data - outside largest power of
61.46 + * tree_order in this group */
61.47 + int n_extra_sources;
61.48 +
61.49 + /* rank of the extra source: array of n_extra_sources entries; the scalar mirrors entry 0 or is -1 */
61.50 + /* deprecated */ int rank_extra_source;
61.51 + int *rank_extra_sources_array;
61.52 +
61.53 + /* number of tags needed per stripe */
61.54 + int n_tags;
61.55 +
61.56 + /* log (base tree_order) of the largest full power of tree_order for this node set */
61.57 + /* deprecated */ int log_2;
61.58 + int log_tree_order;
61.59 +
61.60 + /* largest power of tree_order that fits in this group */
61.61 + /* deprecated */ int n_largest_pow_2;
61.62 + int n_largest_pow_tree_order;
61.63 +
61.64 + /* node type: EXCHANGE_NODE or EXTRA_NODE */
61.65 + int node_type;
61.66 +
61.67 +};
61.68 +typedef struct netpatterns_pair_exchange_node_t netpatterns_pair_exchange_node_t;
61.69 +
61.70 +struct netpatterns_payload_t { /* element type of netpatterns_k_exchange_node_t::payload_info */
61.71 + int s_len; /* presumably the send length - TODO confirm units against the code that fills payload_info */
61.72 + int r_len; /* presumably the receive length - TODO confirm */
61.73 + int s_offset; /* presumably the send offset - TODO confirm */
61.74 + int r_offset; /* presumably the receive offset - TODO confirm */
61.75 +};
61.76 +typedef struct netpatterns_payload_t netpatterns_payload_t;
61.77 +
61.78 +struct netpatterns_k_exchange_node_t {
61.79 + /* Order of a node in the tree - usually 2 */
61.80 + int tree_order;
61.81 + /* number of nodes this node will exchange data with */
61.82 + int n_exchanges;
61.83 + /* total number of exchanges that I actually participate in */
61.84 + int n_actual_exchanges;
61.85 + /* ranks of nodes involved in data exchange (2-d: one row of peers per exchange step) */
61.86 + int **rank_exchanges;
61.87 + /* number of extra sources of data - outside largest power of k in
61.88 + * this group */
61.89 + int n_extra_sources;
61.90 + /* rank/s of the extra source */
61.91 + int *rank_extra_sources_array;
61.92 + /* number of tags needed per stripe */
61.93 + int n_tags;
61.94 + /* log k of largest full power of k for this node set */
61.95 + int log_tree_order;
61.96 + /* largest power of k that fits in this group */
61.97 + int n_largest_pow_tree_order;
61.98 + /* node type: EXCHANGE_NODE or EXTRA_NODE */
61.99 + int node_type;
61.100 + /* start of extra ranks k_nomial */
61.101 + int k_nomial_stray;
61.102 + /* reindex map */
61.103 + int *reindex_map;
61.104 + /* inverse of reindex map, i.e. given a reindexed id find out its actual rank */
61.105 + int *inv_reindex_map;
61.106 + /* reindexed node_rank */
61.107 + int reindex_myid;
61.108 + /* 2-d array that holds payload info for each level of recursive k-ing */
61.109 + netpatterns_payload_t **payload_info;
61.110 +};
61.111 +typedef struct netpatterns_k_exchange_node_t
61.112 + netpatterns_k_exchange_node_t;
61.113 +
61.114 +OMPI_DECLSPEC int netpatterns_setup_recursive_doubling_n_tree_node(int num_nodes, int node_rank, int tree_order,
61.115 + netpatterns_pair_exchange_node_t *exchange_node);
61.116 +
61.117 +OMPI_DECLSPEC void netpatterns_free_recursive_doubling_tree_node(
61.118 + netpatterns_pair_exchange_node_t *exchange_node);
61.119 +
61.120 +OMPI_DECLSPEC int netpatterns_setup_recursive_doubling_tree_node(int num_nodes, int node_rank,
61.121 + netpatterns_pair_exchange_node_t *exchange_node);
61.122 +
61.123 +OMPI_DECLSPEC int netpatterns_setup_recursive_knomial_tree_node(
61.124 + int num_nodes, int node_rank, int tree_order,
61.125 + netpatterns_k_exchange_node_t *exchange_node);
61.126 +
61.127 +OMPI_DECLSPEC int netpatterns_setup_recursive_knomial_allgather_tree_node(
61.128 + int num_nodes, int node_rank, int tree_order, int *hier_ranks,
61.129 + netpatterns_k_exchange_node_t *exchange_node);
61.130 +
61.131 +
61.132 +/* Input: my_rank, src_rank, radix, size (the distance src_rank - my_rank wraps by size when negative)
61.133 + Output: *k_level - largest power of radix not exceeding that distance (1 when distance < radix);
61.134 + return value - exponent of that power minus one, the "start_point" index into rank_exchanges.
61.135 +
61.136 + Please see below example of usage:
61.137 + for (i = start_point ; i > 0; i--)
61.138 + for (k = 0; k < tree_radix; k++)
61.139 + send messages to exchange_node->rank_exchanges[i][k];
61.140 +*/
61.141 +
61.142 +static inline __opal_attribute_always_inline__
61.143 +int netpatterns_get_knomial_level(
61.144 + int my_rank, int src_rank,
61.145 + int radix, int size,
61.146 + int *k_level)
61.147 +{
61.148 + int distance,
61.149 + pow_k;
61.150 + int logk_level = 0;
61.151 +
61.152 + /* Calculate distance from source of data */
61.153 + distance = src_rank - my_rank;
61.154 +
61.155 + /* Wrap around */
61.156 + if (0 > distance) {
61.157 + distance += size;
61.158 + }
61.159 + /* grow pow_k by factors of radix while distance >= pow_k * radix */
61.160 + pow_k = 1;
61.161 + while(distance / (pow_k * radix)) {
61.162 + pow_k *= radix;
61.163 + ++logk_level;
61.164 + }
61.165 + --logk_level; /* becomes -1 when the loop body never ran */
61.166 +
61.167 + *k_level = pow_k;
61.168 + return logk_level;
61.169 +}
61.170 +
61.171 +/* Input: my_rank, root, radix, size
61.172 + * Output: return value - rank that is the source of the data; *k_level - offset as a power of K
61.173 + * (level/radix at loop exit); *logk_level - number of loop iterations performed */
61.174 +static inline __opal_attribute_always_inline__
61.175 +int netpatterns_get_knomial_data_source(
61.176 + int my_rank, int root, int radix, int size,
61.177 + int *k_level, int *logk_level)
61.178 +{
61.179 + int level = radix;
61.180 + int step = 0;
61.181 +
61.182 + /* Calculate source of the data: raise level while it divides (root - my_rank) and is <= size */
61.183 + while((0 == (root - my_rank) % level)
61.184 + && (level <= size)) {
61.185 + level *= radix;
61.186 + ++step;
61.187 + }
61.188 +
61.189 + *k_level = level/radix;
61.190 + *logk_level = step;
61.191 + return my_rank - (my_rank % level - root % level);
61.192 +}
61.193 +
61.194 +/* Input: my_rank, radix,
61.195 + * k_level - that you get from netpatterns_get_knomial_data_source
61.196 + * k_step - some integer
61.197 + * Output: peer - next child in the tree
61.198 + * Usage:
61.199 + * src = netpatterns_get_knomial_data_source(
61.200 + * my_rank, root, radix, size,
61.201 + * &k_level, &logk_level)
61.202 + * recv_from(src......);
61.203 + *
61.204 + * MCA_COMMON_NETPATTERNS_GET_NEXT_KNOMIAL_INIT(step_info, k_level, my_rank);
61.205 + * while(MCA_COMMON_NETPATTERNS_GET_NEXT_KNOMIAL_PEER_CHECK_LEVEL(step_info)) {
61.206 + * MCA_COMMON_NETPATTERNS_GET_NEXT_KNOMIAL_PEER(my_rank, radix, step_info, peer);
61.207 + * send_to(peer....);
61.208 + * }
61.209 + * for more examples please grep in ptpcoll bcol bcast files
61.210 + */
61.211 +
61.212 +typedef struct netpatterns_knomial_step_info_t {
61.213 + int k_step; /* position within the current level; starts at 1 and is reset to 1 when it reaches radix */
61.214 + int k_level; /* current level size; divided by radix each time a level is completed */
61.215 + int k_tmp_peer; /* rank the next peer is computed from: the last peer, or my_rank at the start of a level */
61.216 +} netpatterns_knomial_step_info_t;
61.217 +
61.218 +#define MCA_COMMON_NETPATTERNS_GET_NEXT_KNOMIAL_UPDATE_LEVEL_FOR_BCAST(step_info, radix)\
61.219 +do { /* when the current level is only partly walked (k_step != 1), drop to the next lower level */ \
61.220 + if (1 != (step_info).k_step) { \
61.221 + (step_info).k_level /= (radix); \
61.222 + } \
61.223 +} while (0)
61.224 +
61.225 +#define MCA_COMMON_NETPATTERNS_GET_NEXT_KNOMIAL_INIT(step_info, in_k_level, in_peer)\
61.226 +do { /* start a walk at step 1 of level in_k_level, computing the first peer from in_peer */ \
61.227 + (step_info).k_step = 1; \
61.228 + (step_info).k_level = (in_k_level); \
61.229 + (step_info).k_tmp_peer = (in_peer); \
61.230 +} while (0)
61.231 +
61.232 +#define MCA_COMMON_NETPATTERNS_GET_NEXT_KNOMIAL_PEER_CHECK_LEVEL(step_info) \
61.233 + ((step_info).k_level > 1) /* non-zero while the walk still has a level above 1 to process */
61.234 +
61.235 +#define MCA_COMMON_NETPATTERNS_GET_NEXT_KNOMIAL_PEER(my_rank, radix, step_info, peer) \
61.236 +do { \
61.237 + int rank_radix_base = (my_rank)/(step_info).k_level; \
61.238 + /* next peer is k_level/radix past the previous one, kept inside my_rank's k_level-sized block */ \
61.239 + (peer) = (step_info).k_tmp_peer + (step_info).k_level/(radix); \
61.240 + if (rank_radix_base != (peer)/(step_info).k_level) { \
61.241 + /* Wraparound the number */ \
61.242 + (peer) -= (step_info).k_level; \
61.243 + assert((peer) >=0); \
61.244 + } \
61.245 + ++(step_info).k_step; \
61.246 + if ((radix) == (step_info).k_step) { \
61.247 + (step_info).k_level /= (radix); \
61.248 + (step_info).k_step = 1; \
61.249 + (step_info).k_tmp_peer = (my_rank); \
61.250 + } else { \
61.251 + (step_info).k_tmp_peer = (peer); \
61.252 + } \
61.253 + /* arguments are expanded more than once: pass side-effect-free expressions */ \
61.254 +} while (0)
61.255 +
61.256 +END_C_DECLS
61.257 +#endif
62.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
62.2 +++ b/ompi/patterns/net/netpatterns_multinomial_tree.c Tue Feb 19 22:50:56 2013 +0000
62.3 @@ -0,0 +1,190 @@
62.4 +/*
62.5 + * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
62.6 + * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
62.7 + * $COPYRIGHT$
62.8 + *
62.9 + * Additional copyrights may follow
62.10 + *
62.11 + * $HEADER$
62.12 + */
62.13 +
62.14 +#include "ompi_config.h"
62.15 +#ifdef HAVE_UNISTD_H
62.16 +#include <unistd.h>
62.17 +#endif
62.18 +#include <sys/types.h>
62.19 +#ifdef HAVE_SYS_MMAN_H
62.20 +#include <sys/mman.h>
62.21 +#endif
62.22 +#include <fcntl.h>
62.23 +#include <stdlib.h>
62.24 +
62.25 +#include "ompi/constants.h"
62.26 +#include "netpatterns.h"
62.27 +
62.28 +
62.29 +/* setup an multi-nomial tree - for each node in the tree
62.30 + * this returns it's parent, and it's children */
62.31 +
62.32 +OMPI_DECLSPEC int netpatterns_setup_multinomial_tree(int tree_order, int num_nodes,
62.33 + netpatterns_tree_node_t *tree_nodes)
62.34 +{
62.35 + /* local variables */
62.36 + int i,result;
62.37 + int cnt, n_nodes_in_this_level,node_index;
62.38 + int n_cum_nodes,current_level,node,n_nodes_prev_level,rank,parent_rank;
62.39 + int n_nodes_in_last_level,n_full_stripes,n_in_partial_stipe,n_children;
62.40 + int n_lvls_in_tree;
62.41 +
62.42 + /* sanity check */
62.43 + if( 1 >= tree_order ) {
62.44 + goto Error;
62.45 + }
62.46 +
62.47 +
62.48 + /* figure out number of levels in the tree */
62.49 +
62.50 + n_lvls_in_tree=0;
62.51 + result=num_nodes;
62.52 + /* cnt - number of ranks in given level */
62.53 + cnt=1;
62.54 + /* cummulative count of ranks */
62.55 + while( 0 < result ) {
62.56 + result-=cnt;
62.57 + cnt*=tree_order;
62.58 + n_lvls_in_tree++;
62.59 + };
62.60 +
62.61 + /* loop over tree levels */
62.62 + n_nodes_in_this_level=1;
62.63 + node_index=-1;
62.64 + n_cum_nodes=0;
62.65 + for( current_level = 0 ; current_level < n_lvls_in_tree ; current_level++) {
62.66 +
62.67 + /* loop over nodes in current level */
62.68 + for ( node=0 ; node < n_nodes_in_this_level ; node++ ) {
62.69 + /* get node index */
62.70 + node_index++;
62.71 +
62.72 + /* break if reach group size */
62.73 + if( node_index == num_nodes) {
62.74 + break;
62.75 + }
62.76 +
62.77 + tree_nodes[node_index].my_rank=node_index;
62.78 + tree_nodes[node_index].children_ranks=NULL;
62.79 +
62.80 + /*
62.81 + * Parents
62.82 + */
62.83 + if( 0 == current_level ) {
62.84 + tree_nodes[node_index].n_parents=0;
62.85 + /* get parent index */
62.86 + tree_nodes[node_index].parent_rank=-1;
62.87 + } else {
62.88 + tree_nodes[node_index].n_parents=1;
62.89 + /* get parent index */
62.90 + n_nodes_prev_level=n_nodes_in_this_level/tree_order;
62.91 + if( current_level == n_lvls_in_tree -1 ) {
62.92 + /* load balance the lowest level */
62.93 + parent_rank=node-
62.94 + (node/n_nodes_prev_level)*n_nodes_prev_level;
62.95 + parent_rank=n_cum_nodes-n_nodes_prev_level+
62.96 + parent_rank;
62.97 + tree_nodes[node_index].parent_rank=parent_rank;
62.98 + } else {
62.99 + tree_nodes[node_index].parent_rank=
62.100 + (n_cum_nodes-n_nodes_prev_level)+node/tree_order;
62.101 + }
62.102 + }
62.103 +
62.104 + /*
62.105 + * Children
62.106 + */
62.107 +
62.108 + /* get number of children */
62.109 + if( (n_lvls_in_tree-1) == current_level ) {
62.110 + /* leaves have no nodes */
62.111 + tree_nodes[node_index].n_children=0;
62.112 + tree_nodes[node_index].children_ranks=NULL;
62.113 + } else {
62.114 + /* take into account last level being incomplete */
62.115 + if( (n_lvls_in_tree-2) == current_level ) {
62.116 + /* last level is load balanced */
62.117 + n_nodes_in_last_level=num_nodes-
62.118 + (n_cum_nodes+n_nodes_in_this_level);
62.119 + n_full_stripes=n_nodes_in_last_level/n_nodes_in_this_level;
62.120 + n_in_partial_stipe=n_nodes_in_last_level-
62.121 + n_full_stripes*n_nodes_in_this_level;
62.122 + n_children=n_full_stripes;
62.123 + if( n_full_stripes < tree_order ) {
62.124 + if( node <= n_in_partial_stipe-1 ) {
62.125 + n_children++;
62.126 + }
62.127 + }
62.128 + tree_nodes[node_index].n_children=n_children;
62.129 + if( 0 < n_children ) {
62.130 + tree_nodes[node_index].children_ranks=(int *)
62.131 + malloc(sizeof(int)*n_children);
62.132 + if( NULL == tree_nodes[node_index].children_ranks) {
62.133 + goto Error;
62.134 + }
62.135 + } else {
62.136 + tree_nodes[node_index].children_ranks=NULL;
62.137 + }
62.138 + /* fill in list */
62.139 + for( rank=0 ; rank < n_children ; rank++ ) {
62.140 + tree_nodes[node_index].children_ranks[rank]=
62.141 + node+rank*n_nodes_in_this_level;
62.142 + tree_nodes[node_index].children_ranks[rank]+=
62.143 + (n_cum_nodes+n_nodes_in_this_level);
62.144 + }
62.145 + } else {
62.146 + n_children=tree_order;
62.147 + tree_nodes[node_index].n_children=tree_order;
62.148 + tree_nodes[node_index].children_ranks=(int *)
62.149 + malloc(sizeof(int)*n_children);
62.150 + if( NULL == tree_nodes[node_index].children_ranks) {
62.151 + goto Error;
62.152 + }
62.153 + for( rank=0 ; rank < n_children ; rank++ ) {
62.154 + tree_nodes[node_index].children_ranks[rank]=
62.155 + rank+tree_order*node;
62.156 + tree_nodes[node_index].children_ranks[rank]+=
62.157 + (n_cum_nodes+n_nodes_in_this_level);
62.158 + }
62.159 + }
62.160 + }
62.161 +
62.162 + } /* end node loop */
62.163 +
62.164 + /* update helper counters */
62.165 + n_cum_nodes+=n_nodes_in_this_level;
62.166 + n_nodes_in_this_level*=tree_order;
62.167 + }
62.168 +
62.169 + /* set node type */
62.170 + for(i=0 ; i < num_nodes ; i++ ) {
62.171 + if( 0 == tree_nodes[i].n_parents ) {
62.172 + tree_nodes[i].my_node_type=ROOT_NODE;
62.173 + } else if ( 0 == tree_nodes[i].n_children ) {
62.174 + tree_nodes[i].my_node_type=LEAF_NODE;
62.175 + } else {
62.176 + tree_nodes[i].my_node_type=INTERIOR_NODE;
62.177 + }
62.178 + }
62.179 +
62.180 + /* successful return */
62.181 + return OMPI_SUCCESS;
62.182 +
62.183 +Error:
62.184 + /* free allocated memory */
62.185 + for( i=0 ; i < num_nodes ; i++ ) {
62.186 + if( NULL != tree_nodes[i].children_ranks ) {
62.187 + free(tree_nodes[i].children_ranks);
62.188 + }
62.189 + }
62.190 +
62.191 + /* error return */
62.192 + return OMPI_ERROR;
62.193 +}
63.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
63.2 +++ b/ompi/patterns/net/netpatterns_nary_tree.c Tue Feb 19 22:50:56 2013 +0000
63.3 @@ -0,0 +1,443 @@
63.4 +/*
63.5 + * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
63.6 + * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
63.7 + * $COPYRIGHT$
63.8 + *
63.9 + * Additional copyrights may follow
63.10 + *
63.11 + * $HEADER$
63.12 + */
63.13 +
63.14 +#include "ompi_config.h"
63.15 +#ifdef HAVE_UNISTD_H
63.16 +#include <unistd.h>
63.17 +#endif
63.18 +#include <sys/types.h>
63.19 +#ifdef HAVE_SYS_MMAN_H
63.20 +#include <sys/mman.h>
63.21 +#endif
63.22 +#include <fcntl.h>
63.23 +#include <errno.h>
63.24 +#include <stdlib.h>
63.25 +#include <stdio.h>
63.26 +
63.27 +#include "ompi/constants.h"
63.28 +#include "netpatterns.h"
63.29 +
63.30 +/*
63.31 + * Create mmaped shared file
63.32 + */
63.33 +
63.34 +/* Set up an n-ary tree: fill *my_node with my_rank's parent, children and node type for num_nodes ranks numbered level by level */
63.35 +/* Returns OMPI_SUCCESS, or OMPI_ERROR when tree_order <= 1 or the children array cannot be allocated */
63.36 +int netpatterns_setup_narray_tree(int tree_order, int my_rank, int num_nodes,
63.37 + netpatterns_tree_node_t *my_node)
63.38 +{
63.39 + /* local variables */
63.40 + int n_levels, result;
63.41 + int my_level_in_tree, cnt;
63.42 + int lvl,cum_cnt, my_rank_in_my_level,n_lvls_in_tree;
63.43 + int start_index,end_index;
63.44 +
63.45 + /* sanity check */
63.46 + if( 1 >= tree_order ) {
63.47 + goto Error;
63.48 + }
63.49 +
63.50 + my_node->my_rank=my_rank;
63.51 + my_node->tree_size=num_nodes;
63.52 +
63.53 + /* figure out number of levels in tree (NOTE(review): n_levels is not read again in this function) */
63.54 + n_levels=0;
63.55 + result=num_nodes-1;
63.56 + while (0 < result ) {
63.57 + result/=tree_order;
63.58 + n_levels++;
63.59 + };
63.60 +
63.61 + /* figure out who my children and parents are */
63.62 + my_level_in_tree=-1;
63.63 + result=my_rank;
63.64 + /* cnt - number of ranks in given level */
63.65 + cnt=1;
63.66 + /* cumulative count of ranks */
63.67 + while( 0 <= result ) {
63.68 + result-=cnt;
63.69 + cnt*=tree_order;
63.70 + my_level_in_tree++;
63.71 + };
63.72 + /* int my_level_in_tree, n_children, n_parents; */
63.73 +
63.74 + if( 0 == my_rank ) {
63.75 + my_node->n_parents=0;
63.76 + my_node->parent_rank=-1;
63.77 + my_rank_in_my_level=0;
63.78 + } else {
63.79 + my_node->n_parents=1;
63.80 + cnt=1;
63.81 + cum_cnt=0;
63.82 + for (lvl = 0 ; lvl < my_level_in_tree ; lvl ++ ) {
63.83 + /* cumulative count up to this level */
63.84 + cum_cnt+=cnt;
63.85 + /* number of ranks in this level */
63.86 + cnt*=tree_order;
63.87 + }
63.88 + my_rank_in_my_level=my_rank-cum_cnt;
63.89 + /* tree_order consecutive ranks have the same parent */
63.90 + my_node->parent_rank=cum_cnt-cnt/tree_order+my_rank_in_my_level/tree_order;
63.91 + }
63.92 +
63.93 + /* figure out number of levels in the tree */
63.94 + n_lvls_in_tree=0;
63.95 + result=num_nodes;
63.96 + /* cnt - number of ranks in given level */
63.97 + cnt=1;
63.98 + /* cumulative count of ranks */
63.99 + while( 0 < result ) {
63.100 + result-=cnt;
63.101 + cnt*=tree_order;
63.102 + n_lvls_in_tree++;
63.103 + };
63.104 +
63.105 + my_node->children_ranks=(int *)NULL;
63.106 +
63.107 + /* get list of children */
63.108 + if( my_level_in_tree == (n_lvls_in_tree -1 ) ) {
63.109 + /* last level has no children */
63.110 + my_node->n_children=0;
63.111 + } else {
63.112 + cum_cnt=0;
63.113 + cnt=1;
63.114 + for( lvl=0 ; lvl <= my_level_in_tree ; lvl++ ) {
63.115 + cum_cnt+=cnt;
63.116 + cnt*=tree_order;
63.117 + }
63.118 + start_index=cum_cnt+my_rank_in_my_level*tree_order;
63.119 + end_index=start_index+tree_order-1;
63.120 +
63.121 + /* don't go out of bounds at the end of the list */
63.122 + if( end_index >= num_nodes ) {
63.123 + end_index = num_nodes-1;
63.124 + }
63.125 +
63.126 + if( start_index <= (num_nodes-1) ) {
63.127 + my_node->n_children=end_index-start_index+1;
63.128 + } else {
63.129 + my_node->n_children=0;
63.130 + }
63.131 +
63.132 + my_node->children_ranks=NULL;
63.133 + if( 0 < my_node->n_children ) {
63.134 + my_node->children_ranks=
63.135 + (int *)malloc( sizeof(int)*my_node->n_children);
63.136 + if( NULL == my_node->children_ranks) {
63.137 + goto Error;
63.138 + }
63.139 + for (lvl= start_index ; lvl <= end_index ; lvl++ ) {
63.140 + my_node->children_ranks[lvl-start_index]=lvl;
63.141 + }
63.142 + }
63.143 + }
63.144 + /* set node type */
63.145 + if( 0 == my_node->n_parents ) {
63.146 + my_node->my_node_type=ROOT_NODE;
63.147 + } else if ( 0 == my_node->n_children ) {
63.148 + my_node->my_node_type=LEAF_NODE;
63.149 + } else {
63.150 + my_node->my_node_type=INTERIOR_NODE;
63.151 + }
63.152 +
63.153 +
63.154 + /* successful return */
63.155 + return OMPI_SUCCESS;
63.156 +
63.157 +Error:
63.158 + /* nothing to release: both jumps here happen while no children array is allocated */
63.159 + /* error return */
63.160 + return OMPI_ERROR;
63.161 +}
63.162 +
63.163 +/*
63.164 + * Describe the place of rank my_rank in a tree_order-ary tree laid out
63.165 + * level by level over num_nodes ranks (rank 0 is the root).  For a
63.166 + * non-root rank, my_node->k_node is also initialised through
63.167 + * netpatterns_setup_recursive_knomial_tree_node(), using the size of the
63.168 + * rank's level and its position in it.  children_ranks is malloc'ed here
63.169 + * and stays NULL when there are no children.  Returns OMPI_SUCCESS, or
63.170 + * OMPI_ERROR when tree_order < 2 or when that call or the malloc fails.
63.171 + * NOTE(review): rank 0 leaves rank_on_level, level_size and k_node
63.172 + * untouched, and k_node is not released here if the later malloc fails -
63.173 + * confirm that callers cope with both.
63.174 + */
63.175 +int netpatterns_setup_narray_knomial_tree(
63.176 +        int tree_order, int my_rank, int num_nodes,
63.177 +        netpatterns_narray_knomial_tree_node_t *my_node)
63.178 +{
63.179 +    int result;
63.180 +    int my_level_in_tree, cnt ;
63.181 +    int lvl,cum_cnt, my_rank_in_my_level,n_lvls_in_tree;
63.182 +    int start_index,end_index, rc;
63.183 +
63.184 +    /* sanity check */
63.185 +    if( 1 >= tree_order ) {
63.186 +        goto Error;
63.187 +    }
63.188 +
63.189 +    my_node->my_rank=my_rank;
63.190 +    my_node->tree_size=num_nodes;
63.191 +
63.192 +    /* find my level: peel off whole levels (1, k, k^2, ... ranks) until
63.193 +     * my_rank falls inside the current one */
63.194 +    my_level_in_tree=-1;
63.195 +    result=my_rank;
63.196 +    /* cnt - number of ranks in given level */
63.197 +    cnt=1;
63.198 +    while( 0 <= result ) {
63.199 +        result-=cnt;
63.200 +        cnt*=tree_order;
63.201 +        my_level_in_tree++;
63.202 +    }
63.203 +
63.204 +    if( 0 == my_rank ) {
63.205 +        my_node->n_parents=0;
63.206 +        my_node->parent_rank=-1;
63.207 +        my_rank_in_my_level=0;
63.208 +    } else {
63.209 +        my_node->n_parents=1;
63.210 +        cnt=1;
63.211 +        cum_cnt=0;
63.212 +        for (lvl = 0 ; lvl < my_level_in_tree ; lvl ++ ) {
63.213 +            /* cumulative count up to this level */
63.214 +            cum_cnt+=cnt;
63.215 +            /* number of ranks in this level */
63.216 +            cnt*=tree_order;
63.217 +        }
63.218 +
63.219 +        my_node->rank_on_level =
63.220 +            my_rank_in_my_level =
63.221 +            my_rank-cum_cnt;
63.222 +        my_node->level_size = cnt;
63.223 +
63.224 +        rc = netpatterns_setup_recursive_knomial_tree_node(
63.225 +                my_node->level_size, my_node->rank_on_level,
63.226 +                tree_order, &my_node->k_node);
63.227 +        if (OMPI_SUCCESS != rc) {
63.228 +            goto Error;
63.229 +        }
63.230 +
63.231 +        /* tree_order consecutive ranks have the same parent */
63.232 +        my_node->parent_rank=cum_cnt-cnt/tree_order+my_rank_in_my_level/tree_order;
63.233 +    }
63.234 +
63.235 +    /* figure out number of levels in the tree */
63.236 +    n_lvls_in_tree=0;
63.237 +    result=num_nodes;
63.238 +    cnt=1;
63.239 +    while( 0 < result ) {
63.240 +        result-=cnt;
63.241 +        cnt*=tree_order;
63.242 +        n_lvls_in_tree++;
63.243 +    }
63.244 +
63.245 +    if(result < 0) {
63.246 +        /* the last level is only partly filled: from here on num_nodes
63.247 +         * holds the capacity of that level (cnt / tree_order) and bounds
63.248 +         * the child ranks below.  NOTE(review): confirm this bound. */
63.249 +        num_nodes = cnt / tree_order;
63.250 +    }
63.251 +
63.252 +    my_node->children_ranks=(int *)NULL;
63.253 +
63.254 +    /* get list of children */
63.255 +    if( my_level_in_tree == (n_lvls_in_tree -1 ) ) {
63.256 +        /* last level has no children */
63.257 +        my_node->n_children=0;
63.258 +    } else {
63.259 +        /* cum_cnt - first rank of the level below mine */
63.260 +        cum_cnt=0;
63.261 +        cnt=1;
63.262 +        for( lvl=0 ; lvl <= my_level_in_tree ; lvl++ ) {
63.263 +            cum_cnt+=cnt;
63.264 +            cnt*=tree_order;
63.265 +        }
63.266 +        start_index=cum_cnt+my_rank_in_my_level*tree_order;
63.267 +        end_index=start_index+tree_order-1;
63.268 +
63.269 +        /* don't go out of bounds at the end of the list */
63.270 +        if( end_index >= num_nodes ) {
63.271 +            end_index = num_nodes-1;
63.272 +        }
63.273 +
63.274 +        if( start_index <= (num_nodes-1) ) {
63.275 +            my_node->n_children=end_index-start_index+1;
63.276 +        } else {
63.277 +            my_node->n_children=0;
63.278 +        }
63.279 +
63.280 +        my_node->children_ranks=NULL;
63.281 +        if( 0 < my_node->n_children ) {
63.282 +            my_node->children_ranks=
63.283 +                (int *)malloc( sizeof(int)*my_node->n_children);
63.284 +            if( NULL == my_node->children_ranks) {
63.285 +                goto Error;
63.286 +            }
63.287 +            for (lvl= start_index ; lvl <= end_index ; lvl++ ) {
63.288 +                my_node->children_ranks[lvl-start_index]=lvl;
63.289 +            }
63.290 +        }
63.291 +    }
63.292 +    /* set node type */
63.293 +    if( 0 == my_node->n_parents ) {
63.294 +        my_node->my_node_type=ROOT_NODE;
63.295 +    } else if ( 0 == my_node->n_children ) {
63.296 +        my_node->my_node_type=LEAF_NODE;
63.297 +    } else {
63.298 +        my_node->my_node_type=INTERIOR_NODE;
63.299 +    }
63.300 +
63.301 +    /* successful return */
63.302 +    return OMPI_SUCCESS;
63.303 +
63.304 +Error:
63.305 +    /* error return */
63.306 +    return OMPI_ERROR;
63.307 +}
63.308 +
63.309 +/* Round size up to the nearest power of radix, i.e. the smallest
63.310 + * radix^k that is >= size, and store the exponent k (the depth of the
63.311 + * resulting tree) in *n_lvls.  Returns 0 with *n_lvls set to 0 when
63.312 + * size < 1, or when radix < 2 and size > 1 (not a usable radix). */
63.313 +OMPI_DECLSPEC int roundup_to_power_radix ( int radix, int size, int *n_lvls )
63.314 +{
63.315 +    int n_levels=0, return_value=1, result;
63.316 +    *n_lvls=0;
63.317 +    /* radix < 2 with size > 1 would divide by zero or never terminate */
63.318 +    if( 1 > size || ( 1 < size && 2 > radix ) ) {
63.319 +        return 0;
63.320 +    }
63.321 +    result=size-1;
63.322 +    while (0 < result ) {
63.323 +        result/=radix;
63.324 +        n_levels++;
63.325 +        return_value*=radix;
63.326 +    }
63.327 +    *n_lvls=n_levels;
63.328 +    return return_value;
63.329 +}
63.330 +
63.331 +/*
63.332 + * Recursively fill in nodes_data[] for the subtree rooted at rank my_node.
63.333 + * num_nodes is the number of descendants of my_node; they take the
63.334 + * contiguous ranks my_node+1 .. my_node+num_nodes and are split among at
63.335 + * most tree_order children, the first n_extra children getting one rank
63.336 + * more than the rest.  Sets n_children/children_ranks of my_node and
63.337 + * n_parents/parent_rank of every child; my_node's own parent fields are
63.338 + * not written.  Returns OMPI_SUCCESS, OMPI_ERROR for tree_order < 1 or
63.339 + * num_nodes < 0, or OMPI_ERR_OUT_OF_RESOURCE when malloc fails.
63.340 + */
63.341 +static int fill_in_node_data(int tree_order, int num_nodes, int my_node,
63.342 +        netpatterns_tree_node_t *nodes_data)
63.343 +{
63.344 +    int rc, num_ranks_per_child, num_children, n_extra;
63.345 +    int child, rank, n_to_offset, n_ranks_to_child;
63.346 +
63.347 +    /* tree_order is a divisor below; a negative count has no meaning */
63.348 +    if( 1 > tree_order || 0 > num_nodes ) {
63.349 +        return OMPI_ERROR;
63.350 +    }
63.351 +
63.352 +    /* figure out who are my children */
63.353 +    num_ranks_per_child=num_nodes/tree_order;
63.354 +    if( num_ranks_per_child ) {
63.355 +        num_children=tree_order;
63.356 +        n_extra=num_nodes-num_ranks_per_child*tree_order;
63.357 +    } else {
63.358 +        /* fewer descendants than tree_order: each one is a child by itself */
63.359 +        num_children=num_nodes;
63.360 +        n_extra=0;
63.361 +        num_ranks_per_child=1;
63.362 +    }
63.363 +
63.364 +    nodes_data[my_node].n_children=num_children;
63.365 +    nodes_data[my_node].children_ranks=NULL;
63.366 +    if( num_children ) {
63.367 +        nodes_data[my_node].children_ranks=(int *)
63.368 +            malloc(sizeof(int)*num_children);
63.369 +        if( NULL == nodes_data[my_node].children_ranks ) {
63.370 +            fprintf(stderr, "Cannot allocate memory for children_ranks.\n");
63.371 +            rc = OMPI_ERR_OUT_OF_RESOURCE;
63.372 +            goto error;
63.373 +        }
63.374 +    }
63.375 +
63.376 +    for( child=0 ; child < num_children ; child ++ ) {
63.377 +        /* every earlier child holding an extra rank shifts this one by one */
63.378 +        if( n_extra ) {
63.379 +            n_to_offset=child;
63.380 +            if( n_to_offset > n_extra){
63.381 +                n_to_offset=n_extra;
63.382 +            }
63.383 +        } else {
63.384 +            n_to_offset=0;
63.385 +        }
63.386 +
63.387 +        rank=my_node+1+child*num_ranks_per_child;
63.388 +        rank+=n_to_offset;
63.389 +
63.390 +        /* set parent information */
63.391 +        nodes_data[rank].n_parents=1;
63.392 +        nodes_data[rank].parent_rank=my_node;
63.393 +
63.394 +        n_ranks_to_child=num_ranks_per_child;
63.395 +        if(n_extra && (child < n_extra) ) {
63.396 +            n_ranks_to_child++;
63.397 +        }
63.398 +
63.399 +        /* set child information */
63.400 +        nodes_data[my_node].children_ranks[child]=rank;
63.401 +
63.402 +        /* remove the child from the list of ranks */
63.403 +        n_ranks_to_child--;
63.404 +        rc=fill_in_node_data(tree_order, n_ranks_to_child, rank, nodes_data);
63.405 +        if( OMPI_SUCCESS != rc ) {
63.406 +            goto error;
63.407 +        }
63.408 +    }
63.409 +
63.410 +    return OMPI_SUCCESS;
63.411 +error:
63.412 +    return rc;
63.413 +}
63.414 +
63.415 +/*
63.416 + * Set up the array describing a k-ary communication tree in which the
63.417 + * children form a contiguous range of ranks at each level.  Rank 0 is
63.418 + * always the root - ranks may be rotated based on who the actual root
63.419 + * is.  On success *tree_nodes is a newly allocated array of num_nodes
63.420 + * entries.  Returns OMPI_ERROR, with *tree_nodes set to NULL, when
63.421 + * tree_order or num_nodes is below 1.
63.422 + */
63.423 +OMPI_DECLSPEC int netpatterns_setup_narray_tree_contigous_ranks(
63.424 +        int tree_order, int num_nodes,
63.425 +        netpatterns_tree_node_t **tree_nodes)
63.426 +{
63.427 +    int rc;
63.428 +
63.429 +    /* tree_order is used as a divisor and entry 0 must exist */
63.430 +    if( 1 > tree_order || 1 > num_nodes ) {
63.431 +        *tree_nodes=NULL;
63.432 +        return OMPI_ERROR;
63.433 +    }
63.434 +    /* zero-filled, so fields nothing below assigns are not garbage */
63.435 +    *tree_nodes=(netpatterns_tree_node_t *)calloc(
63.436 +            num_nodes, sizeof(netpatterns_tree_node_t));
63.437 +    if(!(*tree_nodes) ) {
63.438 +        fprintf(stderr, "Cannot allocate memory for tree_nodes.\n");
63.439 +        return OMPI_ERR_OUT_OF_RESOURCE;
63.440 +    }
63.441 +    rc=fill_in_node_data(tree_order, num_nodes-1, 0, *tree_nodes);
63.442 +    /* root has no parent; set last so the fill cannot overwrite it */
63.443 +    (*tree_nodes)[0].n_parents=0;
63.444 +    (*tree_nodes)[0].parent_rank=-1;
63.445 +    return rc;
63.446 +}