Fixes #3506: Please move r28035 to 1.7 v1.7
author jsquyres
Tue Feb 19 22:50:56 2013 +0000 (2 months ago)
branch v1.7
changeset 21418 a30e81e61730
parent 21417 c412ef2fcb1a
child 21424 d7b127648df6
Fixes #3506: Please move r28035 to 1.7

---svn-pre-commit-ignore-below---

Custom patch on the ticket based on:

r3485 [[BR]]
* Fix windows makefile to deal with spaces in topdir
* offset time to Unix epoch so that gettimeofday returns sane values
on windows
* attempted hack at making non-blocking I/O work for iovecs in
Windows
ompi/Makefile.am
ompi/mca/bcol/basesmuma/Makefile.am
ompi/mca/bcol/basesmuma/bcol_basesmuma.h
ompi/mca/bcol/basesmuma/bcol_basesmuma_bcast.c
ompi/mca/bcol/basesmuma/bcol_basesmuma_buf_mgmt.c
ompi/mca/bcol/basesmuma/bcol_basesmuma_fanin.c
ompi/mca/bcol/basesmuma/bcol_basesmuma_fanout.c
ompi/mca/bcol/basesmuma/bcol_basesmuma_module.c
ompi/mca/bcol/basesmuma/bcol_basesmuma_rd_nb_barrier.c
ompi/mca/bcol/basesmuma/bcol_basesmuma_rk_barrier.c
ompi/mca/bcol/basesmuma/bcol_basesmuma_setup.c
ompi/mca/bcol/basesmuma/bcol_basesmuma_smcm.c
ompi/mca/bcol/bcol.h
ompi/mca/bcol/iboffload/bcol_iboffload.h
ompi/mca/bcol/iboffload/bcol_iboffload_barrier.c
ompi/mca/bcol/iboffload/bcol_iboffload_bcast.c
ompi/mca/bcol/iboffload/bcol_iboffload_bcast.h
ompi/mca/bcol/iboffload/bcol_iboffload_module.c
ompi/mca/bcol/ptpcoll/Makefile.am
ompi/mca/bcol/ptpcoll/bcol_ptpcoll.h
ompi/mca/bcol/ptpcoll/bcol_ptpcoll_barrier.c
ompi/mca/bcol/ptpcoll/bcol_ptpcoll_bcast.c
ompi/mca/bcol/ptpcoll/bcol_ptpcoll_bcast.h
ompi/mca/bcol/ptpcoll/bcol_ptpcoll_module.c
ompi/mca/coll/ml/Makefile.am
ompi/mca/coll/ml/coll_ml_component.c
ompi/mca/coll/ml/coll_ml_custom_utils.c
ompi/mca/coll/ml/coll_ml_mca.c
ompi/mca/coll/ml/coll_ml_module.c
ompi/mca/common/commpatterns/.windows
ompi/mca/common/commpatterns/Makefile.am
ompi/mca/common/commpatterns/common_allgather.c
ompi/mca/common/commpatterns/common_allreduce.c
ompi/mca/common/commpatterns/common_bcast.c
ompi/mca/common/commpatterns/common_coll_ops.h
ompi/mca/common/commpatterns/common_netpatterns.h
ompi/mca/common/commpatterns/ompi_common_netpatterns_macros.h
ompi/mca/common/netpatterns/.windows
ompi/mca/common/netpatterns/Makefile.am
ompi/mca/common/netpatterns/common_allreduce.c
ompi/mca/common/netpatterns/common_coll_ops.h
ompi/mca/common/netpatterns/common_netpatterns.h
ompi/mca/common/netpatterns/common_netpatterns_base.c
ompi/mca/common/netpatterns/common_netpatterns_knomial_tree.c
ompi/mca/common/netpatterns/common_netpatterns_knomial_tree.h
ompi/mca/common/netpatterns/common_netpatterns_multinomial_tree.c
ompi/mca/common/netpatterns/common_netpatterns_nary_tree.c
ompi/mca/sbgp/basesmsocket/Makefile.am
ompi/mca/sbgp/basesmsocket/sbgp_basesmsocket_component.c
ompi/patterns/comm/Makefile.am
ompi/patterns/comm/allgather.c
ompi/patterns/comm/allreduce.c
ompi/patterns/comm/bcast.c
ompi/patterns/comm/coll_ops.h
ompi/patterns/comm/commpatterns.h
ompi/patterns/net/Makefile.am
ompi/patterns/net/allreduce.c
ompi/patterns/net/coll_ops.h
ompi/patterns/net/netpatterns.h
ompi/patterns/net/netpatterns_base.c
ompi/patterns/net/netpatterns_knomial_tree.c
ompi/patterns/net/netpatterns_knomial_tree.h
ompi/patterns/net/netpatterns_multinomial_tree.c
ompi/patterns/net/netpatterns_nary_tree.c
     1.1 --- a/ompi/Makefile.am	Tue Feb 19 22:36:41 2013 +0000
     1.2 +++ b/ompi/Makefile.am	Tue Feb 19 22:50:56 2013 +0000
     1.3 @@ -171,6 +171,8 @@
     1.4  include mpi/Makefile.am
     1.5  include mpi/man/man3/Makefile.extra
     1.6  include mpiext/Makefile.am
     1.7 +include patterns/net/Makefile.am
     1.8 +include patterns/comm/Makefile.am
     1.9  
    1.10  # Ensure that the man page directory exists before we try to make man
    1.11  # page files (because ompi/mpi/man/man3 has no config.status-generated
     2.1 --- a/ompi/mca/bcol/basesmuma/Makefile.am	Tue Feb 19 22:36:41 2013 +0000
     2.2 +++ b/ompi/mca/bcol/basesmuma/Makefile.am	Tue Feb 19 22:50:56 2013 +0000
     2.3 @@ -51,9 +51,7 @@
     2.4  mca_bcol_basesmuma_la_SOURCES = $(sources)
     2.5  mca_bcol_basesmuma_la_LDFLAGS = -module -avoid-version $(btl_portals_LDFLAGS)
     2.6  mca_bcol_basesmuma_la_LIBADD = \
     2.7 -    	$(btl_portals_LIBS) \
     2.8 -        $(top_ompi_builddir)/ompi/mca/common/netpatterns/libmca_common_netpatterns.la \
     2.9 -        $(top_ompi_builddir)/ompi/mca/common/commpatterns/libmca_common_commpatterns.la 
    2.10 +    	$(btl_portals_LIBS)
    2.11  
    2.12  
    2.13  noinst_LTLIBRARIES = $(component_noinst)
     3.1 --- a/ompi/mca/bcol/basesmuma/bcol_basesmuma.h	Tue Feb 19 22:36:41 2013 +0000
     3.2 +++ b/ompi/mca/bcol/basesmuma/bcol_basesmuma.h	Tue Feb 19 22:50:56 2013 +0000
     3.3 @@ -19,7 +19,7 @@
     3.4  #include "ompi/mca/coll/ml/coll_ml_allocation.h"
     3.5  #include "ompi/request/request.h"
     3.6  #include "ompi/proc/proc.h"
     3.7 -#include "ompi/mca/common/netpatterns/common_netpatterns.h"
     3.8 +#include "ompi/patterns/net/netpatterns.h"
     3.9  
    3.10  #include "orte/util/name_fns.h"
    3.11  #include "orte/util/proc_info.h"
    3.12 @@ -777,16 +777,16 @@
    3.13      sm_buffer_mgmt colls_with_user_data;
    3.14  
    3.15      /* recursive-doubling tree node */
    3.16 -    mca_common_netpatterns_pair_exchange_node_t recursive_doubling_tree;
    3.17 +    netpatterns_pair_exchange_node_t recursive_doubling_tree;
    3.18  
    3.19      /* k-nomial gather/allgather tree */
    3.20 -    mca_common_netpatterns_k_exchange_node_t knomial_allgather_tree;
    3.21 +    netpatterns_k_exchange_node_t knomial_allgather_tree;
    3.22  
    3.23      /* fanin tree node - root is rank 0 */
    3.24 -    mca_common_netpatterns_tree_node_t fanin_node;
    3.25 +    netpatterns_tree_node_t fanin_node;
    3.26  
    3.27      /* fanout tree node - root is rank 0 */
    3.28 -    mca_common_netpatterns_tree_node_t fanout_node;
    3.29 +    netpatterns_tree_node_t fanout_node;
    3.30  
    3.31      /* index of blocking barrier memory region to use */
    3.32      int index_blocking_barrier_memory_bank;
    3.33 @@ -795,18 +795,18 @@
    3.34      int *comm_to_sm_map;
    3.35  
    3.36      /* reduction fanout tree */
    3.37 -    mca_common_netpatterns_tree_node_t* reduction_tree;
    3.38 +    netpatterns_tree_node_t* reduction_tree;
    3.39  
    3.40      /* broadcast fanout tree */
    3.41 -    mca_common_netpatterns_tree_node_t* fanout_read_tree;
    3.42 +    netpatterns_tree_node_t* fanout_read_tree;
    3.43  
    3.44      /* scatter - k-ary tree */
    3.45      int scatter_kary_radix;
    3.46 -    mca_common_netpatterns_tree_node_t *scatter_kary_tree;
    3.47 +    netpatterns_tree_node_t *scatter_kary_tree;
    3.48  
    3.49  	/* Knomial exchange tree */
    3.50  	/* Currently used for only large message reduce */
    3.51 -	mca_common_netpatterns_k_exchange_node_t knomial_exchange_tree;
    3.52 +	netpatterns_k_exchange_node_t knomial_exchange_tree;
    3.53  
    3.54      /* sequence number offset - want to make sure that we start
    3.55       *   id'ing collectives with id 0, so we can have simple
     4.1 --- a/ompi/mca/bcol/basesmuma/bcol_basesmuma_bcast.c	Tue Feb 19 22:36:41 2013 +0000
     4.2 +++ b/ompi/mca/bcol/basesmuma/bcol_basesmuma_bcast.c	Tue Feb 19 22:50:56 2013 +0000
     4.3 @@ -135,7 +135,7 @@
     4.4      volatile char* parent_data_pointer;
     4.5      mca_bcol_basesmuma_header_t *my_ctl_pointer;
     4.6      volatile mca_bcol_basesmuma_header_t *parent_ctl_pointer;
     4.7 -    mca_common_netpatterns_tree_node_t* my_fanout_read_tree;
     4.8 +    netpatterns_tree_node_t* my_fanout_read_tree;
     4.9      size_t pack_len = 0, dt_size;
    4.10  
    4.11      void *data_addr = (void *)((unsigned char *)input_args->src_desc->data_addr );
    4.12 @@ -268,7 +268,7 @@
    4.13      mca_bcol_basesmuma_module_t* bcol_module=
    4.14          (mca_bcol_basesmuma_module_t *)c_input_args->bcol_module;
    4.15  
    4.16 -    mca_common_netpatterns_tree_node_t* my_fanout_read_tree;
    4.17 +    netpatterns_tree_node_t* my_fanout_read_tree;
    4.18      size_t pack_len = 0, dt_size;
    4.19  
    4.20      void *data_addr = (void *)((unsigned char *)input_args->src_desc->data_addr);
     5.1 --- a/ompi/mca/bcol/basesmuma/bcol_basesmuma_buf_mgmt.c	Tue Feb 19 22:36:41 2013 +0000
     5.2 +++ b/ompi/mca/bcol/basesmuma/bcol_basesmuma_buf_mgmt.c	Tue Feb 19 22:50:56 2013 +0000
     5.3 @@ -18,7 +18,7 @@
     5.4  #include "ompi/mca/bcol/bcol.h"
     5.5  #include "ompi/mca/bcol/base/base.h"
     5.6  #include "ompi/mca/coll/ml/coll_ml.h"
     5.7 -#include "ompi/mca/common/commpatterns/common_coll_ops.h"
     5.8 +#include "ompi/patterns/comm/coll_ops.h"
     5.9  #include "ompi/mca/dpm/dpm.h"
    5.10  
    5.11  
     6.1 --- a/ompi/mca/bcol/basesmuma/bcol_basesmuma_fanin.c	Tue Feb 19 22:36:41 2013 +0000
     6.2 +++ b/ompi/mca/bcol/basesmuma/bcol_basesmuma_fanin.c	Tue Feb 19 22:50:56 2013 +0000
     6.3 @@ -14,7 +14,7 @@
     6.4  #include "ompi/constants.h"
     6.5  #include "ompi/communicator/communicator.h"
     6.6  #include "ompi/mca/bcol/bcol.h"
     6.7 -#include "ompi/mca/common/netpatterns/common_netpatterns.h"
     6.8 +#include "ompi/patterns/net/netpatterns.h"
     6.9  
    6.10  #include "opal/sys/atomic.h"
    6.11  
    6.12 @@ -52,7 +52,7 @@
    6.13      volatile mca_bcol_basesmuma_header_t *child_ctl;
    6.14  
    6.15  
    6.16 -    mca_common_netpatterns_tree_node_t *my_tree_node = &(bcol_module->fanin_node);
    6.17 +    netpatterns_tree_node_t *my_tree_node = &(bcol_module->fanin_node);
    6.18  
    6.19      /* Figure out - what instance of the basesmuma bcol I am */
    6.20      sequence_number = input_args->sequence_num;
    6.21 @@ -134,7 +134,7 @@
    6.22      volatile mca_bcol_basesmuma_header_t *child_ctl;
    6.23  
    6.24  
    6.25 -    mca_common_netpatterns_tree_node_t *my_tree_node = &(bcol_module->fanin_node);
    6.26 +    netpatterns_tree_node_t *my_tree_node = &(bcol_module->fanin_node);
    6.27  
    6.28      sequence_number = input_args->sequence_num;
    6.29  
     7.1 --- a/ompi/mca/bcol/basesmuma/bcol_basesmuma_fanout.c	Tue Feb 19 22:36:41 2013 +0000
     7.2 +++ b/ompi/mca/bcol/basesmuma/bcol_basesmuma_fanout.c	Tue Feb 19 22:50:56 2013 +0000
     7.3 @@ -14,7 +14,7 @@
     7.4  #include "ompi/constants.h"
     7.5  #include "ompi/communicator/communicator.h"
     7.6  #include "ompi/mca/bcol/bcol.h"
     7.7 -#include "ompi/mca/common/netpatterns/common_netpatterns.h"
     7.8 +#include "ompi/patterns/net/netpatterns.h"
     7.9  
    7.10  #include "opal/sys/atomic.h"
    7.11  
    7.12 @@ -50,7 +50,7 @@
    7.13      volatile mca_bcol_basesmuma_header_t *parent_ctl;
    7.14  
    7.15  
    7.16 -    mca_common_netpatterns_tree_node_t *my_tree_node = &(bcol_module->fanin_node);
    7.17 +    netpatterns_tree_node_t *my_tree_node = &(bcol_module->fanin_node);
    7.18  
    7.19      /* Figure out - what instance of the basesmuma bcol I am */
    7.20      sequence_number = input_args->sequence_num;
     8.1 --- a/ompi/mca/bcol/basesmuma/bcol_basesmuma_module.c	Tue Feb 19 22:36:41 2013 +0000
     8.2 +++ b/ompi/mca/bcol/basesmuma/bcol_basesmuma_module.c	Tue Feb 19 22:50:56 2013 +0000
     8.3 @@ -19,7 +19,7 @@
     8.4  #include "ompi/mca/bcol/bcol.h"
     8.5  #include "ompi/mca/bcol/base/base.h"
     8.6  #include "ompi/mca/dpm/dpm.h"
     8.7 -#include "ompi/mca/common/netpatterns/common_netpatterns.h"
     8.8 +#include "ompi/patterns/net/netpatterns.h"
     8.9  
    8.10  
    8.11  #include "orte/mca/grpcomm/grpcomm.h" 
    8.12 @@ -221,7 +221,7 @@
    8.13  				*sm_module)
    8.14  {
    8.15  	    int rc = OMPI_SUCCESS;
    8.16 -	    rc = mca_common_netpatterns_setup_recursive_knomial_tree_node(
    8.17 +	    rc = netpatterns_setup_recursive_knomial_tree_node(
    8.18  						                    sm_module->super.sbgp_partner_module->group_size,
    8.19     			              					sm_module->super.sbgp_partner_module->my_index,
    8.20              		     					mca_bcol_basesmuma_component.k_nomial_radix,
    8.21 @@ -234,7 +234,7 @@
    8.22  {
    8.23      mca_bcol_basesmuma_module_t *sm_module = (mca_bcol_basesmuma_module_t *) super;
    8.24      
    8.25 -    return mca_common_netpatterns_setup_recursive_knomial_allgather_tree_node(
    8.26 +    return netpatterns_setup_recursive_knomial_allgather_tree_node(
    8.27              sm_module->super.sbgp_partner_module->group_size,
    8.28              sm_module->super.sbgp_partner_module->my_index,
    8.29              mca_bcol_basesmuma_component.k_nomial_radix,
    8.30 @@ -294,7 +294,7 @@
    8.31      sm_module->reduction_tree = NULL;
    8.32      sm_module->fanout_read_tree = NULL;
    8.33  
    8.34 -    ret=mca_common_netpatterns_setup_recursive_doubling_tree_node(
    8.35 +    ret=netpatterns_setup_recursive_doubling_tree_node(
    8.36          module->group_size,module->my_index,
    8.37          &(sm_module->recursive_doubling_tree));
    8.38      if(OMPI_SUCCESS != ret) {
    8.39 @@ -306,7 +306,7 @@
    8.40      /* setup the fanin tree - this is used only as part of a hierarchical
    8.41       *   barrier, so will set this up with rank 0 as the root */
    8.42      my_rank=module->my_index;
    8.43 -    ret=mca_common_netpatterns_setup_narray_tree(cs->radix_fanin,
    8.44 +    ret=netpatterns_setup_narray_tree(cs->radix_fanin,
    8.45          my_rank,module->group_size,&(sm_module->fanin_node));
    8.46      if(OMPI_SUCCESS != ret) {
    8.47  	    fprintf(stderr,"Error setting up fanin tree \n");
    8.48 @@ -316,7 +316,7 @@
    8.49  
    8.50      /* setup the fanout tree - this is used only as part of a hierarchical
    8.51       *   barrier, so will set this up with rank 0 as the root */
    8.52 -    ret=mca_common_netpatterns_setup_narray_tree(cs->radix_fanout,
    8.53 +    ret=netpatterns_setup_narray_tree(cs->radix_fanout,
    8.54          my_rank,module->group_size,&(sm_module->fanout_node));
    8.55      if(OMPI_SUCCESS != ret) {
    8.56  	    fprintf(stderr,"Error setting up fanout tree \n");
    8.57 @@ -333,14 +333,14 @@
    8.58        bcast_radix = cs->radix_read_tree;
    8.59  
    8.60         /* initialize fan-out read tree */
    8.61 -       sm_module->fanout_read_tree=(mca_common_netpatterns_tree_node_t*) malloc(
    8.62 -               sizeof(mca_common_netpatterns_tree_node_t)*module->group_size);
    8.63 +       sm_module->fanout_read_tree=(netpatterns_tree_node_t*) malloc(
    8.64 +               sizeof(netpatterns_tree_node_t)*module->group_size);
    8.65         if( NULL == sm_module->fanout_read_tree ) {
    8.66             goto Error;
    8.67         }
    8.68  
    8.69         for(i = 0; i < module->group_size; i++){
    8.70 -          ret = mca_common_netpatterns_setup_narray_tree(bcast_radix,
    8.71 +          ret = netpatterns_setup_narray_tree(bcast_radix,
    8.72                    i, module->group_size, &(sm_module->fanout_read_tree[i]));
    8.73            if(OMPI_SUCCESS != ret) {
    8.74                goto Error;
    8.75 @@ -363,13 +363,13 @@
    8.76      */
    8.77  
    8.78      /* initialize reduction tree */
    8.79 -    sm_module->reduction_tree=(mca_common_netpatterns_tree_node_t *) malloc(
    8.80 -            sizeof(mca_common_netpatterns_tree_node_t )*module->group_size);
    8.81 +    sm_module->reduction_tree=(netpatterns_tree_node_t *) malloc(
    8.82 +            sizeof(netpatterns_tree_node_t )*module->group_size);
    8.83      if( NULL == sm_module->reduction_tree ) { 
    8.84          goto Error;
    8.85      }
    8.86          
    8.87 -    ret=mca_common_netpatterns_setup_multinomial_tree(
    8.88 +    ret=netpatterns_setup_multinomial_tree(
    8.89              cs->order_reduction_tree,module->group_size,
    8.90              sm_module->reduction_tree);
    8.91      if( MPI_SUCCESS != ret ) {
    8.92 @@ -393,7 +393,7 @@
    8.93       */
    8.94      sm_module->scatter_kary_radix=cs->scatter_kary_radix;
    8.95      sm_module->scatter_kary_tree=NULL;
    8.96 -    ret=mca_common_netpatterns_setup_narray_tree_contigous_ranks(
    8.97 +    ret=netpatterns_setup_narray_tree_contigous_ranks(
    8.98              sm_module->scatter_kary_radix,
    8.99              sm_module->super.sbgp_partner_module->group_size,
   8.100              &(sm_module->scatter_kary_tree));
     9.1 --- a/ompi/mca/bcol/basesmuma/bcol_basesmuma_rd_nb_barrier.c	Tue Feb 19 22:36:41 2013 +0000
     9.2 +++ b/ompi/mca/bcol/basesmuma/bcol_basesmuma_rd_nb_barrier.c	Tue Feb 19 22:50:56 2013 +0000
     9.3 @@ -17,7 +17,7 @@
     9.4  #include "ompi/mca/bcol/bcol.h"
     9.5  #include "bcol_basesmuma.h"
     9.6  #include "opal/sys/atomic.h"
     9.7 -#include "ompi/mca/common/netpatterns/common_netpatterns.h"
     9.8 +#include "ompi/patterns/net/netpatterns.h"
     9.9  
    9.10  /*
    9.11   * Initialize nonblocking barrier.  This is code specific for handling
    9.12 @@ -35,7 +35,7 @@
    9.13      int ret=OMPI_SUCCESS, idx, leading_dim, loop_cnt, exchange;
    9.14      int pair_rank;
    9.15      mca_bcol_basesmuma_ctl_struct_t **ctl_structs;
    9.16 -    mca_common_netpatterns_pair_exchange_node_t *my_exchange_node;
    9.17 +    netpatterns_pair_exchange_node_t *my_exchange_node;
    9.18      int extra_rank, my_rank, pow_2;
    9.19      mca_bcol_basesmuma_ctl_struct_t volatile *partner_ctl;
    9.20      mca_bcol_basesmuma_ctl_struct_t volatile *my_ctl;
    9.21 @@ -205,7 +205,7 @@
    9.22      int ret=OMPI_SUCCESS, idx, leading_dim, loop_cnt, exchange;
    9.23      int pair_rank, start_index, restart_phase;
    9.24      mca_bcol_basesmuma_ctl_struct_t **ctl_structs;
    9.25 -    mca_common_netpatterns_pair_exchange_node_t *my_exchange_node;
    9.26 +    netpatterns_pair_exchange_node_t *my_exchange_node;
    9.27      int extra_rank, my_rank, pow_2;
    9.28      mca_bcol_basesmuma_ctl_struct_t volatile *partner_ctl;
    9.29      mca_bcol_basesmuma_ctl_struct_t volatile *my_ctl;
    10.1 --- a/ompi/mca/bcol/basesmuma/bcol_basesmuma_rk_barrier.c	Tue Feb 19 22:36:41 2013 +0000
    10.2 +++ b/ompi/mca/bcol/basesmuma/bcol_basesmuma_rk_barrier.c	Tue Feb 19 22:50:56 2013 +0000
    10.3 @@ -55,7 +55,7 @@
    10.4      int flag_offset = 0;
    10.5      volatile int8_t ready_flag;
    10.6      mca_bcol_basesmuma_module_t *bcol_module = (mca_bcol_basesmuma_module_t *) const_args->bcol_module;
    10.7 -    mca_common_netpatterns_k_exchange_node_t *exchange_node = &bcol_module->knomial_allgather_tree;
    10.8 +    netpatterns_k_exchange_node_t *exchange_node = &bcol_module->knomial_allgather_tree;
    10.9      mca_bcol_basesmuma_component_t *cm = &mca_bcol_basesmuma_component;
   10.10      uint32_t buffer_index = input_args->buffer_index;
   10.11      int *active_requests =
   10.12 @@ -244,7 +244,7 @@
   10.13      int flag_offset;
   10.14      volatile int8_t ready_flag;
   10.15      mca_bcol_basesmuma_module_t *bcol_module = (mca_bcol_basesmuma_module_t *) const_args->bcol_module;
   10.16 -    mca_common_netpatterns_k_exchange_node_t *exchange_node = &bcol_module->knomial_allgather_tree;
   10.17 +    netpatterns_k_exchange_node_t *exchange_node = &bcol_module->knomial_allgather_tree;
   10.18      mca_bcol_basesmuma_component_t *cm = &mca_bcol_basesmuma_component;
   10.19      uint32_t buffer_index = input_args->buffer_index;
   10.20      int *active_requests =
    11.1 --- a/ompi/mca/bcol/basesmuma/bcol_basesmuma_setup.c	Tue Feb 19 22:36:41 2013 +0000
    11.2 +++ b/ompi/mca/bcol/basesmuma/bcol_basesmuma_setup.c	Tue Feb 19 22:50:56 2013 +0000
    11.3 @@ -21,7 +21,7 @@
    11.4  #include "ompi/mca/bcol/bcol.h"
    11.5  #include "ompi/mca/bcol/base/base.h"
    11.6  #include "ompi/mca/dpm/dpm.h"
    11.7 -#include "ompi/mca/common/commpatterns/common_coll_ops.h"
    11.8 +#include "ompi/patterns/comm/coll_ops.h"
    11.9  
   11.10  #include "orte/mca/rml/rml.h"
   11.11  #include "orte/mca/rml/rml_types.h"
    12.1 --- a/ompi/mca/bcol/basesmuma/bcol_basesmuma_smcm.c	Tue Feb 19 22:36:41 2013 +0000
    12.2 +++ b/ompi/mca/bcol/basesmuma/bcol_basesmuma_smcm.c	Tue Feb 19 22:50:56 2013 +0000
    12.3 @@ -18,7 +18,7 @@
    12.4  
    12.5  #include "ompi/mca/dpm/dpm.h"
    12.6  #include "ompi/proc/proc.h"
    12.7 -#include "ompi/mca/common/commpatterns/common_coll_ops.h"
    12.8 +#include "ompi/patterns/comm/coll_ops.h"
    12.9  
   12.10  #include "orte/util/show_help.h"
   12.11  #include "orte/util/name_fns.h"
    13.1 --- a/ompi/mca/bcol/bcol.h	Tue Feb 19 22:36:41 2013 +0000
    13.2 +++ b/ompi/mca/bcol/bcol.h	Tue Feb 19 22:50:56 2013 +0000
    13.3 @@ -20,7 +20,7 @@
    13.4  #include "ompi/datatype/ompi_datatype.h"
    13.5  #include "ompi/op/op.h"
    13.6  #include "ompi/include/ompi/constants.h"
    13.7 -#include "ompi/mca/common/netpatterns/common_netpatterns_knomial_tree.h"
    13.8 +#include "ompi/patterns/net/netpatterns_knomial_tree.h"
    13.9  
   13.10  #include <limits.h>
   13.11  
    14.1 --- a/ompi/mca/bcol/iboffload/bcol_iboffload.h	Tue Feb 19 22:36:41 2013 +0000
    14.2 +++ b/ompi/mca/bcol/iboffload/bcol_iboffload.h	Tue Feb 19 22:50:56 2013 +0000
    14.3 @@ -366,16 +366,16 @@
    14.4      opal_list_t collfrag_pending;
    14.5  
    14.6      /* recursive-doubling tree node */
    14.7 -    mca_common_netpatterns_pair_exchange_node_t recursive_doubling_tree;
    14.8 +    netpatterns_pair_exchange_node_t recursive_doubling_tree;
    14.9  
   14.10      /* N exchange tree */
   14.11 -    mca_common_netpatterns_pair_exchange_node_t n_exchange_tree;
   14.12 +    netpatterns_pair_exchange_node_t n_exchange_tree;
   14.13  
   14.14      /* Knomial exchange tree */
   14.15 -    mca_common_netpatterns_k_exchange_node_t knomial_exchange_tree;
   14.16 +    netpatterns_k_exchange_node_t knomial_exchange_tree;
   14.17  
   14.18      /* Knomial exchange tree */
   14.19 -    mca_common_netpatterns_k_exchange_node_t knomial_allgather_tree;
   14.20 +    netpatterns_k_exchange_node_t knomial_allgather_tree;
   14.21  
   14.22      /* The array will keep pre-calculated task consumption per
   14.23       * algorithm
    15.1 --- a/ompi/mca/bcol/iboffload/bcol_iboffload_barrier.c	Tue Feb 19 22:36:41 2013 +0000
    15.2 +++ b/ompi/mca/bcol/iboffload/bcol_iboffload_barrier.c	Tue Feb 19 22:50:56 2013 +0000
    15.3 @@ -54,7 +54,7 @@
    15.4      mca_bcol_iboffload_frag_t *send_fragment = NULL,
    15.5                                *preposted_recv_frag = NULL;
    15.6  
    15.7 -    mca_common_netpatterns_pair_exchange_node_t *my_exchange_node =
    15.8 +    netpatterns_pair_exchange_node_t *my_exchange_node =
    15.9                                            &iboffload->recursive_doubling_tree;
   15.10  
   15.11      IBOFFLOAD_VERBOSE(10, ("Calling for mca_bcol_iboffload_barrier_intra_recursive_doubling.\n"));
   15.12 @@ -364,7 +364,7 @@
   15.13  /* Recursive K - ing*/
   15.14  static int recursive_knomial_start_connections(struct mca_bcol_iboffload_module_t *iboffload)
   15.15  {
   15.16 -    mca_common_netpatterns_k_exchange_node_t *my_exchange_node =
   15.17 +    netpatterns_k_exchange_node_t *my_exchange_node =
   15.18          &iboffload->knomial_exchange_tree;
   15.19      int k, i, n_exchanges = my_exchange_node->n_exchanges,
   15.20          **exchanges = my_exchange_node->rank_exchanges,
   15.21 @@ -442,7 +442,7 @@
   15.22      mca_bcol_iboffload_frag_t *send_fragment = NULL,
   15.23                                *preposted_recv_frag = NULL;
   15.24  
   15.25 -    mca_common_netpatterns_k_exchange_node_t *my_exchange_node =
   15.26 +    netpatterns_k_exchange_node_t *my_exchange_node =
   15.27          &iboffload->knomial_exchange_tree;
   15.28      IBOFFLOAD_VERBOSE(10, ("Calling for mca_bcol_iboffload_barrier_intra_recursive_knomial. Node type %d\n", my_exchange_node->node_type));
   15.29  
   15.30 @@ -706,7 +706,7 @@
   15.31  
   15.32  int mca_bcol_iboffload_rec_doubling_start_connections(mca_bcol_iboffload_module_t *iboffload)
   15.33  {
   15.34 -    mca_common_netpatterns_pair_exchange_node_t *my_exchange_node =
   15.35 +    netpatterns_pair_exchange_node_t *my_exchange_node =
   15.36                                            &iboffload->recursive_doubling_tree;
   15.37  
   15.38      int i, n_exchanges = my_exchange_node->n_exchanges,
    16.1 --- a/ompi/mca/bcol/iboffload/bcol_iboffload_bcast.c	Tue Feb 19 22:36:41 2013 +0000
    16.2 +++ b/ompi/mca/bcol/iboffload/bcol_iboffload_bcast.c	Tue Feb 19 22:50:56 2013 +0000
    16.3 @@ -203,7 +203,7 @@
    16.4  static int mca_bcol_iboffload_small_msg_bcast_exec(mca_bcol_iboffload_module_t *iboffload_module,
    16.5                                                     mca_bcol_iboffload_collreq_t *coll_request)
    16.6  {
    16.7 -    mca_common_netpatterns_pair_exchange_node_t *recursive_doubling_tree =
    16.8 +    netpatterns_pair_exchange_node_t *recursive_doubling_tree =
    16.9          &iboffload_module->recursive_doubling_tree;
   16.10  
   16.11      int rc,
   16.12 @@ -396,7 +396,7 @@
   16.13  static int mca_bcol_iboffload_small_msg_bcast_extra_exec(mca_bcol_iboffload_module_t *iboffload_module,
   16.14                                                     mca_bcol_iboffload_collreq_t *coll_request)
   16.15  {
   16.16 -    mca_common_netpatterns_pair_exchange_node_t *recursive_doubling_tree =
   16.17 +    netpatterns_pair_exchange_node_t *recursive_doubling_tree =
   16.18          &iboffload_module->recursive_doubling_tree;
   16.19  
   16.20      int rc,
   16.21 @@ -617,7 +617,7 @@
   16.22  static int mca_bcol_iboffload_bcast_scatter_allgather_exec(mca_bcol_iboffload_module_t *iboffload_module,
   16.23          mca_bcol_iboffload_collreq_t *coll_request)
   16.24  {
   16.25 -    mca_common_netpatterns_pair_exchange_node_t *recursive_doubling_tree =
   16.26 +    netpatterns_pair_exchange_node_t *recursive_doubling_tree =
   16.27          &iboffload_module->recursive_doubling_tree;
   16.28  
   16.29      int rc,
   16.30 @@ -857,7 +857,7 @@
   16.31  static int mca_bcol_iboffload_bcast_scatter_allgather_extra_exec(mca_bcol_iboffload_module_t *iboffload_module,
   16.32          mca_bcol_iboffload_collreq_t *coll_request)
   16.33  {
   16.34 -    mca_common_netpatterns_pair_exchange_node_t *recursive_doubling_tree =
   16.35 +    netpatterns_pair_exchange_node_t *recursive_doubling_tree =
   16.36          &iboffload_module->recursive_doubling_tree;
   16.37  
   16.38      int rc, dst;
    17.1 --- a/ompi/mca/bcol/iboffload/bcol_iboffload_bcast.h	Tue Feb 19 22:36:41 2013 +0000
    17.2 +++ b/ompi/mca/bcol/iboffload/bcol_iboffload_bcast.h	Tue Feb 19 22:50:56 2013 +0000
    17.3 @@ -364,7 +364,7 @@
    17.4  
    17.5  static inline void bcol_iboffload_setup_binomial_connection(mca_bcol_iboffload_module_t *iboffload)
    17.6  {
    17.7 -    mca_common_netpatterns_pair_exchange_node_t *my_exchange_node =
    17.8 +    netpatterns_pair_exchange_node_t *my_exchange_node =
    17.9                                            &iboffload->recursive_doubling_tree;
   17.10  
   17.11      int i, n_exchanges = my_exchange_node->n_exchanges,
    18.1 --- a/ompi/mca/bcol/iboffload/bcol_iboffload_module.c	Tue Feb 19 22:36:41 2013 +0000
    18.2 +++ b/ompi/mca/bcol/iboffload/bcol_iboffload_module.c	Tue Feb 19 22:50:56 2013 +0000
    18.3 @@ -178,8 +178,8 @@
    18.4          free(module->endpoints);
    18.5      }
    18.6  
    18.7 -    mca_common_netpatterns_free_recursive_doubling_tree_node(&module->n_exchange_tree);
    18.8 -    mca_common_netpatterns_free_recursive_doubling_tree_node(&module->recursive_doubling_tree);
    18.9 +    netpatterns_free_recursive_doubling_tree_node(&module->n_exchange_tree);
   18.10 +    netpatterns_free_recursive_doubling_tree_node(&module->recursive_doubling_tree);
   18.11  
   18.12      OBJ_RELEASE(module->device->net_context);
   18.13      OBJ_RELEASE(module->device);
   18.14 @@ -745,7 +745,7 @@
   18.15  {
   18.16      int rc;
   18.17      mca_bcol_iboffload_module_t *ib_module = (mca_bcol_iboffload_module_t *) super;
   18.18 -    rc = mca_common_netpatterns_setup_recursive_knomial_allgather_tree_node(
   18.19 +    rc = netpatterns_setup_recursive_knomial_allgather_tree_node(
   18.20              ib_module->super.sbgp_partner_module->group_size,
   18.21              ib_module->super.sbgp_partner_module->my_index,
   18.22              mca_bcol_iboffload_component.k_nomial_radix,
   18.23 @@ -1090,7 +1090,7 @@
   18.24          /* Barrier initialization - recuresive doubling */
   18.25  #if 1
   18.26          if (OMPI_SUCCESS !=
   18.27 -                    mca_common_netpatterns_setup_recursive_doubling_tree_node(
   18.28 +                    netpatterns_setup_recursive_doubling_tree_node(
   18.29                                  iboffload_module->group_size, my_rank,
   18.30                                  &iboffload_module->recursive_doubling_tree)) {
   18.31              IBOFFLOAD_ERROR(("Failed to setup recursive doubling tree,"
   18.32 @@ -1101,7 +1101,7 @@
   18.33  
   18.34          /* Barrier initialization - N exchange tree */
   18.35          if (OMPI_SUCCESS !=
   18.36 -                mca_common_netpatterns_setup_recursive_doubling_n_tree_node(
   18.37 +                netpatterns_setup_recursive_doubling_n_tree_node(
   18.38                                  iboffload_module->group_size, my_rank,
   18.39                                  cm->exchange_tree_order,
   18.40                                  &iboffload_module->n_exchange_tree)) {
   18.41 @@ -1113,7 +1113,7 @@
   18.42  
   18.43          /* Recursive K-ing initialization - Knomial exchange tree */
   18.44          if (OMPI_SUCCESS !=
   18.45 -                mca_common_netpatterns_setup_recursive_knomial_tree_node(
   18.46 +                netpatterns_setup_recursive_knomial_tree_node(
   18.47                                  iboffload_module->group_size, my_rank,
   18.48                                  cm->knomial_tree_order,
   18.49                                  &iboffload_module->knomial_exchange_tree)) {
   18.50 @@ -1156,7 +1156,7 @@
   18.51          }
   18.52          /* that should take care of that */
   18.53          if (OMPI_SUCCESS !=
   18.54 -                mca_common_netpatterns_setup_recursive_knomial_allgather_tree_node(
   18.55 +                netpatterns_setup_recursive_knomial_allgather_tree_node(
   18.56                                  iboffload_module->group_size, sbgp->group_list[my_rank],
   18.57                                  cm->k_nomial_radix, iboffload_module->super.list_n_connected,
   18.58                                  &iboffload_module->knomial_allgather_tree)) {
    19.1 --- a/ompi/mca/bcol/ptpcoll/Makefile.am	Tue Feb 19 22:36:41 2013 +0000
    19.2 +++ b/ompi/mca/bcol/ptpcoll/Makefile.am	Tue Feb 19 22:50:56 2013 +0000
    19.3 @@ -44,8 +44,7 @@
    19.4  mcacomponent_LTLIBRARIES = $(component_install)
    19.5  mca_bcol_ptpcoll_la_SOURCES = $(sources)
    19.6  mca_bcol_ptpcoll_la_LDFLAGS = -module -avoid-version
    19.7 -mca_bcol_ptpcoll_la_LIBADD = \
    19.8 -                             $(top_ompi_builddir)/ompi/mca/common/netpatterns/libmca_common_netpatterns.la
    19.9 +mca_bcol_ptpcoll_la_LIBADD = 
   19.10  
   19.11  noinst_LTLIBRARIES = $(component_noinst)
   19.12  libmca_bcol_ptpcoll_la_SOURCES =$(sources)
    20.1 --- a/ompi/mca/bcol/ptpcoll/bcol_ptpcoll.h	Tue Feb 19 22:36:41 2013 +0000
    20.2 +++ b/ompi/mca/bcol/ptpcoll/bcol_ptpcoll.h	Tue Feb 19 22:50:56 2013 +0000
    20.3 @@ -20,7 +20,7 @@
    20.4  #include "ompi/request/request.h"
    20.5  #include "ompi/mca/pml/pml.h"
    20.6  #include "ompi/mca/coll/ml/coll_ml_allocation.h"
    20.7 -#include "ompi/mca/common/netpatterns/common_netpatterns.h"
    20.8 +#include "ompi/patterns/net/netpatterns.h"
    20.9  
   20.10  BEGIN_C_DECLS
   20.11  
   20.12 @@ -297,7 +297,7 @@
   20.13      int full_narray_tree_num_leafs;
   20.14  
   20.15      /* Nary tree info */
   20.16 -    mca_common_netpatterns_tree_node_t *narray_node;
   20.17 +    netpatterns_tree_node_t *narray_node;
   20.18  
   20.19      /* if the rank in group, it keeps the extra peer. 
   20.20         if the rank is extra, it keeps the proxy peer.
   20.21 @@ -328,13 +328,13 @@
   20.22      /* number of extra peers , maximum k - 1*/
   20.23      int narray_knomial_proxy_num; 
   20.24      /* Narray-Knomial node information array */
   20.25 -    mca_common_netpatterns_narray_knomial_tree_node_t *narray_knomial_node;
   20.26 +    netpatterns_narray_knomial_tree_node_t *narray_knomial_node;
   20.27      /* Knomial exchange tree */ 
   20.28 -    mca_common_netpatterns_k_exchange_node_t knomial_exchange_tree;
   20.29 +    netpatterns_k_exchange_node_t knomial_exchange_tree;
   20.30      /* knomial allgather tree --- Do not disable, we need both 
   20.31         different algorithms define recursive k - ing differently
   20.32       */
   20.33 -    mca_common_netpatterns_k_exchange_node_t knomial_allgather_tree;
   20.34 +    netpatterns_k_exchange_node_t knomial_allgather_tree;
   20.35  
   20.36  	/* Knomial allgather offsets */
   20.37  	int **allgather_offsets;
    21.1 --- a/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_barrier.c	Tue Feb 19 22:36:41 2013 +0000
    21.2 +++ b/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_barrier.c	Tue Feb 19 22:50:56 2013 +0000
    21.3 @@ -34,7 +34,7 @@
    21.4      mca_bcol_ptpcoll_module_t *ptpcoll_module =
    21.5                          (mca_bcol_ptpcoll_module_t *) const_args->bcol_module;
    21.6  
    21.7 -    mca_common_netpatterns_k_exchange_node_t *my_exchange_node =
    21.8 +    netpatterns_k_exchange_node_t *my_exchange_node =
    21.9                                         &ptpcoll_module->knomial_exchange_tree;
   21.10  
   21.11      int rc, k, pair_comm_rank, exchange, completed,
   21.12 @@ -223,7 +223,7 @@
   21.13      mca_bcol_ptpcoll_module_t *ptpcoll_module =
   21.14                          (mca_bcol_ptpcoll_module_t *) const_args->bcol_module;
   21.15  
   21.16 -    mca_common_netpatterns_k_exchange_node_t *my_exchange_node =
   21.17 +    netpatterns_k_exchange_node_t *my_exchange_node =
   21.18                                         &ptpcoll_module->knomial_exchange_tree;
   21.19  
   21.20      int rc, k, tag, pair_comm_rank, exchange,
   21.21 @@ -371,7 +371,7 @@
   21.22      mca_bcol_ptpcoll_module_t *ptpcoll_module =
   21.23                      (mca_bcol_ptpcoll_module_t *) const_args->bcol_module;
   21.24  
   21.25 -    mca_common_netpatterns_k_exchange_node_t *my_exchange_node =
   21.26 +    netpatterns_k_exchange_node_t *my_exchange_node =
   21.27                                     &ptpcoll_module->knomial_exchange_tree;
   21.28  
   21.29      ompi_communicator_t *comm =
   21.30 @@ -862,7 +862,7 @@
   21.31  
   21.32  static int mca_bcol_ptpcoll_barrier_setup(mca_bcol_base_module_t *super, int bcoll_type)
   21.33  {
   21.34 -    mca_common_netpatterns_k_exchange_node_t *my_exchange_node;
   21.35 +    netpatterns_k_exchange_node_t *my_exchange_node;
   21.36      mca_bcol_ptpcoll_module_t * ptpcoll_module =
   21.37                             (mca_bcol_ptpcoll_module_t *) super;
   21.38  
    22.1 --- a/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_bcast.c	Tue Feb 19 22:36:41 2013 +0000
    22.2 +++ b/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_bcast.c	Tue Feb 19 22:50:56 2013 +0000
    22.3 @@ -141,7 +141,7 @@
    22.4      int count = input_args->count * input_args->dtype->super.size;
    22.5      int *active_requests =
    22.6          &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].active_requests);
    22.7 -    mca_common_netpatter_knomial_step_info_t step_info = {0, 0, 0};
    22.8 +    netpatterns_knomial_step_info_t step_info = {0, 0, 0};
    22.9  
   22.10      PTPCOLL_VERBOSE(3, ("BCAST Anyroot, index_this_type %d, num_of_this_type %d",
   22.11                      const_args->index_of_this_type_in_collective + 1,
   22.12 @@ -529,7 +529,7 @@
   22.13          /* No data was received. Waiting for data */
   22.14          if (0 == (*active_requests)) {
   22.15              int extra_root = -1;
   22.16 -            mca_common_netpatter_knomial_step_info_t step_info;
   22.17 +            netpatterns_knomial_step_info_t step_info;
   22.18              /* We can not block. So run couple of test for data arrival */
   22.19              if (0 == mca_bcol_ptpcoll_test_for_match(recv_request, &rc)) {
   22.20                  PTPCOLL_VERBOSE(10, ("Test was not matched (active request %d)",
   22.21 @@ -624,7 +624,7 @@
   22.22      int matched = 0;
   22.23      int k_level, logk_level;
   22.24      int extra_root = -1;
   22.25 -    mca_common_netpatter_knomial_step_info_t step_info;
   22.26 +    netpatterns_knomial_step_info_t step_info;
   22.27  
   22.28      PTPCOLL_VERBOSE(3, ("BCAST Know root, index_this_type %d, num_of_this_type %d",
   22.29                      const_args->index_of_this_type_in_collective + 1,
   22.30 @@ -694,7 +694,7 @@
   22.31          }
   22.32      }
   22.33  
   22.34 -    data_src = mca_common_netpatterns_get_knomial_data_source(
   22.35 +    data_src = netpatterns_get_knomial_data_source(
   22.36                  my_group_index, group_root_index, radix, ptpcoll_module->pow_knum,
   22.37                  &k_level, &logk_level);
   22.38  
   22.39 @@ -1709,8 +1709,8 @@
   22.40      int group_size = ptpcoll_module->full_narray_tree_size;
   22.41      int completed = 0;
   22.42      int virtual_root;
   22.43 -    mca_common_netpatterns_narray_knomial_tree_node_t *narray_knomial_node = NULL;
   22.44 -    mca_common_netpatterns_narray_knomial_tree_node_t *narray_node = NULL;
   22.45 +    netpatterns_narray_knomial_tree_node_t *narray_knomial_node = NULL;
   22.46 +    netpatterns_narray_knomial_tree_node_t *narray_node = NULL;
   22.47  
   22.48      PTPCOLL_VERBOSE(3, ("BCAST Anyroot, index_this_type %d, num_of_this_type %d",
   22.49                      const_args->index_of_this_type_in_collective + 1,
   22.50 @@ -2032,7 +2032,7 @@
   22.51      int matched = true;
   22.52      int my_group_index = ptpcoll_module->super.sbgp_partner_module->my_index;
   22.53      int relative_group_index = 0;
   22.54 -    mca_common_netpatterns_tree_node_t *narray_node = NULL;
   22.55 +    netpatterns_tree_node_t *narray_node = NULL;
   22.56  
   22.57      PTPCOLL_VERBOSE(3, ("Bcast, Narray tree Progress"));
   22.58  
   22.59 @@ -2119,7 +2119,7 @@
   22.60      int my_group_index = ptpcoll_module->super.sbgp_partner_module->my_index;
   22.61      int group_root_index;
   22.62      int relative_group_index = 0;
   22.63 -    mca_common_netpatterns_tree_node_t *narray_node = NULL;
   22.64 +    netpatterns_tree_node_t *narray_node = NULL;
   22.65  
   22.66      PTPCOLL_VERBOSE(3, ("Bcast, Narray tree"));
   22.67  
    23.1 --- a/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_bcast.h	Tue Feb 19 22:36:41 2013 +0000
    23.2 +++ b/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_bcast.h	Tue Feb 19 22:50:56 2013 +0000
    23.3 @@ -756,9 +756,9 @@
    23.4      ompi_communicator_t* comm = ptpcoll_module->super.sbgp_partner_module->group_comm;
    23.5      ompi_request_t **requests = 
    23.6          ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].requests;
    23.7 -    mca_common_netpatterns_narray_knomial_tree_node_t *narray_node =
    23.8 +    netpatterns_narray_knomial_tree_node_t *narray_node =
    23.9          &ptpcoll_module->narray_knomial_node[relative_group_index];
   23.10 -    mca_common_netpatterns_k_exchange_node_t *k_node =
   23.11 +    netpatterns_k_exchange_node_t *k_node =
   23.12          &narray_node->k_node;
   23.13      mca_bcol_ptpcoll_component_t *cm =
   23.14          &mca_bcol_ptpcoll_component;
    24.1 --- a/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_module.c	Tue Feb 19 22:36:41 2013 +0000
    24.2 +++ b/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_module.c	Tue Feb 19 22:50:56 2013 +0000
    24.3 @@ -265,7 +265,7 @@
    24.4  {
    24.5     mca_bcol_ptpcoll_module_t *p2p_module = (mca_bcol_ptpcoll_module_t *) super;
    24.6     int rc = 0; 
    24.7 -   rc = mca_common_netpatterns_setup_recursive_knomial_allgather_tree_node(
    24.8 +   rc = netpatterns_setup_recursive_knomial_allgather_tree_node(
    24.9                  p2p_module->super.sbgp_partner_module->group_size,
   24.10                  p2p_module->super.sbgp_partner_module->my_index,
   24.11                  mca_bcol_ptpcoll_component.k_nomial_radix,
   24.12 @@ -315,7 +315,7 @@
   24.13  
   24.14      ptpcoll_module->narray_knomial_node = calloc(
   24.15              ptpcoll_module->full_narray_tree_size,
   24.16 -            sizeof(mca_common_netpatterns_narray_knomial_tree_node_t));
   24.17 +            sizeof(netpatterns_narray_knomial_tree_node_t));
   24.18      if(NULL == ptpcoll_module->narray_knomial_node) {
   24.19          goto Error;
   24.20      }
   24.21 @@ -346,7 +346,7 @@
   24.22          }
   24.23          /* Setting node info */
   24.24          for(i = 0; i < ptpcoll_module->full_narray_tree_size; i++) {
   24.25 -            rc = mca_common_netpatterns_setup_narray_knomial_tree(
   24.26 +            rc = netpatterns_setup_narray_knomial_tree(
   24.27                      cm->narray_knomial_radix,
   24.28                      i,
   24.29                      ptpcoll_module->full_narray_tree_size,
   24.30 @@ -382,13 +382,13 @@
   24.31      mca_bcol_ptpcoll_component_t *cm = &mca_bcol_ptpcoll_component;
   24.32  
   24.33      ptpcoll_module->narray_node = calloc(ptpcoll_module->group_size,
   24.34 -            sizeof(mca_common_netpatterns_tree_node_t));
   24.35 +            sizeof(netpatterns_tree_node_t));
   24.36      if(NULL == ptpcoll_module->narray_node ) {
   24.37          goto Error;
   24.38      }
   24.39  
   24.40      for(i = 0; i < ptpcoll_module->group_size; i++) {
   24.41 -        rc = mca_common_netpatterns_setup_narray_tree(
   24.42 +        rc = netpatterns_setup_narray_tree(
   24.43                  cm->narray_radix,
   24.44                  i,
   24.45                  ptpcoll_module->group_size,
   24.46 @@ -510,7 +510,7 @@
   24.47  static int load_recursive_knomial_info(mca_bcol_ptpcoll_module_t *ptpcoll_module)
   24.48  {
   24.49      int rc = OMPI_SUCCESS;
   24.50 -    rc = mca_common_netpatterns_setup_recursive_knomial_tree_node(
   24.51 +    rc = netpatterns_setup_recursive_knomial_tree_node(
   24.52                      ptpcoll_module->group_size,
   24.53                      ptpcoll_module->super.sbgp_partner_module->my_index,
   24.54                      mca_bcol_ptpcoll_component.k_nomial_radix,
    25.1 --- a/ompi/mca/coll/ml/Makefile.am	Tue Feb 19 22:36:41 2013 +0000
    25.2 +++ b/ompi/mca/coll/ml/Makefile.am	Tue Feb 19 22:50:56 2013 +0000
    25.3 @@ -68,18 +68,9 @@
    25.4  mcacomponent_LTLIBRARIES = $(component_install)
    25.5  mca_coll_ml_la_SOURCES = $(sources)
    25.6  mca_coll_ml_la_LDFLAGS = -module -avoid-version
    25.7 -mca_coll_ml_la_LIBADD = \
    25.8 -    $(top_ompi_builddir)/ompi/mca/common/commpatterns/libmca_common_commpatterns.la \
    25.9 -    $(top_ompi_builddir)/ompi/mca/common/netpatterns/libmca_common_netpatterns.la
   25.10 +mca_coll_ml_la_LIBADD =
   25.11  
   25.12  
   25.13  noinst_LTLIBRARIES = $(component_noinst)
   25.14  libmca_coll_ml_la_SOURCES =$(sources)
   25.15  libmca_coll_ml_la_LDFLAGS = -module -avoid-version
   25.16 -
   25.17 -$(top_ompi_builddir)/ompi/mca/common/commpatterns/libmca_common_commpatterns.la: foo.c
   25.18 -	cd $(top_ompi_builddir)/ompi/mca/common/commpatterns && $(MAKE)
   25.19 -$(top_ompi_builddir)/ompi/mca/common/netpatterns/libmca_common_netpatterns.la: foo.c
   25.20 -	cd $(top_ompi_builddir)/ompi/mca/common/netpatterns && $(MAKE)
   25.21 -
   25.22 -foo.c:
    26.1 --- a/ompi/mca/coll/ml/coll_ml_component.c	Tue Feb 19 22:36:41 2013 +0000
    26.2 +++ b/ompi/mca/coll/ml/coll_ml_component.c	Tue Feb 19 22:50:56 2013 +0000
    26.3 @@ -36,7 +36,7 @@
    26.4  #include "coll_ml.h"
    26.5  #include "coll_ml_inlines.h"
    26.6  
    26.7 -#include "ompi/mca/common/netpatterns/common_netpatterns.h"
    26.8 +#include "ompi/patterns/net/netpatterns.h"
    26.9  #include "coll_ml_mca.h"
   26.10  #include "coll_ml_custom_utils.h"
   26.11  
   26.12 @@ -460,7 +460,7 @@
   26.13      OBJ_CONSTRUCT(&(cs->sequential_collectives_mutex), opal_mutex_t);
   26.14      OBJ_CONSTRUCT(&(cs->sequential_collectives), opal_list_t);
   26.15  
   26.16 -    rc = ompi_common_netpatterns_init();
   26.17 +    rc = netpatterns_init();
   26.18      if (OMPI_SUCCESS != rc) {
   26.19          return rc;
   26.20      }
    27.1 --- a/ompi/mca/coll/ml/coll_ml_custom_utils.c	Tue Feb 19 22:36:41 2013 +0000
    27.2 +++ b/ompi/mca/coll/ml/coll_ml_custom_utils.c	Tue Feb 19 22:50:56 2013 +0000
    27.3 @@ -32,7 +32,7 @@
    27.4  #include "ompi/mca/coll/base/base.h"
    27.5  #include "ompi/mca/coll/ml/coll_ml.h"
    27.6  #include "ompi/mca/coll/ml/coll_ml_inlines.h"
    27.7 -#include "ompi/mca/common/commpatterns/common_coll_ops.h"
    27.8 +#include "ompi/patterns/comm/coll_ops.h"
    27.9  
   27.10  #include "ompi/datatype/ompi_datatype.h"
   27.11  #include "ompi/communicator/communicator.h"
    28.1 --- a/ompi/mca/coll/ml/coll_ml_mca.c	Tue Feb 19 22:36:41 2013 +0000
    28.2 +++ b/ompi/mca/coll/ml/coll_ml_mca.c	Tue Feb 19 22:50:56 2013 +0000
    28.3 @@ -25,7 +25,7 @@
    28.4  #include "coll_ml_inlines.h"
    28.5  #include "coll_ml_mca.h"
    28.6  #include "coll_ml_lmngr.h"
    28.7 -#include "ompi/mca/common/netpatterns/common_netpatterns.h"
    28.8 +#include "ompi/patterns/net/netpatterns.h"
    28.9  #include "opal/mca/installdirs/installdirs.h"
   28.10  
   28.11  /*
    29.1 --- a/ompi/mca/coll/ml/coll_ml_module.c	Tue Feb 19 22:36:41 2013 +0000
    29.2 +++ b/ompi/mca/coll/ml/coll_ml_module.c	Tue Feb 19 22:50:56 2013 +0000
    29.3 @@ -30,7 +30,7 @@
    29.4  #include "ompi/mca/sbgp/base/base.h"
    29.5  #include "ompi/mca/bcol/base/base.h"
    29.6  #include "ompi/mca/sbgp/sbgp.h"
    29.7 -#include "ompi/mca/common/commpatterns/common_coll_ops.h"
    29.8 +#include "ompi/patterns/comm/coll_ops.h"
    29.9  #include "ompi/mca/coll/ml/coll_ml.h"
   29.10  
   29.11  #include "orte/mca/rml/rml.h"
    30.1 --- a/ompi/mca/common/commpatterns/Makefile.am	Tue Feb 19 22:36:41 2013 +0000
    30.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    30.3 @@ -1,104 +0,0 @@
    30.4 -#
    30.5 -# Copyright (c) 2009-2012 Mellanox Technologies.  All rights reserved.
    30.6 -# Copyright (c) 2009-2012 Oak Ridge National Laboratory.  All rights reserved.
    30.7 -# $COPYRIGHT$
    30.8 -# 
    30.9 -# Additional copyrights may follow
   30.10 -# 
   30.11 -# $HEADER$
   30.12 -#
   30.13 -
   30.14 -# A word of explanation...
   30.15 -#
   30.16 -# This library is linked against various MCA components because all
   30.17 -# shared-memory based components (e.g., mpool, ptl, etc.)  need to
   30.18 -# share some common code and data.  There's two cases:
   30.19 -#
   30.20 -# 1. libmca_common_commpatterns.la is a shared library.  By linking that shared
   30.21 -# library to all components that need it, the OS linker will
   30.22 -# automatically load it into the process as necessary, and there will
   30.23 -# only be one copy (i.e., all the components will share *one* copy of
   30.24 -# the code and data).
   30.25 -#
   30.26 -# 2. libmca_common_commpatterns.la is a static library.  In this case, it will
   30.27 -# be rolled up into the top-level libmpi.la.  It will also be rolled
   30.28 -# into each component, but then the component will also be rolled up
   30.29 -# into the upper-level libmpi.la.  Linkers universally know how to
   30.30 -# "figure this out" so that we end up with only one copy of the code
   30.31 -# and data.
   30.32 -#
   30.33 -# Note that building this common component statically and linking
   30.34 -# against other dynamic components is *not* supported!
   30.35 -
   30.36 -EXTRA_DIST = .windows
   30.37 -
   30.38 -# Header files
   30.39 -
   30.40 -headers = \
   30.41 -        common_coll_ops.h \
   30.42 -        common_netpatterns.h \
   30.43 -        ompi_common_netpatterns_macros.h
   30.44 -
   30.45 -# Source files
   30.46 -
   30.47 -sources = \
   30.48 -        common_allreduce.c \
   30.49 -        common_allgather.c \
   30.50 -        common_bcast.c
   30.51 -
   30.52 -# As per above, we'll either have an installable or noinst result.
   30.53 -# The installable one should follow the same MCA prefix naming rules
   30.54 -# (i.e., libmca_<type>_<name>.la).  The noinst one can be named
   30.55 -# whatever it wants, although libmca_<type>_<name>_noinst.la is
   30.56 -# recommended.
   30.57 -
   30.58 -# To simplify components that link to this library, we will *always*
   30.59 -# have an output libtool library named libmca_<type>_<name>.la -- even
   30.60 -# for case 2) described above (i.e., so there's no conditional logic
   30.61 -# necessary in component Makefile.am's that link to this library).
   30.62 -# Hence, if we're creating a noinst version of this library (i.e.,
   30.63 -# case 2), we sym link it to the libmca_<type>_<name>.la name
   30.64 -# (libtool will do the Right Things under the covers).  See the
   30.65 -# all-local and clean-local rules, below, for how this is effected.
   30.66 -
   30.67 -lib_LTLIBRARIES =
   30.68 -noinst_LTLIBRARIES =
   30.69 -comp_inst = libmca_common_commpatterns.la
   30.70 -comp_noinst = libmca_common_commpatterns_noinst.la
   30.71 -
   30.72 -if MCA_BUILD_ompi_common_commpatterns_DSO
   30.73 -lib_LTLIBRARIES += $(comp_inst)
   30.74 -else
   30.75 -noinst_LTLIBRARIES += $(comp_noinst)
   30.76 -endif
   30.77 -
   30.78 -libmca_common_commpatterns_la_SOURCES = $(headers) $(sources)
   30.79 -libmca_common_commpatterns_noinst_la_SOURCES = $(libmca_common_commpatterns_la_SOURCES)
   30.80 -libmca_common_commpatterns_la_LIBADD = \
   30.81 -		$(top_ompi_builddir)/ompi/mca/common/netpatterns/libmca_common_netpatterns.la
   30.82 -
   30.83 -# These two rules will sym link the "noinst" libtool library filename
   30.84 -# to the installable libtool library filename in the case where we are
   30.85 -# compiling this component statically (case 2), described above).
   30.86 -
   30.87 -all-local:
   30.88 -	if test -z "$(lib_LTLIBRARIES)"; then \
   30.89 -	  rm -f "$(comp_inst)"; \
   30.90 -	  $(LN_S) "$(comp_noinst)" "$(comp_inst)"; \
   30.91 -	fi
   30.92 -
   30.93 -clean-local:
   30.94 -	if test -z "$(lib_LTLIBRARIES)"; then \
   30.95 -	  rm -f "$(comp_inst)"; \
   30.96 -	fi
   30.97 -# The code below guaranty that the netpatterns will be build before commpatterns
   30.98 -FORCE:
   30.99 -
  30.100 -$(top_ompi_builddir)/ompi/mca/common/netpatterns/libmca_common_netpatterns.la: FORCE
  30.101 -	(cd  $(top_ompi_builddir)/ompi/mca/common/netpatterns/ && $(MAKE) $(AM_MAKEFLAGS) libmca_common_netpatterns.la)
  30.102 -
  30.103 -install-libmca_common_netpatterns: FORCE
  30.104 -	(cd  $(top_ompi_builddir)/ompi/mca/common/netpatterns/ && $(MAKE) $(AM_MAKEFLAGS) install)
  30.105 -
  30.106 -install: install-libmca_common_netpatterns install-am
  30.107 -
    31.1 --- a/ompi/mca/common/commpatterns/common_allgather.c	Tue Feb 19 22:36:41 2013 +0000
    31.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    31.3 @@ -1,289 +0,0 @@
    31.4 -/*
    31.5 - * Copyright (c) 2009-2012 Mellanox Technologies.  All rights reserved.
    31.6 - * Copyright (c) 2009-2012 Oak Ridge National Laboratory.  All rights reserved.
    31.7 - * $COPYRIGHT$
    31.8 - * 
    31.9 - * Additional copyrights may follow
   31.10 - * 
   31.11 - * $HEADER$
   31.12 - */
   31.13 -/** @file */
   31.14 -
   31.15 -#include "ompi_config.h"
   31.16 -
   31.17 -#include "ompi/constants.h"
   31.18 -#include "ompi/op/op.h"
   31.19 -#include "ompi/datatype/ompi_datatype.h"
   31.20 -#include "ompi/communicator/communicator.h"
   31.21 -#include "orte/mca/rml/rml.h"
   31.22 -#include "opal/include/opal/sys/atomic.h"
   31.23 -#include "common_coll_ops.h"
   31.24 -#include "ompi/mca/common/netpatterns/common_netpatterns.h"
   31.25 -#include "ompi/mca/dpm/dpm.h"
   31.26 -#include "orte/util/proc_info.h"
   31.27 -#include "ompi/mca/pml/pml.h"
   31.28 -
   31.29 -/**
   31.30 - * All-reduce - subgroup in communicator
   31.31 - */
   31.32 -OMPI_DECLSPEC int comm_allgather_pml(void *src_buf, void *dest_buf, int count,
   31.33 -        ompi_datatype_t *dtype, int my_rank_in_group,
   31.34 -        int n_peers, int *ranks_in_comm,ompi_communicator_t *comm)
   31.35 -{
   31.36 -    /* local variables */
   31.37 -    int rc=OMPI_SUCCESS,msg_cnt;
   31.38 -    int pair_rank,exchange,extra_rank, n_extra_nodes,n_extra;
   31.39 -    int proc_block,extra_start,extra_end,iovec_len;
   31.40 -    int remote_data_start_rank,remote_data_end_rank;
   31.41 -    int local_data_start_rank;
   31.42 -    mca_common_netpatterns_pair_exchange_node_t my_exchange_node;
   31.43 -    size_t message_extent,current_data_extent,current_data_count;
   31.44 -    size_t dt_size;
   31.45 -    OPAL_PTRDIFF_TYPE dt_extent;
   31.46 -    char *src_buf_current;
   31.47 -    char *dest_buf_current;
   31.48 -    struct iovec send_iov[2] = {{0,0},{0,0}}, 
   31.49 -                 recv_iov[2] = {{0,0},{0,0}};
   31.50 -    ompi_request_t *requests[4];
   31.51 -
   31.52 -    /* get size of data needed - same layout as user data, so that
   31.53 -     *   we can apply the reudction routines directly on these buffers
   31.54 -     */
   31.55 -    rc = ompi_datatype_type_size(dtype, &dt_size);
   31.56 -    if( OMPI_SUCCESS != rc ) {
   31.57 -        goto Error;
   31.58 -    }
   31.59 -
   31.60 -    rc = ompi_datatype_type_extent(dtype, &dt_extent);
   31.61 -    if( OMPI_SUCCESS != rc ) {
   31.62 -        goto Error;
   31.63 -    }
   31.64 -    message_extent = dt_extent*count;
   31.65 -
   31.66 -    /* place my data in the correct destination buffer */
   31.67 -    rc=ompi_datatype_copy_content_same_ddt(dtype,count,
   31.68 -            (char *)dest_buf+my_rank_in_group*message_extent,
   31.69 -            (char *)src_buf);
   31.70 -    if( OMPI_SUCCESS != rc ) {
   31.71 -        goto Error;
   31.72 -    }
   31.73 -
   31.74 -    /* 1 process special case */
   31.75 -    if(1 == n_peers) {
   31.76 -        return OMPI_SUCCESS;
   31.77 -    }
   31.78 -
   31.79 -    /* get my reduction communication pattern */
   31.80 -    rc = mca_common_netpatterns_setup_recursive_doubling_tree_node(n_peers, 
   31.81 -            my_rank_in_group, &my_exchange_node);
   31.82 -    if(OMPI_SUCCESS != rc){
   31.83 -        return rc;
   31.84 -    }
   31.85 -
   31.86 -    n_extra_nodes=n_peers-my_exchange_node.n_largest_pow_2;
   31.87 -
   31.88 -    /* get the data from the extra sources */
   31.89 -    if(0 < my_exchange_node.n_extra_sources)  {
   31.90 -
   31.91 -        if ( EXCHANGE_NODE == my_exchange_node.node_type ) {
   31.92 -
   31.93 -            /*
   31.94 -             ** Receive data from extra node
   31.95 -             */
   31.96 -
   31.97 -            extra_rank=my_exchange_node.rank_extra_source;
   31.98 -            /* receive the data into the correct location - will use 2
   31.99 -             * messages in the recursive doubling phase */
  31.100 -            dest_buf_current=(char *)dest_buf+message_extent*extra_rank;
  31.101 -            rc=MCA_PML_CALL(recv(dest_buf_current,
  31.102 -                    count,dtype,ranks_in_comm[extra_rank],
  31.103 -                    -OMPI_COMMON_TAG_ALLREDUCE,
  31.104 -                    comm, MPI_STATUSES_IGNORE));
  31.105 -            if( 0 > rc ) {
  31.106 -                goto  Error;
  31.107 -            }
  31.108 -
  31.109 -        } else {
  31.110 -
  31.111 -            /*
  31.112 -             ** Send data to "partner" node
  31.113 -             */
  31.114 -            extra_rank=my_exchange_node.rank_extra_source;
  31.115 -            src_buf_current=(char *)src_buf;
  31.116 -            rc=MCA_PML_CALL(send(src_buf_current,
  31.117 -                    count,dtype,ranks_in_comm[extra_rank],
  31.118 -                    -OMPI_COMMON_TAG_ALLREDUCE,
  31.119 -                    MCA_PML_BASE_SEND_STANDARD,
  31.120 -                    comm));
  31.121 -            if( 0 > rc ) {
  31.122 -                goto  Error;
  31.123 -            }
  31.124 -        }
  31.125 -    }
  31.126 -
  31.127 -    current_data_extent=message_extent;
  31.128 -    current_data_count=count;
  31.129 -    src_buf_current=(char *)dest_buf+my_rank_in_group*message_extent;
  31.130 -    proc_block=1;
  31.131 -    local_data_start_rank=my_rank_in_group;
  31.132 -    /* loop over data exchanges */
  31.133 -    for(exchange=0 ; exchange < my_exchange_node.n_exchanges ; exchange++) {
  31.134 -
  31.135 -        /* is the remote data read */
  31.136 -        pair_rank=my_exchange_node.rank_exchanges[exchange];
  31.137 -        msg_cnt=0;
  31.138 -
  31.139 -        /*
  31.140 -         * Power of 2 data segment 
  31.141 -         */
  31.142 -        /* post non-blocking receive */
  31.143 -        if(pair_rank > my_rank_in_group ){
  31.144 -            recv_iov[0].iov_base=src_buf_current+current_data_extent;
  31.145 -            recv_iov[0].iov_len=current_data_extent;
  31.146 -            iovec_len=1;
  31.147 -            remote_data_start_rank=local_data_start_rank+proc_block;
  31.148 -            remote_data_end_rank=remote_data_start_rank+proc_block-1;
  31.149 -        } else {
  31.150 -            recv_iov[0].iov_base=src_buf_current-current_data_extent;
  31.151 -            recv_iov[0].iov_len=current_data_extent;
  31.152 -            iovec_len=1;
  31.153 -            remote_data_start_rank=local_data_start_rank-proc_block;
  31.154 -            remote_data_end_rank=remote_data_start_rank+proc_block-1;
  31.155 -        }
  31.156 -        /* the data from the non power of 2 ranks */
  31.157 -        if(remote_data_start_rank<n_extra_nodes) {
  31.158 -            /* figure out how much data is at the remote rank */
  31.159 -            /* last rank with data */
  31.160 -            extra_start=remote_data_start_rank;
  31.161 -            extra_end=remote_data_end_rank;
  31.162 -            if(extra_end >= n_extra_nodes ) {
  31.163 -                /* if last rank exceeds the ranks with extra data,
  31.164 -                 * adjust this.
  31.165 -                 */
  31.166 -                extra_end=n_extra_nodes-1;
  31.167 -            }
  31.168 -            /* get the number of ranks whos data is to be grabbed */
  31.169 -            n_extra=extra_end-extra_start+1;
  31.170 -
  31.171 -            recv_iov[1].iov_base=(char *)dest_buf+
  31.172 -                (extra_start+my_exchange_node.n_largest_pow_2)*message_extent;
  31.173 -            recv_iov[1].iov_len=n_extra*count;
  31.174 -            iovec_len=2;
  31.175 -        }
  31.176 -
  31.177 -        rc=MCA_PML_CALL(irecv(recv_iov[0].iov_base,
  31.178 -                    current_data_count,dtype,ranks_in_comm[pair_rank],
  31.179 -                    -OMPI_COMMON_TAG_ALLREDUCE,
  31.180 -                    comm,&(requests[msg_cnt])));
  31.181 -        if( 0 > rc ) {
  31.182 -            goto Error;
  31.183 -        }
  31.184 -        msg_cnt++;
  31.185 -
  31.186 -        if(iovec_len > 1 ) {
  31.187 -            rc=MCA_PML_CALL(irecv(recv_iov[1].iov_base,
  31.188 -                        recv_iov[1].iov_len,dtype,ranks_in_comm[pair_rank],
  31.189 -                        -OMPI_COMMON_TAG_ALLREDUCE,
  31.190 -                        comm,&(requests[msg_cnt])));
  31.191 -            if( 0 > rc ) {
  31.192 -                goto Error;
  31.193 -            }
  31.194 -            msg_cnt++;
  31.195 -        }
  31.196 -
  31.197 -        /* post non-blocking send */
  31.198 -        send_iov[0].iov_base=src_buf_current;
  31.199 -        send_iov[0].iov_len=current_data_extent;
  31.200 -        iovec_len=1;
  31.201 -        /* the data from the non power of 2 ranks */
  31.202 -        if(local_data_start_rank<n_extra_nodes) {
  31.203 -            /* figure out how much data is at the remote rank */
  31.204 -            /* last rank with data */
  31.205 -            extra_start=local_data_start_rank;
  31.206 -            extra_end=extra_start+proc_block-1;
  31.207 -            if(extra_end >= n_extra_nodes ) {
  31.208 -                /* if last rank exceeds the ranks with extra data,
  31.209 -                 * adjust this.
  31.210 -                 */
  31.211 -                extra_end=n_extra_nodes-1;
  31.212 -            }
  31.213 -            /* get the number of ranks whos data is to be grabbed */
  31.214 -            n_extra=extra_end-extra_start+1;
  31.215 -
  31.216 -            send_iov[1].iov_base=(char *)dest_buf+
  31.217 -                (extra_start+my_exchange_node.n_largest_pow_2)*message_extent;
  31.218 -            send_iov[1].iov_len=n_extra*count;
  31.219 -            iovec_len=2;
  31.220 -        }
  31.221 -
  31.222 -        rc=MCA_PML_CALL(isend(send_iov[0].iov_base,
  31.223 -                    current_data_count,dtype,ranks_in_comm[pair_rank],
  31.224 -                    -OMPI_COMMON_TAG_ALLREDUCE,MCA_PML_BASE_SEND_STANDARD,
  31.225 -                    comm,&(requests[msg_cnt])));
  31.226 -        if( 0 > rc ) {
  31.227 -            goto Error;
  31.228 -        }
  31.229 -        msg_cnt++;
  31.230 -        if( iovec_len > 1 ) { 
  31.231 -            rc=MCA_PML_CALL(isend(send_iov[1].iov_base,
  31.232 -                        send_iov[1].iov_len,dtype,ranks_in_comm[pair_rank],
  31.233 -                        -OMPI_COMMON_TAG_ALLREDUCE,MCA_PML_BASE_SEND_STANDARD,
  31.234 -                        comm,&(requests[msg_cnt])));
  31.235 -            if( 0 > rc ) {
  31.236 -                goto Error;
  31.237 -            }
  31.238 -            msg_cnt++;
  31.239 -        }
  31.240 -
  31.241 -        /* prepare the source buffer for the next iteration */
  31.242 -        if(pair_rank < my_rank_in_group ){
  31.243 -            src_buf_current-=current_data_extent;
  31.244 -            local_data_start_rank-=proc_block;
  31.245 -        } 
  31.246 -        proc_block*=2;
  31.247 -        current_data_extent*=2;
  31.248 -        current_data_count*=2;
  31.249 -
  31.250 -        /* wait on send and receive completion */
  31.251 -        ompi_request_wait_all(msg_cnt,requests,MPI_STATUSES_IGNORE);
  31.252 -    }
  31.253 -
  31.254 -    /* copy data in from the "extra" source, if need be */
  31.255 -    if(0 < my_exchange_node.n_extra_sources)  {
  31.256 -
  31.257 -        if ( EXTRA_NODE == my_exchange_node.node_type ) {
  31.258 -            /* 
  31.259 -             ** receive the data 
  31.260 -             ** */
  31.261 -            extra_rank=my_exchange_node.rank_extra_source;
  31.262 -
  31.263 -            rc=MCA_PML_CALL(recv(dest_buf,
  31.264 -                    count*n_peers,dtype,ranks_in_comm[extra_rank],
  31.265 -                    -OMPI_COMMON_TAG_ALLREDUCE,
  31.266 -                    comm,MPI_STATUSES_IGNORE));
  31.267 -            if(0 > rc ) {
  31.268 -                goto  Error;
  31.269 -            }
  31.270 -        } else {
  31.271 -            /* send the data to the pair-rank outside of the power of 2 set
  31.272 -             ** of ranks
  31.273 -             */
  31.274 -
  31.275 -            extra_rank=my_exchange_node.rank_extra_source;
  31.276 -            rc=MCA_PML_CALL(send(dest_buf,
  31.277 -                    count*n_peers,dtype,ranks_in_comm[extra_rank],
  31.278 -                    -OMPI_COMMON_TAG_ALLREDUCE,
  31.279 -                    MCA_PML_BASE_SEND_STANDARD,
  31.280 -                    comm));
  31.281 -            if( 0 > rc ) {
  31.282 -                goto  Error;
  31.283 -            }
  31.284 -        }
  31.285 -    }
  31.286 -
  31.287 -    /* return */
  31.288 -    return OMPI_SUCCESS;
  31.289 -
  31.290 -Error:
  31.291 -    return rc;
  31.292 -}
    32.1 --- a/ompi/mca/common/commpatterns/common_allreduce.c	Tue Feb 19 22:36:41 2013 +0000
    32.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    32.3 @@ -1,256 +0,0 @@
    32.4 -/*
    32.5 - * Copyright (c) 2009-2012 Mellanox Technologies.  All rights reserved.
    32.6 - * Copyright (c) 2009-2012 Oak Ridge National Laboratory.  All rights reserved.
    32.7 - * $COPYRIGHT$
    32.8 - * 
    32.9 - * Additional copyrights may follow
   32.10 - * 
   32.11 - * $HEADER$
   32.12 - */
   32.13 -/** @file */
   32.14 -
   32.15 -#include "ompi_config.h"
   32.16 -
   32.17 -#include "ompi/constants.h"
   32.18 -#include "ompi/op/op.h"
   32.19 -#include "ompi/datatype/ompi_datatype.h"
   32.20 -#include "ompi/communicator/communicator.h"
   32.21 -#include "orte/mca/rml/rml.h"
   32.22 -#include "opal/include/opal/sys/atomic.h"
   32.23 -#include "ompi/mca/common/commpatterns/common_netpatterns.h"
   32.24 -#include "common_coll_ops.h"
   32.25 -#include "ompi/mca/common/netpatterns/common_netpatterns.h"
   32.26 -#include "ompi/mca/dpm/dpm.h"
   32.27 -#include "orte/util/proc_info.h"
   32.28 -#include "ompi/mca/pml/pml.h"
   32.29 -
   32.30 -/**
   32.31 - * All-reduce for contigous primitive types
   32.32 - */
   32.33 -OMPI_DECLSPEC int comm_allreduce_pml(void *sbuf, void *rbuf, int count, 
   32.34 -        ompi_datatype_t *dtype, int my_rank_in_group,
   32.35 -        struct ompi_op_t *op, int n_peers,int *ranks_in_comm, 
   32.36 -        ompi_communicator_t *comm)
   32.37 -{
   32.38 -    /* local variables */
   32.39 -    int rc=OMPI_SUCCESS,n_dts_per_buffer,n_data_segments,stripe_number;
   32.40 -    int pair_rank,exchange,extra_rank;
   32.41 -    mca_common_netpatterns_pair_exchange_node_t my_exchange_node;
   32.42 -    int count_processed,count_this_stripe;
   32.43 -    size_t dt_size,dt_extent;
   32.44 -    char scratch_bufers[2][MAX_TMP_BUFFER];
   32.45 -    int send_buffer=0,recv_buffer=1;
   32.46 -    char *sbuf_current, *rbuf_current;
   32.47 -    ompi_request_t *requests[2];
   32.48 -
   32.49 -    /* get size of data needed - same layout as user data, so that
   32.50 -     *   we can apply the reudction routines directly on these buffers
   32.51 -     */
   32.52 -    rc = opal_datatype_type_size((opal_datatype_t *)dtype, &dt_size);
   32.53 -    if( OMPI_SUCCESS != rc ) {
   32.54 -        goto Error;
   32.55 -    }
   32.56 -    rc = ompi_datatype_type_extent(dtype, (OPAL_PTRDIFF_TYPE *)&dt_extent);
   32.57 -    if( OMPI_SUCCESS != rc ) {
   32.58 -        goto Error;
   32.59 -    }
   32.60 -    
   32.61 -    /* 1 process special case */
   32.62 -    if(1 == n_peers) {
   32.63 -        /* place my data in the correct destination buffer */
   32.64 -        rc=ompi_datatype_copy_content_same_ddt(dtype,count,
   32.65 -                (char *)rbuf, (char *)sbuf);
   32.66 -        if( OMPI_SUCCESS != rc ) {
   32.67 -            goto Error;
   32.68 -        }
   32.69 -        return OMPI_SUCCESS;
   32.70 -    }
   32.71 -
   32.72 -    /* number of data types copies that the scratch buffer can hold */
   32.73 -    n_dts_per_buffer=((int) MAX_TMP_BUFFER)/dt_extent;
   32.74 -    if ( 0 == n_dts_per_buffer ) {
   32.75 -        rc=OMPI_ERROR;
   32.76 -        goto Error;
   32.77 -    }
   32.78 -
   32.79 -    /* compute number of stripes needed to process this collective */
   32.80 -    n_data_segments=(count+n_dts_per_buffer -1 ) / n_dts_per_buffer ;
   32.81 -
   32.82 -    /* get my reduction communication pattern */
   32.83 -    rc = mca_common_netpatterns_setup_recursive_doubling_tree_node(n_peers,
   32.84 -            my_rank_in_group, &my_exchange_node);
   32.85 -    if(OMPI_SUCCESS != rc){
   32.86 -        return rc;
   32.87 -    }
   32.88 -
   32.89 -    count_processed=0;
   32.90 -
   32.91 -    /* get a pointer to the shared-memory working buffer */
   32.92 -    /* NOTE: starting with a rather synchronous approach */
   32.93 -    for( stripe_number=0 ; stripe_number < n_data_segments ; stripe_number++ ) {
   32.94 -
   32.95 -        /* get number of elements to process in this stripe */
   32.96 -        count_this_stripe=n_dts_per_buffer;
   32.97 -        if( count_processed + count_this_stripe > count )
   32.98 -            count_this_stripe=count-count_processed;
   32.99 -
  32.100 -        /* copy data from the input buffer into the temp buffer */
  32.101 -        sbuf_current=(char *)sbuf+count_processed*dt_extent;
  32.102 -        rc=ompi_datatype_copy_content_same_ddt(dtype,count_this_stripe,
  32.103 -                scratch_bufers[send_buffer], sbuf_current);
  32.104 -        if( OMPI_SUCCESS != rc ) {
  32.105 -            goto Error;
  32.106 -        }
  32.107 -
  32.108 -        /* copy data in from the "extra" source, if need be */
  32.109 -        if(0 < my_exchange_node.n_extra_sources)  {
  32.110 -
  32.111 -            if ( EXCHANGE_NODE == my_exchange_node.node_type ) {
  32.112 -                
  32.113 -                /*
  32.114 -                ** Receive data from extra node
  32.115 -                */
  32.116 -                extra_rank=my_exchange_node.rank_extra_source;
  32.117 -                rc=MCA_PML_CALL(recv(scratch_bufers[recv_buffer],
  32.118 -                            count_this_stripe,dtype,ranks_in_comm[extra_rank],
  32.119 -                            -OMPI_COMMON_TAG_ALLREDUCE, comm,
  32.120 -                            MPI_STATUSES_IGNORE));
  32.121 -                if( 0 > rc ) {
  32.122 -                    fprintf(stderr,"  first recv failed in comm_allreduce_pml \n");
  32.123 -                    fflush(stderr);
  32.124 -                    goto  Error;
  32.125 -                }
  32.126 -
  32.127 -
  32.128 -                /* apply collective operation to first half of the data */
  32.129 -                if( 0 < count_this_stripe ) {
  32.130 -                    ompi_op_reduce(op,
  32.131 -                            (void *)scratch_bufers[send_buffer],
  32.132 -                            (void *)scratch_bufers[recv_buffer],
  32.133 -                            count_this_stripe,dtype);
  32.134 -                }
  32.135 -
  32.136 -
  32.137 -            } else {
  32.138 -        
  32.139 -                /*
  32.140 -                ** Send data to "partner" node
  32.141 -                */
  32.142 -                extra_rank=my_exchange_node.rank_extra_source;
  32.143 -                rc=MCA_PML_CALL(send(scratch_bufers[send_buffer],
  32.144 -                            count_this_stripe,dtype,ranks_in_comm[extra_rank],
  32.145 -                            -OMPI_COMMON_TAG_ALLREDUCE, MCA_PML_BASE_SEND_STANDARD,
  32.146 -                            comm));
  32.147 -                if( 0 > rc ) {
  32.148 -                    fprintf(stderr,"  first send failed in comm_allreduce_pml \n");
  32.149 -                    fflush(stderr);
  32.150 -                    goto  Error;
  32.151 -                }
  32.152 -            }
  32.153 -
  32.154 -            /* change pointer to scratch buffer - this was we can send data
  32.155 -            ** that we have summed w/o a memory copy, and receive data into the
  32.156 -            ** other buffer, w/o fear of over writting data that has not yet
  32.157 -            ** completed being send
  32.158 -            */
  32.159 -            recv_buffer^=1;
  32.160 -            send_buffer^=1;
  32.161 -        }
  32.162 -
  32.163 -        /* loop over data exchanges */
  32.164 -        for(exchange=0 ; exchange < my_exchange_node.n_exchanges ; exchange++) {
  32.165 -
  32.166 -            /* is the remote data read */
  32.167 -            pair_rank=my_exchange_node.rank_exchanges[exchange];
  32.168 -
  32.169 -            /* post non-blocking receive */
  32.170 -            rc=MCA_PML_CALL(irecv(scratch_bufers[recv_buffer],
  32.171 -                        count_this_stripe,dtype,ranks_in_comm[pair_rank],
  32.172 -                        -OMPI_COMMON_TAG_ALLREDUCE,
  32.173 -                        comm,&(requests[0])));
  32.174 -            if( 0 > rc ) {
  32.175 -                fprintf(stderr,"  irecv failed in  comm_allreduce_pml at iterations %d \n",
  32.176 -                        exchange);
  32.177 -                fflush(stderr);
  32.178 -                goto Error;
  32.179 -            }
  32.180 -
  32.181 -            /* post non-blocking send */
  32.182 -            rc=MCA_PML_CALL(isend(scratch_bufers[send_buffer],
  32.183 -                        count_this_stripe,dtype, ranks_in_comm[pair_rank],
  32.184 -                        -OMPI_COMMON_TAG_ALLREDUCE,MCA_PML_BASE_SEND_STANDARD,
  32.185 -                        comm,&(requests[1])));
  32.186 -            if( 0 > rc ) {
  32.187 -                fprintf(stderr,"  isend failed in  comm_allreduce_pml at iterations %d \n",
  32.188 -                        exchange);
  32.189 -                fflush(stderr);
  32.190 -                goto Error;
  32.191 -            }
  32.192 -            /* wait on send and receive completion */
  32.193 -            ompi_request_wait_all(2,requests,MPI_STATUSES_IGNORE);
  32.194 -
  32.195 -            /* reduce the data */
  32.196 -            if( 0 < count_this_stripe ) {
  32.197 -                ompi_op_reduce(op,
  32.198 -                        (void *)scratch_bufers[send_buffer],
  32.199 -                        (void *)scratch_bufers[recv_buffer],
  32.200 -                        count_this_stripe,dtype);
  32.201 -            }
  32.202 -            /* get ready for next step */
  32.203 -            recv_buffer^=1;
  32.204 -            send_buffer^=1;
  32.205 -
  32.206 -        }
  32.207 -
  32.208 -        /* copy data in from the "extra" source, if need be */
  32.209 -        if(0 < my_exchange_node.n_extra_sources)  {
  32.210 -
  32.211 -            if ( EXTRA_NODE == my_exchange_node.node_type ) {
  32.212 -                /* 
  32.213 -                ** receive the data 
  32.214 -                ** */
  32.215 -                extra_rank=my_exchange_node.rank_extra_source;
  32.216 -                rc=MCA_PML_CALL(recv(scratch_bufers[recv_buffer],
  32.217 -                            count_this_stripe,dtype,ranks_in_comm[extra_rank],
  32.218 -                            -OMPI_COMMON_TAG_ALLREDUCE, comm,
  32.219 -                            MPI_STATUSES_IGNORE));
  32.220 -                if( 0 > rc ) {
  32.221 -                    fprintf(stderr,"  last recv failed in comm_allreduce_pml \n");
  32.222 -                    fflush(stderr);
  32.223 -                    goto  Error;
  32.224 -                }
  32.225 -
  32.226 -                recv_buffer^=1;
  32.227 -                send_buffer^=1;
  32.228 -            } else {
  32.229 -                /* send the data to the pair-rank outside of the power of 2 set
  32.230 -                ** of ranks
  32.231 -                */
  32.232 -
  32.233 -                extra_rank=my_exchange_node.rank_extra_source;
  32.234 -                rc=MCA_PML_CALL(send((char *)scratch_bufers[send_buffer],
  32.235 -                            count_this_stripe,dtype,ranks_in_comm[extra_rank],
  32.236 -                            -OMPI_COMMON_TAG_ALLREDUCE, MCA_PML_BASE_SEND_STANDARD,
  32.237 -                            comm));
  32.238 -                if( 0 > rc ) {
  32.239 -                    fprintf(stderr,"  last send failed in comm_allreduce_pml \n");
  32.240 -                    fflush(stderr);
  32.241 -                    goto  Error;
  32.242 -                }
  32.243 -            }
  32.244 -        }
  32.245 -
  32.246 -        /* copy data from the temp buffer into the output buffer */
  32.247 -        rbuf_current = (char *) rbuf + count_processed * dt_size;
  32.248 -        memcpy(rbuf_current,scratch_bufers[send_buffer], count_this_stripe*dt_size);
  32.249 -    
  32.250 -        /* update the count of elements processed */
  32.251 -        count_processed += count_this_stripe;
  32.252 -    }
  32.253 -
  32.254 -    /* return */
  32.255 -    return OMPI_SUCCESS;
  32.256 -
  32.257 -Error:
  32.258 -    return rc;
  32.259 -}
    33.1 --- a/ompi/mca/common/commpatterns/common_bcast.c	Tue Feb 19 22:36:41 2013 +0000
    33.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    33.3 @@ -1,98 +0,0 @@
    33.4 -/*
    33.5 - * Copyright (c) 2009-2012 Mellanox Technologies.  All rights reserved.
    33.6 - * Copyright (c) 2009-2012 Oak Ridge National Laboratory.  All rights reserved.
    33.7 - * $COPYRIGHT$
    33.8 - * 
    33.9 - * Additional copyrights may follow
   33.10 - * 
   33.11 - * $HEADER$
   33.12 - */
   33.13 -/** @file */
   33.14 -
   33.15 -#include "ompi_config.h"
   33.16 -
   33.17 -#include "ompi/constants.h"
   33.18 -#include "ompi/op/op.h"
   33.19 -#include "ompi/datatype/ompi_datatype.h"
   33.20 -#include "ompi/communicator/communicator.h"
   33.21 -#include "orte/mca/rml/rml.h"
   33.22 -#include "opal/include/opal/sys/atomic.h"
   33.23 -#include "common_coll_ops.h"
   33.24 -#include "ompi/mca/common/netpatterns/common_netpatterns.h"
   33.25 -#include "ompi/mca/dpm/dpm.h"
   33.26 -#include "orte/util/proc_info.h"
   33.27 -#include "ompi/mca/pml/pml.h"
   33.28 -
   33.29 -/**
   33.30 - * Bcast - subgroup in communicator
   33.31 - *  This is a very simple algorithm - binary tree, transmitting the full
   33.32 - *  message at each step.
   33.33 - */
   33.34 -OMPI_DECLSPEC int comm_bcast_pml(void *buffer, int root, int count,
   33.35 -        ompi_datatype_t *dtype, int my_rank_in_group,
   33.36 -        int n_peers, int *ranks_in_comm,ompi_communicator_t *comm)
   33.37 -{
   33.38 -    /* local variables */
   33.39 -    int rc=OMPI_SUCCESS,msg_cnt,i;
   33.40 -    ompi_request_t *requests[2];
   33.41 -    int node_rank, peer_rank;
   33.42 -    mca_common_netpatterns_tree_node_t node_data;
   33.43 -
   33.44 -    /*
   33.45 -     * shift rank to root==0 tree
   33.46 -     */
   33.47 -    node_rank=(my_rank_in_group-root+n_peers)%n_peers;
   33.48 -
   33.49 -    /*
   33.50 -     * compute my communication pattern - binary tree
   33.51 -     */
   33.52 -    rc=mca_common_netpatterns_setup_narray_tree(2, node_rank, n_peers,
   33.53 -            &node_data);
   33.54 -    if( OMPI_SUCCESS != rc ) {
   33.55 -        goto Error;
   33.56 -    }
   33.57 -
   33.58 -    /* 1 process special case */
   33.59 -    if(1 == n_peers) {
   33.60 -        return OMPI_SUCCESS;
   33.61 -    }
   33.62 -
   33.63 -    /* if I have parents - wait on the data to arrive */
   33.64 -    if(node_data.n_parents) {
   33.65 -        /* I will have only 1 parent */
   33.66 -        peer_rank=node_data.parent_rank;
   33.67 -        peer_rank=(peer_rank+root)%n_peers;
   33.68 -        /* translate back to actual rank */
   33.69 -        rc=MCA_PML_CALL(recv(buffer, count,dtype,peer_rank,
   33.70 -                    -OMPI_COMMON_TAG_BCAST, comm, MPI_STATUSES_IGNORE));
   33.71 -        if( 0 > rc ) {
   33.72 -            goto Error;
   33.73 -        }
   33.74 -    }
   33.75 -
   33.76 -    /* send the data to my children */
   33.77 -    msg_cnt=0;
   33.78 -    for(i=0 ; i < node_data.n_children ; i++ ) {
   33.79 -        peer_rank=node_data.children_ranks[i];
   33.80 -        peer_rank=(peer_rank+root)%n_peers;
   33.81 -        rc=MCA_PML_CALL(isend(buffer,
   33.82 -                    count,dtype,peer_rank,
   33.83 -                    -OMPI_COMMON_TAG_BCAST,MCA_PML_BASE_SEND_STANDARD,
   33.84 -                    comm,&(requests[msg_cnt])));
   33.85 -        if( 0 > rc ) {
   33.86 -            goto Error;
   33.87 -        }
   33.88 -        msg_cnt++;
   33.89 -    }
   33.90 -    /* wait for send completion */
   33.91 -    if(msg_cnt) {
   33.92 -        /* wait on send and receive completion */
   33.93 -        ompi_request_wait_all(msg_cnt,requests,MPI_STATUSES_IGNORE);
   33.94 -    }
   33.95 -
   33.96 -    /* return */
   33.97 -    return OMPI_SUCCESS;
   33.98 -
   33.99 -Error:
  33.100 -    return rc;
  33.101 -}
    34.1 --- a/ompi/mca/common/commpatterns/common_coll_ops.h	Tue Feb 19 22:36:41 2013 +0000
    34.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    34.3 @@ -1,48 +0,0 @@
    34.4 -/*
    34.5 - * Copyright (c) 2009-2012 Mellanox Technologies.  All rights reserved.
    34.6 - * Copyright (c) 2009-2012 Oak Ridge National Laboratory.  All rights reserved.
    34.7 - * $COPYRIGHT$
    34.8 - *
    34.9 - * Additional copyrights may follow
   34.10 - *
   34.11 - * $HEADER$
   34.12 - */
   34.13 -
   34.14 -#ifndef COMM_COLL_OP_TYPES_H
   34.15 -#define COMM_COLL_OP_TYPES_H
   34.16 -
   34.17 -#include "ompi_config.h"
   34.18 -#include "ompi/datatype/ompi_datatype.h"
   34.19 -#include "ompi/proc/proc.h"
   34.20 -
   34.21 -BEGIN_C_DECLS
   34.22 -
   34.23 -#define OMPI_COMMON_TAG_ALLREDUCE 99
   34.24 -#define OMPI_COMMON_TAG_BCAST     98
   34.25 -
   34.26 -
   34.27 -
   34.28 -
   34.29 -OMPI_DECLSPEC int comm_allgather_pml(void *src_buf, void *dest_buf, int count,
   34.30 -        ompi_datatype_t *dtype, int my_rank_in_group, int n_peers, 
   34.31 -        int *ranks_in_comm,ompi_communicator_t *comm);
   34.32 -OMPI_DECLSPEC int comm_allreduce_pml(void *sbuf, void *rbuf, int count,
   34.33 -        ompi_datatype_t *dtype, int my_rank_in_group,
   34.34 -        struct ompi_op_t *op, int n_peers,int *ranks_in_comm,
   34.35 -        ompi_communicator_t *comm);
   34.36 -OMPI_DECLSPEC int comm_bcast_pml(void *buffer, int root, int count,
   34.37 -        ompi_datatype_t *dtype, int my_rank_in_group,
   34.38 -        int n_peers, int *ranks_in_comm,ompi_communicator_t
   34.39 -        *comm);
   34.40 -
   34.41 -/* reduction operations supported */
   34.42 -#define OP_SUM 1
   34.43 -#define OP_MAX 2
   34.44 -#define OP_MIN 3
   34.45 -
   34.46 -#define TYPE_INT4 1
   34.47 -
   34.48 -
   34.49 -END_C_DECLS
   34.50 -
   34.51 -#endif /* COMM_COLL_OP_TYPES_H */
    35.1 --- a/ompi/mca/common/commpatterns/common_netpatterns.h	Tue Feb 19 22:36:41 2013 +0000
    35.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    35.3 @@ -1,24 +0,0 @@
    35.4 -/*
    35.5 - * Copyright (c) 2009-2012 Mellanox Technologies.  All rights reserved.
    35.6 - * Copyright (c) 2009-2012 Oak Ridge National Laboratory.  All rights reserved.
    35.7 - * $COPYRIGHT$
    35.8 - *
    35.9 - * Additional copyrights may follow
   35.10 - *
   35.11 - * $HEADER$
   35.12 - */
   35.13 -
   35.14 -#ifndef COMM_NETPATTERNS_H
   35.15 -#define COMM_NETPATTERNS_H
   35.16 -
   35.17 -#include "ompi_config.h"
   35.18 -#include "orte/include/orte/types.h"
   35.19 -#include "orte/mca/rml/rml_types.h"
   35.20 -
   35.21 -BEGIN_C_DECLS
   35.22 -
   35.23 -#define MAX_TMP_BUFFER            8192
   35.24 -
   35.25 -END_C_DECLS
   35.26 -
   35.27 -#endif /* COMM_NETPATTERNS_H */
    36.1 --- a/ompi/mca/common/commpatterns/ompi_common_netpatterns_macros.h	Tue Feb 19 22:36:41 2013 +0000
    36.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    36.3 @@ -1,52 +0,0 @@
    36.4 -/*
    36.5 - * Copyright (c) 2009-2012 Mellanox Technologies.  All rights reserved.
    36.6 - * Copyright (c) 2009-2012 Oak Ridge National Laboratory.  All rights reserved.
    36.7 - * $COPYRIGHT$
    36.8 - *
    36.9 - * Additional copyrights may follow
   36.10 - *
   36.11 - * $HEADER$
   36.12 - */
   36.13 -
   36.14 -#ifndef OMPI_COMMON_NETPATTERNS_MACROS_H
   36.15 -#define OMPI_COMMON_NETPATTERNS_MACROS_H
   36.16 -
   36.17 -#include "ompi_config.h"
   36.18 -
   36.19 -BEGIN_C_DECLS
   36.20 -
   36.21 -/* function to decompose an interger into it's representation in base K */
   36.22 -/*
   36.23 - * input_value - value to translate (input)
   36.24 - * base - base of representation (input)
   36.25 - * highest_power - the highest power that may have a non-zero entry (input)
   36.26 - *    the assumption is that this will be called in the critical path
   36.27 - *    to compute communication patterns, so will precompute such values
   36.28 - *    and pass the in.
   36.29 - * base_to_power_i - array of base to ith power (input)
   36.30 - * cum_base_to_power_i - array of cummulative base to ith power (input)
   36.31 - * base_k_rep - representation in base "base".  Space is pre-allocated. (out)
   36.32 - */
   36.33 -static inline  __opal_attribute_always_inline__ void
   36.34 -common_netpatterns_obtain_rep_base_k (int input_value, int base,
   36.35 -        int highest_power, int *base_to_power_i,
   36.36 -        int *base_k_rep
   36.37 -        )
   36.38 -{
   36.39 -    /* local variables */
   36.40 -    int lvl, work_value;
   36.41 -
   36.42 -    /* loop over all possible powers */
   36.43 -    work_value=input_value;
   36.44 -    for( lvl=highest_power ; lvl >= 0 ; lvl-- ) {
   36.45 -        /* still need to compute the actual coefficient */
   36.46 -        base_k_rep[lvl]=work_value/base_to_power_i[lvl];
   36.47 -        work_value-=(base_k_rep[lvl]*base_to_power_i[lvl]);
   36.48 -
   36.49 -    }
   36.50 -
   36.51 -}
   36.52 -
   36.53 -END_C_DECLS
   36.54 -
   36.55 -#endif /* OMPI_COMMON_NETPATTERNS_MACROS_H */
    37.1 --- a/ompi/mca/common/netpatterns/.windows	Tue Feb 19 22:36:41 2013 +0000
    37.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    37.3 @@ -1,12 +0,0 @@
    37.4 -#
    37.5 -# Copyright (c) 2008-2012 High Performance Computing Center Stuttgart, 
    37.6 -#                         University of Stuttgart.  All rights reserved.
    37.7 -# $COPYRIGHT$
    37.8 -# 
    37.9 -# Additional copyrights may follow
   37.10 -# 
   37.11 -# $HEADER$
   37.12 -#
   37.13 -
   37.14 -# Specific to this module
   37.15 -exclude_list=common_allreduce.c
   37.16 \ No newline at end of file
    38.1 --- a/ompi/mca/common/netpatterns/Makefile.am	Tue Feb 19 22:36:41 2013 +0000
    38.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    38.3 @@ -1,94 +0,0 @@
    38.4 -#
    38.5 -# Copyright (c) 2009-2012 Mellanox Technologies.  All rights reserved.
    38.6 -# Copyright (c) 2009-2012 Oak Ridge National Laboratory.  All rights reserved.
    38.7 -# $COPYRIGHT$
    38.8 -# 
    38.9 -# Additional copyrights may follow
   38.10 -# 
   38.11 -# $HEADER$
   38.12 -#
   38.13 -
   38.14 -# A word of explanation...
   38.15 -#
   38.16 -# This library is linked against various MCA components because all
   38.17 -# shared-memory based components (e.g., mpool, ptl, etc.)  need to
   38.18 -# share some common code and data.  There's two cases:
   38.19 -#
   38.20 -# 1. libmca_common_netpatterns.la is a shared library.  By linking that shared
   38.21 -# library to all components that need it, the OS linker will
   38.22 -# automatically load it into the process as necessary, and there will
   38.23 -# only be one copy (i.e., all the components will share *one* copy of
   38.24 -# the code and data).
   38.25 -#
   38.26 -# 2. libmca_common_netpatterns.la is a static library.  In this case, it will
   38.27 -# be rolled up into the top-level libmpi.la.  It will also be rolled
   38.28 -# into each component, but then the component will also be rolled up
   38.29 -# into the upper-level libmpi.la.  Linkers universally know how to
   38.30 -# "figure this out" so that we end up with only one copy of the code
   38.31 -# and data.
   38.32 -#
   38.33 -# Note that building this common component statically and linking
   38.34 -# against other dynamic components is *not* supported!
   38.35 -
   38.36 -EXTRA_DIST = .windows
   38.37 -
   38.38 -# Header files
   38.39 -
   38.40 -headers = \
   38.41 -        common_netpatterns.h \
   38.42 -        common_netpatterns_knomial_tree.h \
   38.43 -        common_coll_ops.h
   38.44 -
   38.45 -# Source files
   38.46 -
   38.47 -sources = \
   38.48 -		common_netpatterns_base.c \
   38.49 -		common_netpatterns_multinomial_tree.c \
   38.50 -		common_netpatterns_nary_tree.c \
   38.51 -		common_netpatterns_knomial_tree.c
   38.52 -
   38.53 -#       common_allreduce.c # the allredeace is broken
   38.54 -
   38.55 -# As per above, we'll either have an installable or noinst result.
   38.56 -# The installable one should follow the same MCA prefix naming rules
   38.57 -# (i.e., libmca_<type>_<name>.la).  The noinst one can be named
   38.58 -# whatever it wants, although libmca_<type>_<name>_noinst.la is
   38.59 -# recommended.
   38.60 -
   38.61 -# To simplify components that link to this library, we will *always*
   38.62 -# have an output libtool library named libmca_<type>_<name>.la -- even
   38.63 -# for case 2) described above (i.e., so there's no conditional logic
   38.64 -# necessary in component Makefile.am's that link to this library).
   38.65 -# Hence, if we're creating a noinst version of this library (i.e.,
   38.66 -# case 2), we sym link it to the libmca_<type>_<name>.la name
   38.67 -# (libtool will do the Right Things under the covers).  See the
   38.68 -# all-local and clean-local rules, below, for how this is effected.
   38.69 -
   38.70 -lib_LTLIBRARIES =
   38.71 -noinst_LTLIBRARIES =
   38.72 -comp_inst = libmca_common_netpatterns.la
   38.73 -comp_noinst = libmca_common_netpatterns_noinst.la
   38.74 -
   38.75 -if MCA_BUILD_ompi_common_netpatterns_DSO
   38.76 -lib_LTLIBRARIES += $(comp_inst)
   38.77 -else
   38.78 -noinst_LTLIBRARIES += $(comp_noinst)
   38.79 -endif
   38.80 -
   38.81 -libmca_common_netpatterns_la_SOURCES = $(headers) $(sources)
   38.82 -libmca_common_netpatterns_noinst_la_SOURCES = $(libmca_common_netpatterns_la_SOURCES)
   38.83 -
   38.84 -# These two rules will sym link the "noinst" libtool library filename
   38.85 -# to the installable libtool library filename in the case where we are
   38.86 -# compiling this component statically (case 2), described above).
   38.87 -
   38.88 -all-local:
   38.89 -	if test -z "$(lib_LTLIBRARIES)"; then \
   38.90 -	  rm -f "$(comp_inst)"; \
   38.91 -	  $(LN_S) "$(comp_noinst)" "$(comp_inst)"; \
   38.92 -	fi
   38.93 -
   38.94 -clean-local:
   38.95 -	if test -z "$(lib_LTLIBRARIES)"; then \
   38.96 -	  rm -f "$(comp_inst)"; \
   38.97 -	fi
    39.1 --- a/ompi/mca/common/netpatterns/common_allreduce.c	Tue Feb 19 22:36:41 2013 +0000
    39.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    39.3 @@ -1,344 +0,0 @@
    39.4 -/*
    39.5 - * Copyright (c) 2009-2012 Mellanox Technologies.  All rights reserved.
    39.6 - * Copyright (c) 2009-2012 Oak Ridge National Laboratory.  All rights reserved.
    39.7 - * $COPYRIGHT$
    39.8 - * 
    39.9 - * Additional copyrights may follow
   39.10 - * 
   39.11 - * $HEADER$
   39.12 - */
   39.13 -/** @file */
   39.14 -
   39.15 -#include "ompi_config.h"
   39.16 -
   39.17 -#include "ompi/constants.h"
   39.18 -#include "coll_sm2.h"
   39.19 -#include "ompi/op/op.h"
   39.20 -#include "ompi/datatype/ompi_datatype.h"
   39.21 -#include "ompi/communicator/communicator.h"
   39.22 -
   39.23 -orte_rml_callback_fn_t send_completion(nt status, struct orte_process_name_t* peer, struct iovec* msg, 
   39.24 -        int count, orte_rml_tag_t tag, void* cbdata)
   39.25 -{
   39.26 -    /* set send completion flag */
   39.27 -    *(int *)cbdata=1;
   39.28 -}
   39.29 -
   39.30 -
   39.31 -orte_rml_module_recv_nb_fn_t recv_completion(nt status, struct orte_process_name_t* peer, struct iovec* msg, 
   39.32 -        int count, orte_rml_tag_t tag, void* cbdata)
   39.33 -{
   39.34 -    /* set receive completion flag */
   39.35 -    MB();
   39.36 -    *(int *)cbdata=1;
   39.37 -}
   39.38 -
   39.39 -
   39.40 -static void op_reduce(int op_type,(void *)src_dest_buf,(void *) src_buf, int count,
   39.41 -        int data_type)
   39.42 -{
   39.43 -    /* local variables */
   39.44 -    int ret;
   39.45 -
   39.46 -    /* op type */
   39.47 -    switch (op_type) {
   39.48 -
   39.49 -        case OP_SUM:
   39.50 -
   39.51 -            
   39.52 -            switch (data_type) {
   39.53 -                case TYPE_INT4:
   39.54 -                    int *int_src_ptr=(int *)src_ptr;
   39.55 -                    int *int_src_dst_ptr=(int *)src_dst_ptr;
   39.56 -                    int cnt;
   39.57 -                    for(cnt=0 ; cnt < count ; ) {
   39.58 -                        (*(int_src_dst_ptr))+=(*(int_src_ptr));
   39.59 -                    break;
   39.60 -                default:
   39.61 -                    ret=OMPI_ERROR;
   39.62 -                    goto Error;
   39.63 -            }
   39.64 -
   39.65 -            break;
   39.66 -
   39.67 -        default:
   39.68 -        ret=OMPI_ERROR;
   39.69 -        goto Error;
   39.70 -    }
   39.71 -Error:
   39.72 -    return ret;
   39.73 -}
   39.74 -
   39.75 -/**
   39.76 - * All-reduce for contigous primitive types
   39.77 - */
   39.78 -static
   39.79 -comm_allreduce(void *sbuf, void *rbuf, int count, opal_datatype_t *dtype, 
   39.80 -        int op_type, opal_list_t *peers)
   39.81 -{
   39.82 -    /* local variables */
   39.83 -    int rc=OMPI_SUCCESS,n_dts_per_buffer,n_data_segments,stripe_number;
   39.84 -    int pair_rank,exchange,extra_rank;
   39.85 -    int index_read,index_write;
   39.86 -    mca_common_netpatterns_pair_exchange_node_t my_exchange_node;
   39.87 -    int my_rank,count_processed,count_this_stripe;
   39.88 -    size_t n_peers,message_extent,len_data_buffer;
   39.89 -    size_t dt_size;
   39.90 -    long long tag, base_tag;
   39.91 -    sm_work_buffer_t *sm_buffer_desc;
   39.92 -    opal_list_item_t *item;
   39.93 -    char scratch_bufers[2][MAX_TMP_BUFFER];
   39.94 -    int send_buffer=0;recv_buffer=1;
   39.95 -    char *sbuf_current,*rbuf_current;
   39.96 -    ompi_proc_t **proc_array;
   39.97 -    struct iovec send_iov, recv_iov;
   39.98 -    volatile int *recv_done, *send_done;
   39.99 -    int recv_completion_flag, send_completion_flag;
  39.100 -    int data_type;
  39.101 -
  39.102 -    /* get size of data needed - same layout as user data, so that
  39.103 -     *   we can apply the reudction routines directly on these buffers
  39.104 -     */
  39.105 -    rc=opal_datatype_type_size(dtype, &dt_size);
  39.106 -    if( OMPI_SUCCESS != rc ) {
  39.107 -        goto Error;
  39.108 -    }
  39.109 -    message_extent=dt_extent*count;
  39.110 -
  39.111 -    /* lenght of control and data regions */
  39.112 -    len_data_buffer=sm_module->data_memory_per_proc_per_segment;
  39.113 -
  39.114 -    /* number of data types copies that the scratch buffer can hold */
  39.115 -    n_dts_per_buffer=((int) MAX_TMP_BUFFER)/dt_size;
  39.116 -    if ( 0 == n_dts_per_buffer ) {
  39.117 -        rc=OMPI_ERROR;
  39.118 -        goto Error;
  39.119 -    }
  39.120 -
  39.121 -    /* need a read and a write buffer for a pair-wise exchange of data */
  39.122 -    n_dts_per_buffer/=2;
  39.123 -    len_data_buffer=n_dts_per_buffer*dt_size;
  39.124 -
  39.125 -    /* compute number of stripes needed to process this collective */
  39.126 -    n_data_segments=(count+n_dts_per_buffer -1 ) / n_dts_per_buffer ;
  39.127 -
  39.128 -    /* */
  39.129 -    n_peers=opal_list_get_size(peers);
  39.130 -
  39.131 -    /* get my rank in the list */
  39.132 -    my_rank=0;
  39.133 -    for (item = opal_list_get_first(peers) ;
  39.134 -            item != opal_list_get_end(peers) ;
  39.135 -            item = opal_list_get_next(peers)) {
  39.136 -        if(ompi_proc_local()==(ompi_proc_t *)item){
  39.137 -            /* this is the pointer to my proc strucuture */
  39.138 -            break;
  39.139 -        }
  39.140 -        my_rank++;
  39.141 -    }
  39.142 -    proc_array=(ompi_proc_t **)malloc(sizeof(ompi_proc_t *)*n_peers);
  39.143 -    if( NULL == proc_array) {
  39.144 -        goto Error;
  39.145 -    }
  39.146 -    cnt=0;
  39.147 -    for (item = opal_list_get_first(peers) ;
  39.148 -            item != opal_list_get_end(peers) ;
  39.149 -            item = opal_list_get_next(peers)) {
  39.150 -        proc_array[cnt]=(ompi_proc_t *)item;
  39.151 -        cnt++;
  39.152 -    }
  39.153 -
  39.154 -    /* get my reduction communication pattern */
  39.155 -    ret=mca_common_netpatterns_setup_recursive_doubling_tree_node(n_peers,my_rank,&my_exchange_node);
  39.156 -    if(OMPI_SUCCESS != ret){
  39.157 -        return ret;
  39.158 -    }
  39.159 -
  39.160 -    /* setup flags for non-blocking communications */    
  39.161 -    recv_done=&recv_completion_flag;
  39.162 -    send_done=&send_completion_flag;
  39.163 -
  39.164 -    /* set data type */
  39.165 -    if(&opal_datatype_int4==dtype) {
  39.166 -        data_type=TYPE_INT4;
  39.167 -    }
  39.168 -
  39.169 -    count_processed=0;
  39.170 -
  39.171 -    /* get a pointer to the shared-memory working buffer */
  39.172 -    /* NOTE: starting with a rather synchronous approach */
  39.173 -    for( stripe_number=0 ; stripe_number < n_data_segments ; stripe_number++ ) {
  39.174 -
  39.175 -        /* get number of elements to process in this stripe */
  39.176 -        count_this_stripe=n_dts_per_buffer;
  39.177 -        if( count_processed + count_this_stripe > count )
  39.178 -            count_this_stripe=count-count_processed;
  39.179 -
  39.180 -        /* copy data from the input buffer into the temp buffer */
  39.181 -        sbuf_current=(char *)sbuf+count_processed*dt_size;
  39.182 -        memcopy(scratch_bufers[send_buffer],sbuf_current,count_this_stripe*dt_size);
  39.183 -
  39.184 -        /* copy data in from the "extra" source, if need be */
  39.185 -        if(0 < my_exchange_node->n_extra_sources)  {
  39.186 -
  39.187 -            if ( EXCHANGE_NODE == my_exchange_node->node_type ) {
  39.188 -                
  39.189 -                /*
  39.190 -                ** Receive data from extra node
  39.191 -                */
  39.192 -                
  39.193 -                extra_rank=my_exchange_node.rank_extra_source;
  39.194 -                recv_iov.iov_base=scratch_bufers[recv_buffer];
  39.195 -                recv_iov.iov_len=count_this_stripe*dt_size;
  39.196 -                rc = orte_rml.recv(&(proc_array[extra_rank]->proc_name), &recv_iov, 1,
  39.197 -                        OMPI_RML_TAG_ALLREDUCE , 0);
  39.198 -                if(OMPI_SUCCESS != rc ) {
  39.199 -                    goto  Error;
  39.200 -                }
  39.201 -
  39.202 -                /* apply collective operation to first half of the data */
  39.203 -                if( 0 < count_this_stripe ) {
  39.204 -                    op_reduce(op_type,(void *)scratch_bufers[recv_buffer],
  39.205 -                            (void *)scratch_bufers[send_buffer], n_my_count,TYPE_INT4);
  39.206 -                }
  39.207 -
  39.208 -
  39.209 -            } else {
  39.210 -        
  39.211 -                /*
  39.212 -                ** Send data to "partner" node
  39.213 -                */
  39.214 -                extra_rank=my_exchange_node.rank_extra_source;
  39.215 -                send_iov.iov_base=scratch_bufers[send_buffer];
  39.216 -                send_iov.iov_len=count_this_stripe*dt_size;
  39.217 -                rc = orte_rml.send(&(proc_array[extra_rank]->proc_name), &send_iov, 1,
  39.218 -                        OMPI_RML_TAG_ALLREDUCE , 0);
  39.219 -                if(OMPI_SUCCESS != rc ) {
  39.220 -                    goto  Error;
  39.221 -                }
  39.222 -            }
  39.223 -
  39.224 -            /* change pointer to scratch buffer - this was we can send data
  39.225 -            ** that we have summed w/o a memory copy, and receive data into the
  39.226 -            ** other buffer, w/o fear of over writting data that has not yet
  39.227 -            ** completed being send
  39.228 -            */
  39.229 -            recv_buffer^=1;
  39.230 -            send_buffer^=1;
  39.231 -        }
  39.232 -
  39.233 -        MB();
  39.234 -        /*
  39.235 -         * Signal parent that data is ready
  39.236 -         */
  39.237 -        tag=base_tag+1;
  39.238 -        my_ctl_pointer->flag=tag;
  39.239 -
  39.240 -        /* loop over data exchanges */
  39.241 -        for(exchange=0 ; exchange < my_exchange_node->n_exchanges ; exchange++) {
  39.242 -
  39.243 -            /* debug 
  39.244 -            t4=opal_sys_timer_get_cycles();
  39.245 -             end debug */
  39.246 -
  39.247 -
  39.248 -            my_write_pointer=my_tmp_data_buffer[index_write];
  39.249 -            my_read_pointer=my_tmp_data_buffer[index_read];
  39.250 -
  39.251 -            /* is the remote data read */
  39.252 -            pair_rank=my_exchange_node->rank_exchanges[exchange];
  39.253 -
  39.254 -            *recv_done=0; 
  39.255 -            *send_done=0;
  39.256 -            MB();
  39.257 -
  39.258 -            /* post non-blocking receive */
  39.259 -            recv_iov.iov_base=scratch_bufers[send_buffer];
  39.260 -            recv_iov.iov_len=count_this_stripe*dt_size;
  39.261 -            rc = orte_rml.recv_nb(&(proc_array[extra_rank]->proc_name), recv_iov, 1,
  39.262 -                        OMPI_RML_TAG_ALLREDUCE , 0, recv_completion, recv_done);
  39.263 -
  39.264 -            /* post non-blocking send */
  39.265 -            send_iov.iov_base=scratch_bufers[send_buffer];
  39.266 -            send_iov.iov_len=count_this_stripe*dt_size;
  39.267 -            rc = orte_rml.send_nb(&(proc_array[extra_rank]->proc_name), send_iov, 1,
  39.268 -                        OMPI_RML_TAG_ALLREDUCE , 0, send_completion, send_done);
  39.269 -
  39.270 -            /* wait on receive completion */
  39.271 -            while(!(*recv_done) ) {
  39.272 -                opal_progress();
  39.273 -            }
  39.274 -                
  39.275 -            /* reduce the data */
  39.276 -            if( 0 < count_this_stripe ) {
  39.277 -                op_reduce(op_type,(void *)scratch_bufers[recv_buffer],
  39.278 -                        (void *)scratch_bufers[send_buffer], n_my_count,TYPE_INT4);
  39.279 -            }
  39.280 -
  39.281 -            
  39.282 -            /* get ready for next step */
  39.283 -            index_read=(exchange&1);
  39.284 -            index_write=((exchange+1)&1);
  39.285 -
  39.286 -            /* wait on send completion */
  39.287 -            while(!(*send_done) ) {
  39.288 -                opal_progress();
  39.289 -            }
  39.290 -                
  39.291 -        }
  39.292 -
  39.293 -        /* copy data in from the "extra" source, if need be */
  39.294 -        if(0 < my_exchange_node->n_extra_sources)  {
  39.295 -
  39.296 -            if ( EXTRA_NODE == my_exchange_node->node_type ) {
  39.297 -                /* 
  39.298 -                ** receive the data 
  39.299 -                ** */
  39.300 -                extra_rank=my_exchange_node->rank_extra_source;
  39.301 -
  39.302 -                recv_iov.iov_base=scratch_bufers[recv_buffer];
  39.303 -                recv_iov.iov_len=count_this_stripe*dt_size;
  39.304 -                rc = orte_rml.recv(&(proc_array[extra_rank]->proc_name), &recv_iov, 1,
  39.305 -                        OMPI_RML_TAG_ALLREDUCE , 0);
  39.306 -                if(OMPI_SUCCESS != rc ) {
  39.307 -                    goto  Error;
  39.308 -                }
  39.309 -
  39.310 -            } else {
  39.311 -                /* send the data to the pair-rank outside of the power of 2 set
  39.312 -                ** of ranks
  39.313 -                */
  39.314 -
  39.315 -                extra_rank=my_exchange_node->rank_extra_source;
  39.316 -                send_iov.iov_base=scratch_bufers[recv_buffer];
  39.317 -                send_iov.iov_len=count_this_stripe*dt_size;
  39.318 -                rc = orte_rml.recv(&(proc_array[extra_rank]->proc_name), &send_iov, 1,
  39.319 -                        OMPI_RML_TAG_ALLREDUCE , 0);
  39.320 -                if(OMPI_SUCCESS != rc ) {
  39.321 -                    goto  Error;
  39.322 -                }
  39.323 -            }
  39.324 -        }
  39.325 -
  39.326 -        /* copy data into the destination buffer */
  39.327 -        rc=ompi_datatype_copy_content_same_ddt(dtype, count_this_stripe,
  39.328 -                (char *)((char *)rbuf+dt_extent*count_processed),
  39.329 -                (char *)my_write_pointer);
  39.330 -        if( 0 != rc ) {
  39.331 -            return OMPI_ERROR;
  39.332 -        }
  39.333 -
  39.334 -        /* copy data from the temp buffer into the output buffer */
  39.335 -        rbuf_current=(char *)rbuf+count_processed*dt_size;
  39.336 -        memcopy(scratch_bufers[recv_buffer],rbuf_current,count_this_stripe*dt_size);
  39.337 -    
  39.338 -        /* update the count of elements processed */
  39.339 -        count_processed+=count_this_stripe;
  39.340 -    }
  39.341 -
  39.342 -    /* return */
  39.343 -    return rc;
  39.344 -
  39.345 -Error:
  39.346 -    return rc;
  39.347 -}
    40.1 --- a/ompi/mca/common/netpatterns/common_coll_ops.h	Tue Feb 19 22:36:41 2013 +0000
    40.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    40.3 @@ -1,29 +0,0 @@
    40.4 -/*
    40.5 - * Copyright (c) 2009-2012 Mellanox Technologies.  All rights reserved.
    40.6 - * Copyright (c) 2009-2012 Oak Ridge National Laboratory.  All rights reserved.
    40.7 - * $COPYRIGHT$
    40.8 - *
    40.9 - * Additional copyrights may follow
   40.10 - *
   40.11 - * $HEADER$
   40.12 - */
   40.13 -
   40.14 -#ifndef COMM_OP_TYPES_H
   40.15 -#define COMM_OP_TYPES_H
   40.16 -
   40.17 -#include "ompi_config.h"
   40.18 -
   40.19 -BEGIN_C_DECLS
   40.20 -
   40.21 -int comm_allreduce(void *sbuf, void *rbuf, int count, opal_datatype_t *dtype,
   40.22 -                int op, opal_list_t *peers);
   40.23 -
   40.24 -/* reduction operations supported */
   40.25 -#define OP_SUM 1
   40.26 -
   40.27 -#define TYPE_INT4 1
   40.28 -
   40.29 -
   40.30 -END_C_DECLS
   40.31 -
   40.32 -#endif /* COMM_OP_TYPES_H */
    41.1 --- a/ompi/mca/common/netpatterns/common_netpatterns.h	Tue Feb 19 22:36:41 2013 +0000
    41.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    41.3 @@ -1,147 +0,0 @@
    41.4 -/*
    41.5 - * Copyright (c) 2009-2012 Mellanox Technologies.  All rights reserved.
    41.6 - * Copyright (c) 2009-2012 Oak Ridge National Laboratory.  All rights reserved.
    41.7 - * $COPYRIGHT$
    41.8 - *
    41.9 - * Additional copyrights may follow
   41.10 - *
   41.11 - * $HEADER$
   41.12 - */
   41.13 -
   41.14 -#ifndef COMM_PATTERNS_H
   41.15 -#define COMM_PATTERNS_H
   41.16 -
   41.17 -#include "ompi_config.h"
   41.18 -#include "orte/runtime/orte_globals.h"
   41.19 -#include "common_netpatterns_knomial_tree.h"
   41.20 -
   41.21 -BEGIN_C_DECLS
   41.22 -
   41.23 -int ompi_common_netpatterns_base_err(const char* fmt, ...);
   41.24 -int ompi_common_netpatterns_register_mca_params(void);
   41.25 -
   41.26 -#if OPAL_ENABLE_DEBUG
   41.27 -extern int ompi_common_netpatterns_base_verbose; /* disabled by default */
   41.28 -OMPI_DECLSPEC extern int ompi_common_netpatterns_base_err(const char*, ...) __opal_attribute_format__(__printf__, 1, 2);
   41.29 -#define NETPATTERNS_VERBOSE(args)                                \
   41.30 -    do {                                                         \
   41.31 -        if(ompi_common_netpatterns_base_verbose > 0) {           \
   41.32 -            ompi_common_netpatterns_base_err("[%s]%s[%s:%d:%s] ",\
   41.33 -                    orte_process_info.nodename,                  \
   41.34 -                    ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),          \
   41.35 -                    __FILE__, __LINE__, __func__);               \
   41.36 -            ompi_common_netpatterns_base_err args;               \
   41.37 -            ompi_common_netpatterns_base_err("\n");              \
   41.38 -        }                                                        \
   41.39 -    } while(0); 
   41.40 -#else
   41.41 -#define NETPATTERNS_VERBOSE(args)
   41.42 -#endif
   41.43 -
   41.44 -#define FIND_BASE(base,myid,level,k)    \
   41.45 -    do {                                \
   41.46 -        int temp = 1;                   \
   41.47 -        int jj;                         \
   41.48 -        int knt2;                       \
   41.49 -                                        \
   41.50 -        base = 0;                       \
   41.51 -        for( jj = 0; jj < level; jj++) {\
   41.52 -            temp *= k;                  \
   41.53 -        }                               \
   41.54 -        knt2 = 1;                       \
   41.55 -        while(myid >= knt2*temp){       \
   41.56 -            knt2++;                     \
   41.57 -        }                               \
   41.58 -        base = knt2*temp - temp;        \
   41.59 -    } while(0)                          \
   41.60 -
   41.61 -
   41.62 -
   41.63 -
   41.64 -/* enum for node type */
   41.65 -enum {
   41.66 -    ROOT_NODE,
   41.67 -    LEAF_NODE,
   41.68 -    INTERIOR_NODE
   41.69 -};
   41.70 -
   41.71 -
   41.72 -/*
   41.73 - * N-order tree node description
   41.74 - */
   41.75 -struct mca_common_netpatterns_tree_node_t {
   41.76 -    /* my rank within the group */
   41.77 -    int my_rank;
   41.78 -    /* my node type - root, leaf, or interior */
   41.79 -    int my_node_type;
   41.80 -    /* number of nodes in the tree */
   41.81 -    int tree_size;
   41.82 -    /* number of parents (0/1) */
   41.83 -    int n_parents;
   41.84 -    /* number of children */
   41.85 -    int n_children;
   41.86 -    /* parent rank within the group */
   41.87 -    int parent_rank;
   41.88 -    /* chidren ranks within the group */
   41.89 -    int *children_ranks;
   41.90 -};
   41.91 -typedef struct mca_common_netpatterns_tree_node_t mca_common_netpatterns_tree_node_t;
   41.92 -
   41.93 -struct mca_common_netpatterns_k_exchange_node_t;
   41.94 -/*
   41.95 - * N-order + knominal tree node description
   41.96 - */
   41.97 -struct mca_common_netpatterns_narray_knomial_tree_node_t {
   41.98 -    /* my rank within the group */
   41.99 -    int my_rank;
  41.100 -    /* my node type - root, leaf, or interior */
  41.101 -    int my_node_type;
  41.102 -    /* number of nodes in the tree */
  41.103 -    int tree_size;
  41.104 -    /* number of parents (0/1) */
  41.105 -    int n_parents;
  41.106 -    /* number of children */
  41.107 -    int n_children;
  41.108 -    /* parent rank within the group */
  41.109 -    int parent_rank;
  41.110 -    /* chidren ranks within the group */
  41.111 -    int *children_ranks;
  41.112 -    /* Total number of ranks on this specific level */
  41.113 -    int level_size;
  41.114 -    /* Rank on this node inside of level */
  41.115 -    int rank_on_level;
  41.116 -    /* Knomial recursive gather information */
  41.117 -    struct mca_common_netpatterns_k_exchange_node_t k_node;
  41.118 -};
  41.119 -typedef struct mca_common_netpatterns_narray_knomial_tree_node_t 
  41.120 -mca_common_netpatterns_narray_knomial_tree_node_t;
  41.121 -
  41.122 -
  41.123 -/* Init code for common_netpatterns */
  41.124 -OMPI_DECLSPEC int ompi_common_netpatterns_init(void);
  41.125 -
  41.126 -/* setup an n-array tree */
  41.127 -OMPI_DECLSPEC int mca_common_netpatterns_setup_narray_tree(int tree_order, int my_rank, int num_nodes,
  41.128 -        mca_common_netpatterns_tree_node_t *my_node);
  41.129 -/* setup an n-array tree with k-nomial levels */
  41.130 -OMPI_DECLSPEC int mca_common_netpatterns_setup_narray_knomial_tree( int tree_order, int my_rank, int num_nodes,
  41.131 -        mca_common_netpatterns_narray_knomial_tree_node_t *my_node);
  41.132 -
  41.133 -/* setup an multi-nomial tree - for each node in the tree
  41.134 - *  this returns it's parent, and it's children 
  41.135 - */
  41.136 -OMPI_DECLSPEC int mca_common_netpatterns_setup_multinomial_tree(int tree_order, int num_nodes,
  41.137 -        mca_common_netpatterns_tree_node_t *tree_nodes);
  41.138 -
  41.139 -OMPI_DECLSPEC int mca_common_netpatterns_setup_narray_tree_contigous_ranks(int tree_order,
  41.140 -        int num_nodes, mca_common_netpatterns_tree_node_t **tree_nodes);
  41.141 -
  41.142 -/* calculate the nearest power of radix that is equal to or greater
  41.143 - * than size, with the specified radix.  The resulting tree is of
  41.144 - * depth n_lvls.
  41.145 - */
  41.146 -OMPI_DECLSPEC int roundup_to_power_radix( int radix, int size, int *n_lvls );
  41.147 -
  41.148 -END_C_DECLS
  41.149 -
  41.150 -#endif /* COMM_PATTERNS_H */
    42.1 --- a/ompi/mca/common/netpatterns/common_netpatterns_base.c	Tue Feb 19 22:36:41 2013 +0000
    42.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    42.3 @@ -1,53 +0,0 @@
    42.4 -/*
    42.5 - *
    42.6 - * Copyright (c) 2009-2012 Mellanox Technologies.  All rights reserved.
    42.7 - * Copyright (c) 2009-2012 Oak Ridge National Laboratory.  All rights reserved.
    42.8 - * $COPYRIGHT$
    42.9 - * 
   42.10 - * Additional copyrights may follow
   42.11 - * 
   42.12 - * $HEADER$
   42.13 - */
   42.14 -#include "opal/mca/base/mca_base_param.h"
   42.15 -#include "ompi/include/ompi/constants.h"
   42.16 -#include "common_netpatterns.h"
   42.17 -
   42.18 -int ompi_common_netpatterns_base_verbose = 0; /* disabled by default */
   42.19 -
   42.20 -int ompi_common_netpatterns_register_mca_params(void)
   42.21 -{
   42.22 -    mca_base_param_reg_int_name("common", 
   42.23 -                                "netpatterns_base_verbose", 
   42.24 -                                "Verbosity level of the NETPATTERNS framework", 
   42.25 -                                false, false, 
   42.26 -                                0, 
   42.27 -                                &ompi_common_netpatterns_base_verbose);
   42.28 -
   42.29 -    return OMPI_SUCCESS;
   42.30 -}
   42.31 -
   42.32 -int ompi_common_netpatterns_base_err(const char* fmt, ...)
   42.33 -{
   42.34 -    va_list list;
   42.35 -    int ret;
   42.36 -
   42.37 -    va_start(list, fmt);
   42.38 -    ret = vfprintf(stderr, fmt, list);
   42.39 -    va_end(list);
   42.40 -    return ret;
   42.41 -}
   42.42 -
   42.43 -int ompi_common_netpatterns_init(void)
   42.44 -{
   42.45 -/* There is no component for common_netpatterns so every component that uses it
   42.46 -   should call ompi_common_netpatterns_init, still we want to run it only once */
   42.47 -static int was_called = 0;
   42.48 -
   42.49 -    if (0 == was_called) {
   42.50 -        was_called = 1;
   42.51 -    
   42.52 -        return ompi_common_netpatterns_register_mca_params();
   42.53 -    }
   42.54 -
   42.55 -    return OMPI_SUCCESS;
   42.56 -}
    43.1 --- a/ompi/mca/common/netpatterns/common_netpatterns_knomial_tree.c	Tue Feb 19 22:36:41 2013 +0000
    43.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    43.3 @@ -1,932 +0,0 @@
    43.4 -/*
    43.5 - * Copyright (c) 2009-2012 Mellanox Technologies.  All rights reserved.
    43.6 - * Copyright (c) 2009-2012 Oak Ridge National Laboratory.  All rights reserved.
    43.7 - * $COPYRIGHT$
    43.8 - *
    43.9 - * Additional copyrights may follow
   43.10 - *
   43.11 - * $HEADER$
   43.12 - */
   43.13 -
   43.14 -#include "ompi_config.h"
   43.15 -#ifdef HAVE_UNISTD_H
   43.16 -#include <unistd.h>
   43.17 -#endif
   43.18 -#include <sys/types.h>
   43.19 -#ifdef HAVE_SYS_MMAN_H
   43.20 -#include <sys/mman.h>
   43.21 -#endif
   43.22 -#include <fcntl.h>
   43.23 -#include <stdlib.h>
   43.24 -#include <assert.h>
   43.25 -
   43.26 -#include "ompi/constants.h"
   43.27 -#include "common_netpatterns.h"
   43.28 -
   43.29 -/* setup recursive doubleing tree node */
   43.30 -
   43.31 -OMPI_DECLSPEC int mca_common_netpatterns_setup_recursive_knomial_allgather_tree_node(
   43.32 -        int num_nodes, int node_rank, int tree_order, int *hier_ranks,
   43.33 -        mca_common_netpatterns_k_exchange_node_t *exchange_node)
   43.34 -{
   43.35 -    /* local variables */
   43.36 -    int i, j, cnt, i_temp;
   43.37 -    int knt,knt2,kk, ex_node, stray;
   43.38 -    int n_levels,pow_k;
   43.39 -    int k_temp1;
   43.40 -    int k_temp2;
   43.41 -    int myid, reindex_myid = 0;
   43.42 -    int base, peer_base,base_temp;
   43.43 -    int peer; 
   43.44 -    int *prev_data = NULL;
   43.45 -    int *current_data = NULL;
   43.46 -    int *group_info = NULL;
   43.47 -
   43.48 -
   43.49 -    NETPATTERNS_VERBOSE(
   43.50 -            ("Enter mca_common_netpatterns_setup_recursive_knomial_tree_node(num_nodes=%d, node_rank=%d, tree_order=%d)",
   43.51 -                num_nodes, node_rank, tree_order));
   43.52 -
   43.53 -    assert(num_nodes > 1);
   43.54 -    assert(tree_order > 1);
   43.55 -    if (tree_order > num_nodes) {
   43.56 -        tree_order = num_nodes;
   43.57 -    }
   43.58 -
   43.59 -    /* k-nomial radix */
   43.60 -    exchange_node->tree_order = tree_order;
   43.61 -
   43.62 -    /* Calculate the number of levels in the tree for 
   43.63 -     * the largest power of tree_order less than or 
   43.64 -     * equal to the group size
   43.65 -     */
   43.66 -    n_levels = 0;
   43.67 -    cnt=1;
   43.68 -    while ( num_nodes > cnt ) {
   43.69 -        cnt *= tree_order;
   43.70 -        n_levels++;
   43.71 -    }
   43.72 -    /* this is the actual number of recusive k-ing steps 
   43.73 -     * we will perform, the last step may not be a full 
   43.74 -     * step depending on the outcome of the next conditional
   43.75 -     */
   43.76 -    pow_k = n_levels;
   43.77 -
   43.78 -    /* figure out the largest power of tree_order that is less than or equal to
   43.79 -     * num_nodes */
   43.80 -    if ( cnt > num_nodes) {
   43.81 -        cnt /= tree_order;
   43.82 -        n_levels--;
   43.83 -    }
   43.84 -
   43.85 -    /*exchange_node->log_tree_order = n_levels;*/
   43.86 -    exchange_node->log_tree_order = pow_k;
   43.87 -    exchange_node->n_largest_pow_tree_order = cnt;
   43.88 -
   43.89 -    
   43.90 -    /* find the number of complete groups of size tree_order, tree_order^2, tree_order^3,...,tree_order^pow_k */
   43.91 -    /* I don't think we need to cache this info this group_info array */
   43.92 -    group_info = (int *) calloc(pow_k , sizeof(int));
   43.93 -    group_info[0] = num_nodes/tree_order;
   43.94 -    /*fprintf(stderr,"Number of complete groups of power 1 is %d\n",group_info[0]);*/
   43.95 -    for ( i = 1; i < pow_k; i ++) {
   43.96 -        group_info[i] = group_info[i-1]/tree_order;
   43.97 -        /*fprintf(stderr,"Number of complete groups of power %d is %d\n",i+1,group_info[i]);*/
   43.98 -
   43.99 -    }
  43.100 -
  43.101 -    /* find number of incomplete groups and number of ranks belonging to those ranks */
  43.102 -    knt=0;
  43.103 -    while (knt <= (pow_k - 1) && group_info[knt] > 0) {
  43.104 -        knt++;
  43.105 -    }
  43.106 -    knt--;
  43.107 -    /*fprintf(stderr,"Maximal power of k is %d and the number of incomplete groups is %d \n", knt+1 ,tree_order - group_info[knt] );*/
  43.108 -    
  43.109 -    /* k_temp is a synonym for cnt which is the largest full power of k group */
  43.110 -    /* now, start the calculation to find the first stray rank aka "extra" rank */ 
  43.111 -    stray = 0;
  43.112 -    /*fprintf(stderr,"Maximal power of k %d, first stragler rank is %d and the number of straglers is %d\n",cnt, 
  43.113 -                                                                           cnt*group_info[knt],
  43.114 -                                                                           num_nodes - cnt*group_info[knt]);*/
  43.115 -
  43.116 -
  43.117 -    /* cache this info, it's muy importante */
  43.118 -    stray = cnt*group_info[knt];
  43.119 -    exchange_node->k_nomial_stray = stray;
  43.120 -
  43.121 -
  43.122 -
  43.123 -    /* before we do this, we need to first reindex */
  43.124 -    /* reindexing phase */
  43.125 -     /* this is the reindex phase */
  43.126 -    exchange_node->reindex_map = (int *) malloc(num_nodes*sizeof(int));
  43.127 -    /* this is the inverse map */
  43.128 -    exchange_node->inv_reindex_map = (int *) malloc(num_nodes*sizeof(int));
  43.129 -    /*int reindex_myid;*/
  43.130 -    /* reindex */
  43.131 -    if( stray < num_nodes ) {
  43.132 -        /* find the first proxy rank */
  43.133 -        peer = stray - cnt;
  43.134 -        /* fix all ranks prior to this rank */
  43.135 -        for( i = 0; i < peer; i++){
  43.136 -            exchange_node->reindex_map[i] = i;
  43.137 -        }
  43.138 -        /* now, start the swap */
  43.139 -        exchange_node->reindex_map[peer] = peer;
  43.140 -        for( i = (peer+1); i < (peer + (num_nodes - stray)+1); i++) {
  43.141 -            exchange_node->reindex_map[i] = exchange_node->reindex_map[i-1] + 2;
  43.142 -        }
  43.143 -        i_temp = i;
  43.144 -        for( i = i_temp; i < stray; i++) {
  43.145 -            exchange_node->reindex_map[i] = exchange_node->reindex_map[i-1] + 1;
  43.146 -        }
  43.147 -        /* now, finish it off */
  43.148 -        exchange_node->reindex_map[stray] = peer + 1;
  43.149 -        for( i = (stray+1); i < num_nodes; i++) {
  43.150 -            exchange_node->reindex_map[i] = exchange_node->reindex_map[i-1] + 2;
  43.151 -        }
  43.152 -        /* debug print */
  43.153 -        /*
  43.154 -        for( i = 0; i < np; i++){
  43.155 -            fprintf(stderr,"%d ",reindex_map[i]);
  43.156 -        }
  43.157 -        fprintf(stderr,"\n");
  43.158 -        */
  43.159 -    } else {
  43.160 -        /* we have no extras, trivial reindexing */
  43.161 -        for( i = 0; i < num_nodes; i++){
  43.162 -            exchange_node->reindex_map[i] = i;
  43.163 -        }
  43.164 -    }
  43.165 -    /* finished reindexing */
  43.166 -
  43.167 -    /* Now, I need to get my rank in the new indexing */
  43.168 -    for( i = 0; i < num_nodes; i++ ){
  43.169 -        if( node_rank == exchange_node->reindex_map[i] ){
  43.170 -            exchange_node->reindex_myid = i;
  43.171 -            break;
  43.172 -        }
  43.173 -    }
  43.174 -    /* Now, let's compute the inverse mapping here */
  43.175 -    for( i = 0; i < num_nodes; i++){
  43.176 -        j = 0;
  43.177 -        while(exchange_node->reindex_map[j] != i ){
  43.178 -            j++;
  43.179 -        }
  43.180 -        exchange_node->inv_reindex_map[i] = j;
  43.181 -    }
  43.182 -
  43.183 -
  43.184 -    /* Now we get the data sizes we should expect at each level */
  43.185 -    /* now get the size of the data I am to receive from each peer */
  43.186 -    /*int **payload_info;*/
  43.187 -    prev_data = (int *) malloc( num_nodes*sizeof(int) );
  43.188 -    if( NULL == prev_data ) {
  43.189 -        goto Error;
  43.190 -    }
  43.191 -
  43.192 -    current_data = (int *) malloc( num_nodes*sizeof(int) );
  43.193 -    if( NULL == current_data ) {
  43.194 -        goto Error;
  43.195 -    }
  43.196 -
  43.197 -
  43.198 -    exchange_node->payload_info = (mca_common_netpatterns_payload_t **) malloc(sizeof(mca_common_netpatterns_payload_t *)*pow_k);
  43.199 -    if( NULL == exchange_node->payload_info) {
  43.200 -        goto Error;
  43.201 -    }
  43.202 -
  43.203 -    for(i = 0; i < pow_k; i++){
  43.204 -        exchange_node->payload_info[i] = (mca_common_netpatterns_payload_t *) malloc(sizeof(mca_common_netpatterns_payload_t)*(tree_order-1));
  43.205 -        if( NULL == exchange_node->payload_info[i]) {
  43.206 -            goto Error;
  43.207 -        }
  43.208 -
  43.209 -    }
  43.210 -    /* intialize the payload array 
  43.211 -       This is the money struct, just need to initialize this with 
  43.212 -       the subgroup information */ 
  43.213 -    /*
  43.214 -    for(i = 0; i < num_nodes; i++){
  43.215 -        prev_data[i] = 1;
  43.216 -        current_data[i] = 1;
  43.217 -    }
  43.218 -    */
  43.219 -
  43.220 -    for(i = 0; i < num_nodes; i++){
  43.221 -        prev_data[i] = hier_ranks[i];
  43.222 -        current_data[i] = hier_ranks[i];
  43.223 -    }
  43.224 -
  43.225 -    /* everyone will need to do this loop over all ranks 
  43.226 -     * Phase I calculate the contribution from the extra ranks 
  43.227 -     */
  43.228 -    for( myid = 0; myid < num_nodes; myid++) {
  43.229 -        /* get my new rank */
  43.230 -        for( j = 0; j < num_nodes; j++ ){
  43.231 -            /* this will be satisfied for one of the indices */
  43.232 -            if( myid == exchange_node->reindex_map[j] ){
  43.233 -                reindex_myid = j;
  43.234 -                break;
  43.235 -            }
  43.236 -        }
  43.237 -
  43.238 -        for( j = stray; j < num_nodes; j++) {
  43.239 -            if(reindex_myid == ( j - cnt )) {
  43.240 -                /* then this is a proxy rank */
  43.241 -                prev_data[myid] += prev_data[exchange_node->reindex_map[j]];
  43.242 -                break;
  43.243 -            }
  43.244 -
  43.245 -        }
  43.246 -    }
  43.247 -
  43.248 -    /* Phase II calculate the contribution from each recursive k - ing level
  43.249 -     *
  43.250 -     */
  43.251 -    k_temp1 = tree_order; /* k^1 */
  43.252 -    k_temp2 = 1;   /* k^0 */
  43.253 -    peer_base = 0;
  43.254 -    base_temp = 0;
  43.255 -    for( i = 0; i < pow_k; i++) {
  43.256 -        /* get my new rank */
  43.257 -        for( myid = 0; myid < num_nodes; myid++){
  43.258 -            current_data[myid] = prev_data[myid];
  43.259 -            /*fprintf(stderr,"my current data at level %d is %d\n",i+1,current_data[myid]);*/
  43.260 -            for( j = 0; j < num_nodes; j++ ){
  43.261 -                if( myid == exchange_node->reindex_map[j] ){
  43.262 -                    reindex_myid = j;
  43.263 -                    break;
  43.264 -                }
  43.265 -            }
  43.266 -            if( reindex_myid < stray ) { 
  43.267 -                /* now start the actual algorithm */
  43.268 -                FIND_BASE(base,reindex_myid,i+1,tree_order);
  43.269 -                for( j = 0; j < ( tree_order - 1 ); j ++ ) {
  43.270 -                    peer = base + (reindex_myid + k_temp2*(j+1))%k_temp1;
  43.271 -                    if( peer < stray ) {
  43.272 -                        /*fprintf(stderr,"getting %d bytes \n",prev_data[reindex_map[peer]]);*/
  43.273 -                        /* then get the data */
  43.274 -                        if( node_rank == myid ){
  43.275 -                            exchange_node->payload_info[i][j].r_len = prev_data[exchange_node->reindex_map[peer]];
  43.276 -                            /*fprintf(stderr,"exchange_node->payload_info[%d][%d].r_len %d\n",i,j,prev_data[exchange_node->reindex_map[peer]]);*/
  43.277 -                            if( i > 0 ) {
  43.278 -                                
  43.279 -                                /* find my len and offset */
  43.280 -                                FIND_BASE(peer_base,peer,i,tree_order);
  43.281 -                                /* I do not want to mess with this, but it seems that I have no choice */
  43.282 -                               ex_node = exchange_node->reindex_map[peer_base];
  43.283 -                               /* now, find out how far down the line this guy really is */
  43.284 -                               knt2 =0;
  43.285 -                               for(kk = 0; kk < ex_node; kk++){ 
  43.286 -                                   knt2 += hier_ranks[kk];
  43.287 -                               }
  43.288 -                                exchange_node->payload_info[i][j].r_offset = knt2; 
  43.289 -                                /*fprintf(stderr,"exchange_node->payload_info[%d][%d].r_offset %d\n",i,j,exchange_node->payload_info[i][j].r_offset);*/
  43.290 -                                
  43.291 -                                FIND_BASE(base_temp,reindex_myid,i,tree_order);
  43.292 -                                ex_node = exchange_node->reindex_map[base_temp];
  43.293 -                                knt2 = 0;
  43.294 -                                for( kk = 0; kk < ex_node; kk++){
  43.295 -                                    knt2 += hier_ranks[kk];
  43.296 -                                }
  43.297 -                                exchange_node->payload_info[i][j].s_offset =
  43.298 -                                                                  knt2; /* exchange_node->reindex_map[base_temp]; */
  43.299 -                                /*fprintf(stderr,"exchange_node->payload_info[%d][%d].s_offset %d\n",i,j,exchange_node->payload_info[i][j].s_offset);*/
  43.300 -                            } else {
  43.301 -                                ex_node = exchange_node->reindex_map[peer];
  43.302 -                                knt2 =0;
  43.303 -                                for(kk = 0; kk < ex_node; kk++){
  43.304 -                                    knt2 += hier_ranks[kk];
  43.305 -                                }
  43.306 -                                exchange_node->payload_info[i][j].r_offset =
  43.307 -                                    knt2; /*exchange_node->reindex_map[peer]; */
  43.308 -                                /*fprintf(stderr,"exchange_node->payload_info[%d][%d].r_offset %d\n",i,j,exchange_node->payload_info[i][j].r_offset);*/
  43.309 -                                knt2 = 0;
  43.310 -                                for(kk = 0; kk < myid; kk++){
  43.311 -                                    knt2 += hier_ranks[kk];
  43.312 -                                }
  43.313 -                                exchange_node->payload_info[i][j].s_offset = knt2; 
  43.314 -                                /*fprintf(stderr,"exchange_node->payload_info[%d][%d].s_offset %d\n",i,j, exchange_node->payload_info[i][j].s_offset);*/
  43.315 -                            }
  43.316 -                            /* how much I am to receive from this peer on this level */
  43.317 -                            /* how much I am to send to this peer on this level */
  43.318 -                            exchange_node->payload_info[i][j].s_len = prev_data[node_rank];
  43.319 -                            /*fprintf(stderr,"exchange_node->payload_info[%d][%d].s_len %d\n",i,j,prev_data[node_rank]);*/
  43.320 -                            /*fprintf(stderr,"I am rank %d receiveing %d bytes from rank %d at level %d\n",node_rank,
  43.321 -                                                                        prev_data[exchange_node->reindex_map[peer]],
  43.322 -                                                                        exchange_node->reindex_map[peer], i+1);*/
  43.323 -                            /*fprintf(stderr,"I am rank %d sending %d bytes to rank %d at level %d\n",node_rank,prev_data[myid],
  43.324 -                                      exchange_node->reindex_map[peer],i+1);*/
  43.325 -                        }
  43.326 -
  43.327 -                        current_data[myid] += prev_data[exchange_node->reindex_map[peer]];
  43.328 -                    }
  43.329 -                }
  43.330 -            }
  43.331 -
  43.332 -
  43.333 -        }
  43.334 -        k_temp1 *= tree_order;
  43.335 -        k_temp2 *= tree_order;
  43.336 -        /* debug print */
  43.337 -       /* fprintf(stderr,"Level %d current data ",i+1);*/
  43.338 -        for( j = 0; j < num_nodes; j++){
  43.339 -           /* fprintf(stderr,"%d ",current_data[j]); */
  43.340 -            prev_data[j] = current_data[j];
  43.341 -        }
  43.342 -       /* fprintf(stderr,"\n");*/
  43.343 -        
  43.344 -    }
  43.345 -
  43.346 -
  43.347 -    /* this is the natural way to do recursive k-ing */
  43.348 -    /* should never have more than one extra rank per proxy */
  43.349 -    if( exchange_node->reindex_myid >= stray ){
  43.350 -        /*fprintf(stderr,"Rank %d is mapped onto proxy rank %d \n",exchange_node->reindex_myid,exchange_node->reindex_myid - cnt);*/
  43.351 -        exchange_node->node_type = EXTRA_NODE;
  43.352 -    } else {
  43.353 -        exchange_node->node_type = EXCHANGE_NODE;
  43.354 -    }
  43.355 -
  43.356 -    /* set node characteristics - node that is not within the largest
  43.357 -     * power of tree_order will just send its data to node that will participate
  43.358 -     * in the recursive k-ing, and get the result back at the end.
  43.359 -     * set the initial and final data exchanges - those that are not
  43.360 -     * part of the recursive k-ing.
  43.361 -     */
  43.362 -    if (EXCHANGE_NODE == exchange_node->node_type)  {
  43.363 -        exchange_node->n_extra_sources = 0;
  43.364 -        for( i = stray; i < num_nodes; i++) {
  43.365 -            if(exchange_node->reindex_myid == ( i - cnt )) {
  43.366 -                /* then I am a proxy rank and there is only a 
  43.367 -                 * single extra source
  43.368 -                 */
  43.369 -                exchange_node->n_extra_sources = 1;
  43.370 -                break;
  43.371 -            }
  43.372 -        }
  43.373 -
  43.374 -        if (exchange_node->n_extra_sources > 0) {
  43.375 -            exchange_node->rank_extra_sources_array = (int *) malloc
  43.376 -                (exchange_node->n_extra_sources * sizeof(int));
  43.377 -            if( NULL == exchange_node->rank_extra_sources_array ) {
  43.378 -                goto Error;
  43.379 -            }
  43.380 -            /* you broke above */
  43.381 -            exchange_node->rank_extra_sources_array[0] = exchange_node->reindex_map[i];
  43.382 -        } else {
  43.383 -            exchange_node->rank_extra_sources_array = NULL;
  43.384 -        }
  43.385 -    } else {
  43.386 -        /* I am an extra rank, find my proxy rank */
  43.387 -        exchange_node->n_extra_sources = 1;
  43.388 -
  43.389 -        exchange_node->rank_extra_sources_array = (int *) malloc
  43.390 -            (exchange_node->n_extra_sources * sizeof(int));
  43.391 -        if( NULL == exchange_node->rank_extra_sources_array ) {
  43.392 -            goto Error;
  43.393 -        }
  43.394 -        exchange_node->rank_extra_sources_array[0] = exchange_node->reindex_map[exchange_node->reindex_myid - cnt];
  43.395 -    }
  43.396 -
  43.397 -
  43.398 -    /* set the exchange pattern */
  43.399 -    if (EXCHANGE_NODE == exchange_node->node_type) {
  43.400 -        /* yep, that's right PLUS 1 */
  43.401 -        exchange_node->n_exchanges = n_levels + 1;
  43.402 -        /* initialize this */
  43.403 -        exchange_node->n_actual_exchanges = 0;
  43.404 -        /* Allocate 2 dimension array thak keeps
  43.405 -         rank exchange information for each step*/ 
  43.406 -        exchange_node->rank_exchanges = (int **) malloc
  43.407 -            (exchange_node->n_exchanges * sizeof(int *));
  43.408 -        if(NULL == exchange_node->rank_exchanges) {
  43.409 -            goto Error;
  43.410 -        }
  43.411 -        for (i = 0; i < exchange_node->n_exchanges; i++) {
  43.412 -            exchange_node->rank_exchanges[i] = (int *) malloc
  43.413 -                ((tree_order - 1) * sizeof(int));
  43.414 -            if( NULL == exchange_node->rank_exchanges ) {
  43.415 -                goto Error;
  43.416 -            }
  43.417 -        }
  43.418 -        k_temp1 = tree_order;
  43.419 -        k_temp2 = 1;
  43.420 -        /* fill in exchange partners */
  43.421 -        /* Ok, now we start with the actual algorithm */
  43.422 -        for( i = 0; i < exchange_node->n_exchanges; i ++) {
  43.423 -            /*fprintf(stderr,"Starting Level %d\n",i+1);*/
  43.424 -
  43.425 -            FIND_BASE(base,exchange_node->reindex_myid,i+1,tree_order);
  43.426 -            /*fprintf(stderr,"Myid %d base %d\n",node_rank,base);*/
  43.427 -            for( j = 0; j < (tree_order-1); j ++ ) {
  43.428 -                peer = base + (exchange_node->reindex_myid + k_temp2*(j+1))%k_temp1;
  43.429 -                if ( peer < stray ) {
  43.430 -                    exchange_node->rank_exchanges[i][j] = exchange_node->reindex_map[peer];
  43.431 -                    /* an actual exchange occurs, bump the counter */
  43.432 -                   
  43.433 -                } else {
  43.434 -                    /* out of range, skip it - do not bump the n_actual_exchanges counter */
  43.435 -                    exchange_node->rank_exchanges[i][j] = -1;
  43.436 -                }
  43.437 -            
  43.438 -            }
  43.439 -            k_temp1 *= tree_order;
  43.440 -            k_temp2 *= tree_order;
  43.441 -        }
  43.442 -        for(i = 0; i < pow_k; i++){
  43.443 -            for(j = 0; j < (tree_order-1); j++){
  43.444 -                if(-1 != exchange_node->rank_exchanges[i][j]){
  43.445 -                    /* then bump the counter */
  43.446 -                    exchange_node->n_actual_exchanges++;
  43.447 -                }
  43.448 -            }
  43.449 -        }
  43.450 -
  43.451 -    } else {
  43.452 -        /* we are extra ranks and we don't participate in the exchange :( */
  43.453 -        exchange_node->n_exchanges=0;
  43.454 -        exchange_node->rank_exchanges=NULL;
  43.455 -    }
  43.456 -
  43.457 -
  43.458 -    /* set the number of tags needed per stripe - this must be the
  43.459 -     *   same across all procs in the communicator.
  43.460 -     */
  43.461 -    /* do we need this one */
  43.462 -    exchange_node->n_tags = tree_order * n_levels + 1;
  43.463 -    
  43.464 -    free(prev_data);
  43.465 -    free(current_data);
  43.466 -    free(group_info);
  43.467 -
  43.468 -    /* successful return */
  43.469 -    return OMPI_SUCCESS;
  43.470 -
  43.471 -Error:
  43.472 -
  43.473 -    if (NULL != exchange_node->rank_extra_sources_array) {
  43.474 -        free(exchange_node->rank_extra_sources_array);
  43.475 -    }
  43.476 -
  43.477 -    if (NULL != exchange_node->rank_exchanges) {
  43.478 -        for (i = 0; i < exchange_node->n_exchanges; i++) {
  43.479 -            if (NULL != exchange_node->rank_exchanges[i]) {
  43.480 -                free(exchange_node->rank_exchanges[i]);
  43.481 -            }
  43.482 -        }
  43.483 -        free(exchange_node->rank_exchanges);
  43.484 -    }
  43.485 -
  43.486 -    if (NULL != prev_data ){
  43.487 -        free(prev_data);
  43.488 -    }
  43.489 -
  43.490 -    if(NULL != current_data) {
  43.491 -        free(current_data);
  43.492 -    }
  43.493 -
  43.494 -    if(NULL != group_info) {
  43.495 -        free(group_info);
  43.496 -    }
  43.497 -
  43.498 -    /* error return */
  43.499 -    return OMPI_ERROR;
  43.500 -}
  43.501 -
  43.502 -
  43.503 -OMPI_DECLSPEC int mca_common_netpatterns_setup_recursive_knomial_tree_node(
  43.504 -        int num_nodes, int node_rank, int tree_order,
  43.505 -        mca_common_netpatterns_k_exchange_node_t *exchange_node)
  43.506 -{
  43.507 -    /* local variables */
  43.508 -    int i, j, tmp, cnt;
  43.509 -    int n_levels;
  43.510 -    int k_base, kpow_num, peer; 
  43.511 -
  43.512 -    NETPATTERNS_VERBOSE(
  43.513 -            ("Enter mca_common_netpatterns_setup_recursive_knomial_tree_node(num_nodes=%d, node_rank=%d, tree_order=%d)",
  43.514 -                num_nodes, node_rank, tree_order));
  43.515 -
  43.516 -    assert(num_nodes > 1);
  43.517 -    assert(tree_order > 1);
  43.518 -    if (tree_order > num_nodes) {
  43.519 -        tree_order = num_nodes;
  43.520 -    }
  43.521 -
  43.522 -    exchange_node->tree_order = tree_order;
  43.523 -
  43.524 -    /* figure out number of levels in the tree */
  43.525 -    n_levels = 0;
  43.526 -    /* cnt - number of ranks in given level */
  43.527 -    cnt=1;
  43.528 -    while ( num_nodes > cnt ) {
  43.529 -        cnt *= tree_order;
  43.530 -        n_levels++;
  43.531 -    };
  43.532 -
  43.533 -    /* figure out the largest power of tree_order that is less than or equal to
  43.534 -     * num_nodes */
  43.535 -    if ( cnt > num_nodes) {
  43.536 -        cnt /= tree_order;
  43.537 -        n_levels--;
  43.538 -    }
  43.539 -
  43.540 -    exchange_node->log_tree_order = n_levels;
  43.541 -    exchange_node->n_largest_pow_tree_order = cnt;
  43.542 -
  43.543 -    /* set node characteristics - node that is not within the largest
  43.544 -     *  power of tree_order will just send it's data to node that will participate
  43.545 -     *  in the recursive doubling, and get the result back at the end.
  43.546 -     */
  43.547 -    if (node_rank + 1 > cnt) {
  43.548 -        exchange_node->node_type = EXTRA_NODE;
  43.549 -    } else {
  43.550 -        exchange_node->node_type = EXCHANGE_NODE;
  43.551 -    }
  43.552 -
  43.553 -
  43.554 -    /* set the initial and final data exchanges - those that are not
  43.555 -     *   part of the recursive doubling.
  43.556 -     */
  43.557 -    if (EXCHANGE_NODE == exchange_node->node_type)  {
  43.558 -        exchange_node->n_extra_sources = 0;
  43.559 -        for (i = 0, tmp = node_rank * (tree_order - 1) + cnt + i;
  43.560 -                tmp < num_nodes && i < tree_order - 1;
  43.561 -                ++i, ++tmp) {
  43.562 -            ++exchange_node->n_extra_sources;
  43.563 -        }
  43.564 -
  43.565 -        assert(exchange_node->n_extra_sources < tree_order);
  43.566 -
  43.567 -        if (exchange_node->n_extra_sources > 0) {
  43.568 -            exchange_node->rank_extra_sources_array = (int *) malloc
  43.569 -                (exchange_node->n_extra_sources * sizeof(int));
  43.570 -            if( NULL == exchange_node->rank_extra_sources_array ) {
  43.571 -                goto Error;
  43.572 -            }
  43.573 -            for (i = 0, tmp = node_rank * (tree_order - 1) + cnt;
  43.574 -                    i < tree_order - 1 && tmp < num_nodes; ++i, ++tmp) {
  43.575 -                NETPATTERNS_VERBOSE(("extra_source#%d = %d", i, tmp));
  43.576 -                exchange_node->rank_extra_sources_array[i] = tmp;
  43.577 -            }
  43.578 -        } else {
  43.579 -            exchange_node->rank_extra_sources_array = NULL;
  43.580 -        }
  43.581 -    } else {
  43.582 -        exchange_node->n_extra_sources = 1;
  43.583 -        exchange_node->rank_extra_sources_array = (int *) malloc (sizeof(int));
  43.584 -        if( NULL == exchange_node->rank_extra_sources_array ) {
  43.585 -            goto Error;
  43.586 -        }
  43.587 -        exchange_node->rank_extra_sources_array[0] = (node_rank - cnt) / (tree_order - 1);
  43.588 -        NETPATTERNS_VERBOSE(("extra_source#%d = %d", 0,
  43.589 -                    exchange_node->rank_extra_sources_array[0] ));
  43.590 -    }
  43.591 -
  43.592 -    /* set the exchange pattern */
  43.593 -    if (EXCHANGE_NODE == exchange_node->node_type) {
  43.594 -        exchange_node->n_exchanges = n_levels;
  43.595 -        /* Allocate 2 dimension array thak keeps
  43.596 -         rank exchange information for each step*/ 
  43.597 -        exchange_node->rank_exchanges = (int **) malloc
  43.598 -            (exchange_node->n_exchanges * sizeof(int *));
  43.599 -        if(NULL == exchange_node->rank_exchanges) {
  43.600 -            goto Error;
  43.601 -        }
  43.602 -        for (i = 0; i < exchange_node->n_exchanges; i++) {
  43.603 -            exchange_node->rank_exchanges[i] = (int *) malloc
  43.604 -                ((tree_order - 1) * sizeof(int));
  43.605 -            if( NULL == exchange_node->rank_exchanges ) {
  43.606 -                goto Error;
  43.607 -            }
  43.608 -        }
  43.609 -        /* fill in exchange partners */
  43.610 -        for(i = 0, kpow_num = 1; i < exchange_node->n_exchanges; 
  43.611 -                                      i++, kpow_num *= tree_order) {
  43.612 -            k_base = node_rank / (kpow_num * tree_order);
  43.613 -            for(j = 1; j < tree_order; j++) {
  43.614 -                peer = node_rank + kpow_num * j;
  43.615 -                if (k_base != peer/(kpow_num * tree_order)) {
  43.616 -                    /* Wraparound the number */
  43.617 -                    peer = k_base * (kpow_num * tree_order)  + 
  43.618 -                        peer % (kpow_num * tree_order);
  43.619 -                }
  43.620 -                exchange_node->rank_exchanges[i][j - 1] = peer;
  43.621 -                NETPATTERNS_VERBOSE(("rank_exchanges#(%d,%d)/%d = %d", 
  43.622 -                            i, j, tree_order, peer));
  43.623 -            }
  43.624 -        }
  43.625 -    } else {
  43.626 -        exchange_node->n_exchanges=0;
  43.627 -        exchange_node->rank_exchanges=NULL;
  43.628 -    }
  43.629 -
  43.630 -    /* set the number of tags needed per stripe - this must be the
  43.631 -     *   same across all procs in the communicator.
  43.632 -     */
  43.633 -    /* do we need this one */
  43.634 -    exchange_node->n_tags = tree_order * n_levels + 1;
  43.635 -
  43.636 -    /* successful return */
  43.637 -    return OMPI_SUCCESS;
  43.638 -
  43.639 -Error:
  43.640 -
  43.641 -    if (NULL != exchange_node->rank_extra_sources_array) {
  43.642 -        free(exchange_node->rank_extra_sources_array);
  43.643 -    }
  43.644 -
  43.645 -    if (NULL != exchange_node->rank_exchanges) {
  43.646 -        for (i = 0; i < exchange_node->n_exchanges; i++) {
  43.647 -            if (NULL != exchange_node->rank_exchanges[i]) {
  43.648 -                free(exchange_node->rank_exchanges[i]);
  43.649 -            }
  43.650 -        }
  43.651 -        free(exchange_node->rank_exchanges);
  43.652 -    }
  43.653 -
  43.654 -    /* error return */
  43.655 -    return OMPI_ERROR;
  43.656 -}
  43.657 -
  43.658 -#if 1 
  43.659 -OMPI_DECLSPEC int mca_common_netpatterns_setup_recursive_doubling_n_tree_node(int num_nodes, int node_rank, int tree_order,
  43.660 -        mca_common_netpatterns_pair_exchange_node_t *exchange_node)
  43.661 -{
  43.662 -    /* local variables */
  43.663 -    int i, tmp, cnt;
  43.664 -    int n_levels;
  43.665 -    int shift, mask;
  43.666 -
  43.667 -    NETPATTERNS_VERBOSE(("Enter mca_common_netpatterns_setup_recursive_doubling_n_tree_node(num_nodes=%d, node_rank=%d, tree_order=%d)", num_nodes, node_rank, tree_order));
  43.668 -
  43.669 -    assert(num_nodes > 1);
  43.670 -    while (tree_order > num_nodes) {
  43.671 -        tree_order /= 2;
  43.672 -    }
  43.673 -
  43.674 -    exchange_node->tree_order = tree_order;
  43.675 -    /* We support only tree_order that are power of two */
  43.676 -    assert(0 == (tree_order & (tree_order - 1)));
  43.677 -
  43.678 -    /* figure out number of levels in the tree */
  43.679 -    n_levels = 0;
  43.680 -    /* cnt - number of ranks in given level */
  43.681 -    cnt=1;
  43.682 -    while ( num_nodes > cnt ) {
  43.683 -        cnt *= tree_order;
  43.684 -        n_levels++;
  43.685 -    };
  43.686 -
  43.687 -    /* figure out the largest power of tree_order that is less than or equal to
  43.688 -     * num_nodes */
  43.689 -    if ( cnt > num_nodes) {
  43.690 -        cnt /= tree_order;
  43.691 -        n_levels--;
  43.692 -    }
  43.693 -    exchange_node->log_tree_order = n_levels;
  43.694 -    if (2 == tree_order) {
  43.695 -        exchange_node->log_2 = exchange_node->log_tree_order;
  43.696 -    }
  43.697 -
  43.698 -    tmp=1;
  43.699 -    for (i=0 ; i < n_levels ; i++ ) {
  43.700 -        tmp *= tree_order;
  43.701 -    }
  43.702 -    /* Ishai: I see no reason for calculating tmp. Add an assert before deleting it */
  43.703 -    assert(tmp == cnt);
  43.704 -
  43.705 -    exchange_node->n_largest_pow_tree_order = tmp;
  43.706 -    if (2 == tree_order) {
  43.707 -        exchange_node->n_largest_pow_2 = exchange_node->n_largest_pow_tree_order;
  43.708 -    }
  43.709 -
  43.710 -    /* set node characteristics - node that is not within the largest
  43.711 -     *  power of tree_order will just send it's data to node that will participate
  43.712 -     *  in the recursive doubling, and get the result back at the end.
  43.713 -     */
  43.714 -    if ( node_rank + 1 > cnt ) {
  43.715 -        exchange_node->node_type = EXTRA_NODE;
  43.716 -    } else {
  43.717 -        exchange_node->node_type = EXCHANGE_NODE;
  43.718 -    }
  43.719 -
  43.720 -    /* set the initial and final data exchanges - those that are not
  43.721 -     *   part of the recursive doubling.
  43.722 -     */
  43.723 -    if ( EXCHANGE_NODE == exchange_node->node_type ) {
  43.724 -        exchange_node->n_extra_sources = 0;
  43.725 -        for (tmp = node_rank + cnt; tmp < num_nodes; tmp += cnt) {
  43.726 -            ++exchange_node->n_extra_sources;
  43.727 -        }
  43.728 -        if (exchange_node->n_extra_sources > 0) {
  43.729 -            exchange_node->rank_extra_sources_array = (int *) malloc
  43.730 -                (exchange_node->n_extra_sources * sizeof(int));
  43.731 -            if( NULL == exchange_node->rank_extra_sources_array ) {
  43.732 -                goto Error;
  43.733 -            }
  43.734 -            for (i = 0, tmp = node_rank + cnt; tmp < num_nodes; ++i, tmp += cnt) {
  43.735 -                NETPATTERNS_VERBOSE(("extra_source#%d = %d", i, tmp));
  43.736 -                exchange_node->rank_extra_sources_array[i] = tmp;
  43.737 -            }
  43.738 -        } else {
  43.739 -            exchange_node->rank_extra_sources_array = NULL;
  43.740 -        }
  43.741 -    } else {
  43.742 -        exchange_node->n_extra_sources = 1;
  43.743 -        exchange_node->rank_extra_sources_array = (int *) malloc (sizeof(int));
  43.744 -        if( NULL == exchange_node->rank_extra_sources_array ) {
  43.745 -            goto Error;
  43.746 -        }
  43.747 -        exchange_node->rank_extra_sources_array[0] = node_rank & (cnt - 1);
  43.748 -        NETPATTERNS_VERBOSE(("extra_source#%d = %d", 0, node_rank & (cnt - 1)));
  43.749 -    }
  43.750 -
  43.751 -    /* Ishai: To be compatable with the old structure - should be remoived later */
  43.752 -    if (1 == exchange_node->n_extra_sources) {
  43.753 -        exchange_node->rank_extra_source = exchange_node->rank_extra_sources_array[0];
  43.754 -    } else {
  43.755 -        exchange_node->rank_extra_source = -1;
  43.756 -    }
  43.757 -
  43.758 -    /* set the exchange pattern */
  43.759 -    if ( EXCHANGE_NODE == exchange_node->node_type ) {
  43.760 -        exchange_node->n_exchanges = n_levels * (tree_order - 1);
  43.761 -        exchange_node->rank_exchanges = (int *) malloc
  43.762 -            (exchange_node->n_exchanges * sizeof(int));
  43.763 -        if( NULL == exchange_node->rank_exchanges ) {
  43.764 -            goto Error;
  43.765 -        }
  43.766 -
  43.767 -        /* fill in exchange partners */
  43.768 -        for ( i = 0, shift = 1 ; i < exchange_node->n_exchanges ; shift *= tree_order ) {
  43.769 -            for ( mask = 1 ; mask < tree_order ; ++mask, ++i ) {
  43.770 -                exchange_node->rank_exchanges[i] = node_rank ^ (mask * shift);
  43.771 -                NETPATTERNS_VERBOSE(("rank_exchanges#%d/%d = %d", i, tree_order, node_rank ^ (mask * shift)));
  43.772 -            }
  43.773 -        }
  43.774 -
  43.775 -    } else {
  43.776 -
  43.777 -        exchange_node->n_exchanges=0;
  43.778 -        exchange_node->rank_exchanges=NULL;
  43.779 -
  43.780 -    }
  43.781 -
  43.782 -    /* set the number of tags needed per stripe - this must be the
  43.783 -     *   same across all procs in the communicator.
  43.784 -     */
  43.785 -    /* Ishai: Need to find out what is n_tags */
  43.786 -    exchange_node->n_tags = tree_order * n_levels + 1;
  43.787 -
  43.788 -    /* successful return */
  43.789 -    return OMPI_SUCCESS;
  43.790 -
  43.791 -Error:
  43.792 -    if (exchange_node->rank_extra_sources_array != NULL) {
  43.793 -        free(exchange_node->rank_extra_sources_array);
  43.794 -    }
  43.795 -
  43.796 -    /* error return */
  43.797 -    return OMPI_ERROR;
  43.798 -}
  43.799 -
  43.800 -OMPI_DECLSPEC void mca_common_netpatterns_free_recursive_doubling_tree_node(
  43.801 -    mca_common_netpatterns_pair_exchange_node_t *exchange_node)
  43.802 -{
  43.803 -    NETPATTERNS_VERBOSE(("About to release rank_extra_sources_array and rank_exchanges"));
  43.804 -    if (exchange_node->rank_extra_sources_array != NULL) {
  43.805 -        free(exchange_node->rank_extra_sources_array);
  43.806 -    }
  43.807 -
  43.808 -    if (exchange_node->rank_exchanges != NULL) {
  43.809 -        free(exchange_node->rank_exchanges);
  43.810 -    }
  43.811 -}
  43.812 -#endif
  43.813 -
  43.814 -OMPI_DECLSPEC int mca_common_netpatterns_setup_recursive_doubling_tree_node(int num_nodes, int node_rank,
  43.815 -        mca_common_netpatterns_pair_exchange_node_t *exchange_node)
  43.816 -{
  43.817 -    return mca_common_netpatterns_setup_recursive_doubling_n_tree_node(num_nodes, node_rank, 2, exchange_node);
  43.818 -}
  43.819 -
  43.820 -#if 0 
  43.821 -/*OMPI_DECLSPEC int old_mca_common_netpatterns_setup_recursive_doubling_tree_node(int num_nodes, int node_rank,*/
  43.822 -OMPI_DECLSPEC int mca_common_netpatterns_setup_recursive_doubling_n_tree_node(int num_nodes, int node_rank,int tree_order,
  43.823 -        mca_common_netpatterns_pair_exchange_node_t *exchange_node)
  43.824 -{
  43.825 -    /* local variables */
  43.826 -    /*int tree_order;*/
  43.827 -    int i,tmp,cnt,result,n_extra_nodes;
  43.828 -    int n_exchanges;
  43.829 -
  43.830 -    /* figure out number of levels in the tree */
  43.831 -
  43.832 -    n_exchanges=0;
  43.833 -    result=num_nodes;
  43.834 -/*    tree_order=2;*/
  43.835 -    /* cnt - number of ranks in given level */
  43.836 -    cnt=1;
  43.837 -    while( num_nodes > cnt ) {
  43.838 -        cnt*=tree_order;
  43.839 -        n_exchanges++;
  43.840 -    };
  43.841 -
  43.842 -    /* figure out the largest power of 2 that is less than or equal to
  43.843 -     * num_nodes */
  43.844 -    if( cnt > num_nodes) {
  43.845 -        cnt/=tree_order;
  43.846 -        n_exchanges--;
  43.847 -    }
  43.848 -    exchange_node->log_2=n_exchanges;
  43.849 -
  43.850 -    tmp=1;
  43.851 -    for(i=0 ; i < n_exchanges ; i++ ) {
  43.852 -        tmp*=2;
  43.853 -    }
  43.854 -    exchange_node->n_largest_pow_2=tmp;
  43.855 -
  43.856 -    /* set node characteristics - node that is not within the largest
  43.857 -     *  power of 2 will just send it's data to node that will participate
  43.858 -     *  in the recursive doubling, and get the result back at the end.
  43.859 -     */
  43.860 -    if( node_rank+1 > cnt ) {
  43.861 -        exchange_node->node_type=EXTRA_NODE;
  43.862 -    } else {
  43.863 -        exchange_node->node_type=EXCHANGE_NODE;
  43.864 -    }
  43.865 -
  43.866 -    /* set the initial and final data exchanges - those that are not
  43.867 -     *   part of the recursive doubling.
  43.868 -     */
  43.869 -    n_extra_nodes=num_nodes-cnt;
  43.870 -
  43.871 -    if ( EXCHANGE_NODE == exchange_node->node_type ) {
  43.872 -
  43.873 -        if( node_rank < n_extra_nodes ) {
  43.874 -            exchange_node->n_extra_sources=1;
  43.875 -            exchange_node->rank_extra_source=cnt+node_rank;
  43.876 -        } else {
  43.877 -            exchange_node->n_extra_sources=0;
  43.878 -            exchange_node->rank_extra_source=-1;
  43.879 -        }
  43.880 -
  43.881 -    } else {
  43.882 -            exchange_node->n_extra_sources=1;
  43.883 -            exchange_node->rank_extra_source=node_rank-cnt;
  43.884 -    }
  43.885 -
  43.886 -    /* set the exchange pattern */
  43.887 -    if( EXCHANGE_NODE == exchange_node->node_type ) {
  43.888 -
  43.889 -        exchange_node->n_exchanges=n_exchanges;
  43.890 -        exchange_node->rank_exchanges=(int *) malloc
  43.891 -            (n_exchanges*sizeof(int));
  43.892 -        if( NULL == exchange_node->rank_exchanges ) {
  43.893 -            goto Error;
  43.894 -        }
  43.895 -
  43.896 -        /* fill in exchange partners */
  43.897 -        result=1;
  43.898 -        tmp=node_rank;
  43.899 -        for( i=0 ; i < n_exchanges ; i++ ) {
  43.900 -            if(tmp & 1 ) {
  43.901 -                exchange_node->rank_exchanges[i]=
  43.902 -                    node_rank-result;
  43.903 -            } else {
  43.904 -                exchange_node->rank_exchanges[i]=
  43.905 -                    node_rank+result;
  43.906 -            }
  43.907 -            result*=2;
  43.908 -            tmp/=2;
  43.909 -        }
  43.910 -
  43.911 -    } else {
  43.912 -
  43.913 -        exchange_node->n_exchanges=0;
  43.914 -        exchange_node->rank_exchanges=NULL;
  43.915 -
  43.916 -    }
  43.917 -
  43.918 -    /* set the number of tags needed per stripe - this must be the
  43.919 -     *   same across all procs in the communicator.
  43.920 -     */
  43.921 -    exchange_node->n_tags=2*n_exchanges+1;
  43.922 -
  43.923 -    /* Ishai: to make sure free will work also for people that call this function */
  43.924 -    exchange_node->rank_extra_sources_array = NULL;
  43.925 -
  43.926 -    /* successful return */
  43.927 -    return OMPI_SUCCESS;
  43.928 -
  43.929 -Error:
  43.930 -
  43.931 -    /* error return */
  43.932 -    return OMPI_ERROR;
  43.933 -}
  43.934 -#endif
  43.935 -
    44.1 --- a/ompi/mca/common/netpatterns/common_netpatterns_knomial_tree.h	Tue Feb 19 22:36:41 2013 +0000
    44.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    44.3 @@ -1,253 +0,0 @@
    44.4 -/*
    44.5 - * Copyright (c) 2009-2012 Mellanox Technologies.  All rights reserved.
    44.6 - * Copyright (c) 2009-2012 Oak Ridge National Laboratory.  All rights reserved.
    44.7 - * $COPYRIGHT$
    44.8 - *
    44.9 - * Additional copyrights may follow
   44.10 - *
   44.11 - * $HEADER$
   44.12 - */
   44.13 -
   44.14 -#ifndef COMM_PATTERNS_KNOMIAL_TREE_H
   44.15 -#define COMM_PATTERNS_KNOMIAL_TREE_H
   44.16 -
   44.17 -#include "ompi_config.h"
   44.18 -#include "orte/runtime/orte_globals.h"
   44.19 -
   44.20 -BEGIN_C_DECLS
   44.21 -
   44.22 -
   44.23 -/*
   44.24 - * Pair-wise data exchange
   44.25 - */
   44.26 -
   44.27 -/* enum for node type */
   44.28 -enum {
   44.29 -    EXCHANGE_NODE,
   44.30 -    EXTRA_NODE
   44.31 -};
   44.32 -
   44.33 -struct mca_common_netpatterns_pair_exchange_node_t {
   44.34 -
   44.35 -    /* Order of a node in the tree - usually 2 */
   44.36 -    int tree_order;
   44.37 -
   44.38 -    /* number of nodes this node will exchange data with */
   44.39 -    int n_exchanges;
   44.40 -
   44.41 -    /* ranks of nodes involved in data exchnge */
   44.42 -    int *rank_exchanges;
   44.43 -
   44.44 -    /* number of extra sources of data - outside largest power of 2 in
   44.45 -     *  this group */
   44.46 -    int n_extra_sources;
   44.47 -    
   44.48 -    /* rank of the extra source */
   44.49 -    /* deprecated */ int rank_extra_source;
   44.50 -    int *rank_extra_sources_array;
   44.51 -
   44.52 -    /* number of tags needed per stripe */
   44.53 -    int n_tags;
   44.54 -
   44.55 -    /* log 2 of largest full power of 2 for this node set */
   44.56 -    /* deprecated */ int log_2;
   44.57 -    int log_tree_order;
   44.58 -
   44.59 -    /* largest power of 2 that fits in this group */
   44.60 -    /* deprecated */ int n_largest_pow_2;
   44.61 -    int n_largest_pow_tree_order;
   44.62 -
   44.63 -    /* node type */
   44.64 -    int node_type;
   44.65 -
   44.66 -};
   44.67 -typedef struct mca_common_netpatterns_pair_exchange_node_t mca_common_netpatterns_pair_exchange_node_t;
   44.68 -
   44.69 -struct mca_common_netpatterns_payload_t {
   44.70 -    int s_len;
   44.71 -    int r_len;
   44.72 -    int s_offset;
   44.73 -    int r_offset;
   44.74 -};
   44.75 -typedef struct mca_common_netpatterns_payload_t mca_common_netpatterns_payload_t;
   44.76 -
   44.77 -struct mca_common_netpatterns_k_exchange_node_t {
   44.78 -    /* Order of a node in the tree - usually 2 */
   44.79 -    int tree_order;
   44.80 -    /* number of nodes this node will exchange data with */
   44.81 -    int n_exchanges;
   44.82 -    /* total number of exchanges that I actually participate in */
   44.83 -    int n_actual_exchanges;
   44.84 -    /* ranks of nodes involved in data exchnge */
   44.85 -    int **rank_exchanges;
   44.86 -    /* number of extra sources of data - outside largest power of 2 in
   44.87 -     *  this group */
   44.88 -    int n_extra_sources;
   44.89 -    /* rank/s of the extra source */
   44.90 -    int *rank_extra_sources_array;
   44.91 -    /* number of tags needed per stripe */
   44.92 -    int n_tags;
   44.93 -    /* log k of largest full power of k for this node set */
   44.94 -    int log_tree_order;
   44.95 -    /* largest power of k that fits in this group */
   44.96 -    int n_largest_pow_tree_order;
   44.97 -    /* node type */
   44.98 -    int node_type;
   44.99 -    /* start of extra ranks k_nomial */
  44.100 -    int k_nomial_stray;
  44.101 -    /* reindex map */
  44.102 -    int *reindex_map;
  44.103 -    /* inverse of reindex map, i.e. given a reindexed id find out its actual rank */
  44.104 -    int *inv_reindex_map;
  44.105 -    /* reindexed node_rank */
  44.106 -    int reindex_myid;
  44.107 -    /* 2-d array that hold payload info for each level of recursive k-ing */
  44.108 -    mca_common_netpatterns_payload_t **payload_info;
  44.109 -};
  44.110 -typedef struct mca_common_netpatterns_k_exchange_node_t
  44.111 -               mca_common_netpatterns_k_exchange_node_t;
  44.112 -
  44.113 -OMPI_DECLSPEC int mca_common_netpatterns_setup_recursive_doubling_n_tree_node(int num_nodes, int node_rank, int tree_order,
  44.114 -    mca_common_netpatterns_pair_exchange_node_t *exchange_node);
  44.115 -
  44.116 -OMPI_DECLSPEC void mca_common_netpatterns_free_recursive_doubling_tree_node(
  44.117 -    mca_common_netpatterns_pair_exchange_node_t *exchange_node);
  44.118 -
  44.119 -OMPI_DECLSPEC int mca_common_netpatterns_setup_recursive_doubling_tree_node(int num_nodes, int node_rank,
  44.120 -    mca_common_netpatterns_pair_exchange_node_t *exchange_node);
  44.121 -
  44.122 -OMPI_DECLSPEC int mca_common_netpatterns_setup_recursive_knomial_tree_node(
  44.123 -   int num_nodes, int node_rank, int tree_order,
  44.124 -   mca_common_netpatterns_k_exchange_node_t *exchange_node);
  44.125 -
  44.126 -OMPI_DECLSPEC int mca_common_netpatterns_setup_recursive_knomial_allgather_tree_node(
  44.127 -        int num_nodes, int node_rank, int tree_order, int *hier_ranks,
  44.128 -        mca_common_netpatterns_k_exchange_node_t *exchange_node);
  44.129 -
  44.130 -
  44.131 -/* Input: k_exchange_node structure 
  44.132 -      Output: index in rank_exchanges array that points 
  44.133 -      to the "start_point" for outgoing send. 
  44.134 -
  44.135 -      Please see below example of usage:
  44.136 -      for (i = start_point ; i > 0; i--) 
  44.137 -          for (k = 0; k < tree_radix; k++) 
  44.138 -              send messages to exchange_node->rank_exchanges[i][k];
  44.139 -*/
  44.140 -
  44.141 -static inline __opal_attribute_always_inline__ 
  44.142 -int mca_common_netpatterns_get_knomial_level(
  44.143 -    int my_rank, int src_rank, 
  44.144 -    int radix,   int size,
  44.145 -    int *k_level)
  44.146 -{
  44.147 -    int distance, 
  44.148 -        pow_k;
  44.149 -    int logk_level = 0;
  44.150 -
  44.151 -    /* Calculate disctance from source of data */
  44.152 -    distance = src_rank - my_rank; 
  44.153 -
  44.154 -    /* Wrap around */
  44.155 -    if (0 > distance) {
  44.156 -        distance += size;
  44.157 -    }
  44.158 -
  44.159 -    pow_k = 1;
  44.160 -    while(distance / (pow_k * radix)) {
  44.161 -        pow_k *= radix;
  44.162 -        ++logk_level;
  44.163 -    }
  44.164 -    --logk_level;
  44.165 -
  44.166 -    *k_level = pow_k;
  44.167 -    return logk_level;
  44.168 -}
  44.169 -
  44.170 -/* Input: my_rank, root, radix, size
  44.171 - * Output: source of the data, offset in power of K
  44.172 - */
  44.173 -static inline __opal_attribute_always_inline__ 
  44.174 -int mca_common_netpatterns_get_knomial_data_source(
  44.175 -    int my_rank, int root, int radix, int size,
  44.176 -    int *k_level, int *logk_level)
  44.177 -{
  44.178 -    int level = radix;
  44.179 -    int step = 0;
  44.180 -
  44.181 -    /* Calculate source of the data */
  44.182 -    while((0 == (root - my_rank) % level) 
  44.183 -            && (level <= size)) {
  44.184 -        level *= radix;
  44.185 -        ++step;
  44.186 -    }   
  44.187 -    
  44.188 -    *k_level = level/radix;
  44.189 -    *logk_level = step;
  44.190 -    return my_rank - (my_rank % level - root % level);
  44.191 -}
  44.192 -
  44.193 -/* Input: my_rank, radix,
  44.194 - *        k_level - that you get from mca_common_netpatterns_get_knomial_data_source
  44.195 - *        k_step - some integer
  44.196 - * Output: peer - next children in the tree
  44.197 - * Usage: 
  44.198 - *         src = mca_common_netpatterns_get_knomial_data_source(
  44.199 - *                  my_rank, root, radix, size,
  44.200 - *                  &k_level, &logk_level)
  44.201 - *         recv_from(src......);
  44.202 - *
  44.203 - *         MCA_COMMON_NETPATTERNS_GET_NEXT_KNOMIAL_INIT(step_info, k_level, my_rank);
  44.204 - *         while(MCA_COMMON_NETPATTERNS_GET_NEXT_KNOMIAL_PEER_CHECK_LEVEL(step_info)) {
  44.205 - *              MCA_COMMON_NETPATTERNS_GET_NEXT_KNOMIAL_PEER(my_rank, radix, step_info, peer);
  44.206 - *              send_to(peer....);
  44.207 - *         }
  44.208 - * for more example please grep in ptpcoll bcol bcast files
  44.209 - */
  44.210 -
  44.211 -typedef struct mca_common_netpatter_knomial_step_info_t {
  44.212 -    int k_step;
  44.213 -    int k_level;
  44.214 -    int k_tmp_peer;
  44.215 -} mca_common_netpatter_knomial_step_info_t;
  44.216 -
  44.217 -#define MCA_COMMON_NETPATTERNS_GET_NEXT_KNOMIAL_UPDATE_LEVEL_FOR_BCAST(step_info, radix)\
  44.218 -do {                                                                                    \
  44.219 -    if (1 != step_info.k_step) {                                                        \
  44.220 -        step_info.k_level /= radix;                                                     \
  44.221 -    }                                                                                   \
  44.222 -} while (0)                                                                             \
  44.223 -
  44.224 -#define MCA_COMMON_NETPATTERNS_GET_NEXT_KNOMIAL_INIT(step_info, in_k_level, in_peer)\
  44.225 -do {                                                                                \
  44.226 -    step_info.k_step  = 1;                                                          \
  44.227 -    step_info.k_level = in_k_level;                                                 \
  44.228 -    step_info.k_tmp_peer = in_peer;                                                 \
  44.229 -} while (0)
  44.230 -
  44.231 -#define MCA_COMMON_NETPATTERNS_GET_NEXT_KNOMIAL_PEER_CHECK_LEVEL(step_info) \
  44.232 -                                                    (step_info.k_level > 1)
  44.233 -
  44.234 -#define MCA_COMMON_NETPATTERNS_GET_NEXT_KNOMIAL_PEER(my_rank, radix, step_info, peer)           \
  44.235 -do {                                                                                            \
  44.236 -    int rank_radix_base = my_rank/step_info.k_level;                                            \
  44.237 -                                                                                                \
  44.238 -    peer = step_info.k_tmp_peer + step_info.k_level/radix;                                      \
  44.239 -    if (rank_radix_base != peer/step_info.k_level) {                                            \
  44.240 -        /* Wraparound the number */                                                             \
  44.241 -        peer -= step_info.k_level;                                                              \
  44.242 -        assert(peer >=0);                                                                       \
  44.243 -    }                                                                                           \
  44.244 -    ++step_info.k_step;                                                                         \
  44.245 -    if (radix == step_info.k_step) {                                                            \
  44.246 -        step_info.k_level /= radix;                                                             \
  44.247 -        step_info.k_step = 1;                                                                   \
  44.248 -        step_info.k_tmp_peer = my_rank;                                                         \
  44.249 -    } else {                                                                                    \
  44.250 -        step_info.k_tmp_peer = peer;                                                            \
  44.251 -    }                                                                                           \
  44.252 -                                                                                                \
  44.253 -} while (0)
  44.254 -
  44.255 -END_C_DECLS
  44.256 -#endif
    45.1 --- a/ompi/mca/common/netpatterns/common_netpatterns_multinomial_tree.c	Tue Feb 19 22:36:41 2013 +0000
    45.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    45.3 @@ -1,190 +0,0 @@
    45.4 -/*
    45.5 - * Copyright (c) 2009-2012 Mellanox Technologies.  All rights reserved.
    45.6 - * Copyright (c) 2009-2012 Oak Ridge National Laboratory.  All rights reserved.
    45.7 - * $COPYRIGHT$
    45.8 - *
    45.9 - * Additional copyrights may follow
   45.10 - *
   45.11 - * $HEADER$
   45.12 - */
   45.13 -
   45.14 -#include "ompi_config.h"
   45.15 -#ifdef HAVE_UNISTD_H
   45.16 -#include <unistd.h>
   45.17 -#endif
   45.18 -#include <sys/types.h>
   45.19 -#ifdef HAVE_SYS_MMAN_H
   45.20 -#include <sys/mman.h>
   45.21 -#endif
   45.22 -#include <fcntl.h>
   45.23 -#include <stdlib.h>
   45.24 -
   45.25 -#include "ompi/constants.h"
   45.26 -#include "common_netpatterns.h"
   45.27 -
   45.28 -
   45.29 -/* setup an multi-nomial tree - for each node in the tree
   45.30 - *  this returns it's parent, and it's children */
   45.31 -
   45.32 -OMPI_DECLSPEC int mca_common_netpatterns_setup_multinomial_tree(int tree_order, int num_nodes,
   45.33 -        mca_common_netpatterns_tree_node_t *tree_nodes)
   45.34 -{
   45.35 -    /* local variables */
   45.36 -    int i,result;
   45.37 -    int cnt, n_nodes_in_this_level,node_index;
   45.38 -    int n_cum_nodes,current_level,node,n_nodes_prev_level,rank,parent_rank;
   45.39 -    int n_nodes_in_last_level,n_full_stripes,n_in_partial_stipe,n_children;
   45.40 -    int n_lvls_in_tree;
   45.41 -
   45.42 -    /* sanity check */
   45.43 -    if( 1 >= tree_order ) {
   45.44 -        goto Error;
   45.45 -    }
   45.46 -
   45.47 -
   45.48 -    /* figure out number of levels in the tree */
   45.49 -
   45.50 -    n_lvls_in_tree=0;
   45.51 -    result=num_nodes;
   45.52 -    /* cnt - number of ranks in given level */
   45.53 -    cnt=1;
   45.54 -    /*  cummulative count of ranks */
   45.55 -    while( 0 < result ) {
   45.56 -        result-=cnt;
   45.57 -        cnt*=tree_order; 
   45.58 -        n_lvls_in_tree++;
   45.59 -    };  
   45.60 -
   45.61 -    /* loop over tree levels */
   45.62 -    n_nodes_in_this_level=1;
   45.63 -    node_index=-1;
   45.64 -    n_cum_nodes=0;
   45.65 -    for( current_level = 0 ; current_level < n_lvls_in_tree ; current_level++) {
   45.66 -
   45.67 -        /* loop over nodes in current level */
   45.68 -        for ( node=0 ; node < n_nodes_in_this_level ; node++ ) {
   45.69 -            /* get node index */
   45.70 -            node_index++;
   45.71 -            
   45.72 -            /* break if reach group size */
   45.73 -            if( node_index == num_nodes) {
   45.74 -                break;
   45.75 -            }
   45.76 -
   45.77 -            tree_nodes[node_index].my_rank=node_index;
   45.78 -            tree_nodes[node_index].children_ranks=NULL;
   45.79 -
   45.80 -            /*
   45.81 -             *  Parents
   45.82 -             */
   45.83 -            if( 0 == current_level ) {
   45.84 -                tree_nodes[node_index].n_parents=0;
   45.85 -                /* get parent index */
   45.86 -                tree_nodes[node_index].parent_rank=-1;
   45.87 -            } else {
   45.88 -                tree_nodes[node_index].n_parents=1;
   45.89 -                /* get parent index */
   45.90 -                n_nodes_prev_level=n_nodes_in_this_level/tree_order;
   45.91 -                if( current_level == n_lvls_in_tree -1 ) {
   45.92 -                    /* load balance the lowest level */
   45.93 -                    parent_rank=node-
   45.94 -                        (node/n_nodes_prev_level)*n_nodes_prev_level;
   45.95 -                    parent_rank=n_cum_nodes-n_nodes_prev_level+
   45.96 -                        parent_rank;
   45.97 -                    tree_nodes[node_index].parent_rank=parent_rank;
   45.98 -                } else {
   45.99 -                    tree_nodes[node_index].parent_rank=
  45.100 -                        (n_cum_nodes-n_nodes_prev_level)+node/tree_order;
  45.101 -                }
  45.102 -            }
  45.103 -
  45.104 -            /*
  45.105 -             * Children
  45.106 -             */
  45.107 -
  45.108 -            /* get number of children */
  45.109 -            if( (n_lvls_in_tree-1) == current_level ) {
  45.110 -                /* leaves have no nodes */
  45.111 -                tree_nodes[node_index].n_children=0;
  45.112 -                tree_nodes[node_index].children_ranks=NULL;
  45.113 -            } else {
  45.114 -                /* take into account last level being incomplete */
  45.115 -                if( (n_lvls_in_tree-2) == current_level ) {
  45.116 -                    /* last level is load balanced */
  45.117 -                    n_nodes_in_last_level=num_nodes-
  45.118 -                        (n_cum_nodes+n_nodes_in_this_level);
  45.119 -                    n_full_stripes=n_nodes_in_last_level/n_nodes_in_this_level;
  45.120 -                    n_in_partial_stipe=n_nodes_in_last_level-
  45.121 -                        n_full_stripes*n_nodes_in_this_level;
  45.122 -                    n_children=n_full_stripes;
  45.123 -                    if( n_full_stripes < tree_order ) {
  45.124 -                        if( node <= n_in_partial_stipe-1 ) {
  45.125 -                            n_children++;
  45.126 -                        }
  45.127 -                    }
  45.128 -                    tree_nodes[node_index].n_children=n_children;
  45.129 -                    if( 0 < n_children ) {
  45.130 -                        tree_nodes[node_index].children_ranks=(int *)
  45.131 -                            malloc(sizeof(int)*n_children);
  45.132 -                        if( NULL == tree_nodes[node_index].children_ranks) {
  45.133 -                            goto Error;
  45.134 -                        }
  45.135 -                    } else {
  45.136 -                        tree_nodes[node_index].children_ranks=NULL;
  45.137 -                    }
  45.138 -                    /* fill in list */
  45.139 -                    for( rank=0 ; rank < n_children ; rank++ ) {
  45.140 -                        tree_nodes[node_index].children_ranks[rank]=
  45.141 -                            node+rank*n_nodes_in_this_level;
  45.142 -                        tree_nodes[node_index].children_ranks[rank]+=
  45.143 -                            (n_cum_nodes+n_nodes_in_this_level);
  45.144 -                    }
  45.145 -                } else {
  45.146 -                    n_children=tree_order;
  45.147 -                    tree_nodes[node_index].n_children=tree_order;
  45.148 -                    tree_nodes[node_index].children_ranks=(int *)
  45.149 -                        malloc(sizeof(int)*n_children);
  45.150 -                    if( NULL == tree_nodes[node_index].children_ranks) {
  45.151 -                        goto Error;
  45.152 -                    }
  45.153 -                    for( rank=0 ; rank < n_children ; rank++ ) {
  45.154 -                        tree_nodes[node_index].children_ranks[rank]=
  45.155 -                            rank+tree_order*node;
  45.156 -                        tree_nodes[node_index].children_ranks[rank]+=
  45.157 -                            (n_cum_nodes+n_nodes_in_this_level);
  45.158 -                    }
  45.159 -                }
  45.160 -            }
  45.161 -
  45.162 -        } /* end node loop */
  45.163 -
  45.164 -        /* update helper counters */
  45.165 -        n_cum_nodes+=n_nodes_in_this_level;
  45.166 -        n_nodes_in_this_level*=tree_order;
  45.167 -    }
  45.168 -
  45.169 -    /* set node type */
  45.170 -    for(i=0 ; i < num_nodes ; i++ ) {
  45.171 -        if( 0 == tree_nodes[i].n_parents ) {
  45.172 -            tree_nodes[i].my_node_type=ROOT_NODE;
  45.173 -        } else if ( 0 == tree_nodes[i].n_children ) {
  45.174 -            tree_nodes[i].my_node_type=LEAF_NODE;
  45.175 -        } else {
  45.176 -            tree_nodes[i].my_node_type=INTERIOR_NODE;
  45.177 -        }
  45.178 -    }
  45.179 -
  45.180 -    /* successful return */
  45.181 -    return OMPI_SUCCESS;
  45.182 -
  45.183 -Error:
  45.184 -    /* free allocated memory */
  45.185 -    for( i=0 ; i < num_nodes ; i++ ) {
  45.186 -        if( NULL != tree_nodes[i].children_ranks ) {
  45.187 -            free(tree_nodes[i].children_ranks);
  45.188 -        }
  45.189 -    }
  45.190 -
  45.191 -    /* error return */
  45.192 -    return OMPI_ERROR;
  45.193 -}
    46.1 --- a/ompi/mca/common/netpatterns/common_netpatterns_nary_tree.c	Tue Feb 19 22:36:41 2013 +0000
    46.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    46.3 @@ -1,442 +0,0 @@
    46.4 -/*
    46.5 - * Copyright (c) 2009-2012 Mellanox Technologies.  All rights reserved.
    46.6 - * Copyright (c) 2009-2012 Oak Ridge National Laboratory.  All rights reserved.
    46.7 - * $COPYRIGHT$
    46.8 - *
    46.9 - * Additional copyrights may follow
   46.10 - *
   46.11 - * $HEADER$
   46.12 - */
   46.13 -
   46.14 -#include "ompi_config.h"
   46.15 -#ifdef HAVE_UNISTD_H
   46.16 -#include <unistd.h>
   46.17 -#endif
   46.18 -#include <sys/types.h>
   46.19 -#ifdef HAVE_SYS_MMAN_H
   46.20 -#include <sys/mman.h>
   46.21 -#endif
   46.22 -#include <fcntl.h>
   46.23 -#include <errno.h>
   46.24 -#include <stdlib.h>
   46.25 -
   46.26 -#include "ompi/constants.h"
   46.27 -#include "common_netpatterns.h"
   46.28 -
   46.29 -/*
   46.30 - * Create mmaped shared file
   46.31 - */
   46.32 -
   46.33 -/* setup an n-array tree */
   46.34 -
   46.35 -int mca_common_netpatterns_setup_narray_tree(int tree_order, int my_rank, int num_nodes,
   46.36 -        mca_common_netpatterns_tree_node_t *my_node)
   46.37 -{
   46.38 -    /* local variables */
   46.39 -    int n_levels, result;
   46.40 -    int my_level_in_tree, cnt;
   46.41 -    int lvl,cum_cnt, my_rank_in_my_level,n_lvls_in_tree;
   46.42 -    int start_index,end_index;
   46.43 -
   46.44 -    /* sanity check */
   46.45 -    if( 1 >= tree_order ) {
   46.46 -        goto Error;
   46.47 -    }
   46.48 -
   46.49 -    my_node->my_rank=my_rank;
   46.50 -    my_node->tree_size=num_nodes;
   46.51 -
   46.52 -    /* figure out number of levels in tree */
   46.53 -    n_levels=0;
   46.54 -    result=num_nodes-1;
   46.55 -    while (0 < result ) {
   46.56 -        result/=tree_order;
   46.57 -        n_levels++;
   46.58 -    };
   46.59 -
   46.60 -    /* figure out who my children and parents are */
   46.61 -    my_level_in_tree=-1;
   46.62 -    result=my_rank;
   46.63 -    /* cnt - number of ranks in given level */
   46.64 -    cnt=1;
   46.65 -    /*  cummulative count of ranks */
   46.66 -    while( 0 <= result ) {
   46.67 -        result-=cnt;
   46.68 -        cnt*=tree_order;
   46.69 -        my_level_in_tree++;
   46.70 -    };
   46.71 -    /* int my_level_in_tree, n_children, n_parents; */
   46.72 -
   46.73 -    if( 0 == my_rank ) {
   46.74 -        my_node->n_parents=0;
   46.75 -        my_node->parent_rank=-1;
   46.76 -        my_rank_in_my_level=0;
   46.77 -    } else {
   46.78 -        my_node->n_parents=1;
   46.79 -        cnt=1;
   46.80 -        cum_cnt=0;
   46.81 -        for (lvl = 0 ; lvl < my_level_in_tree ; lvl ++ ) {
   46.82 -            /* cummulative count up to this level */
   46.83 -            cum_cnt+=cnt;
   46.84 -            /* number of ranks in this level */
   46.85 -            cnt*=tree_order;
   46.86 -        }
   46.87 -        my_rank_in_my_level=my_rank-cum_cnt;
   46.88 -        /* tree_order consecutive ranks have the same parent */
   46.89 -        my_node->parent_rank=cum_cnt-cnt/tree_order+my_rank_in_my_level/tree_order;
   46.90 -    }
   46.91 -    
   46.92 -    /* figure out number of levels in the tree */
   46.93 -    n_lvls_in_tree=0;
   46.94 -    result=num_nodes;
   46.95 -    /* cnt - number of ranks in given level */
   46.96 -    cnt=1;
   46.97 -    /*  cummulative count of ranks */
   46.98 -    while( 0 < result ) {
   46.99 -        result-=cnt;
  46.100 -        cnt*=tree_order;
  46.101 -        n_lvls_in_tree++;
  46.102 -    };
  46.103 -
  46.104 -    my_node->children_ranks=(int *)NULL;
  46.105 -
  46.106 -    /* get list of children */
  46.107 -    if( my_level_in_tree == (n_lvls_in_tree -1 ) ) {
  46.108 -        /* last level has no children */
  46.109 -        my_node->n_children=0;
  46.110 -    } else {
  46.111 -        cum_cnt=0;
  46.112 -        cnt=1;
  46.113 -        for( lvl=0 ; lvl <= my_level_in_tree ; lvl++ ) {
  46.114 -            cum_cnt+=cnt;
  46.115 -            cnt*=tree_order;
  46.116 -        }
  46.117 -        start_index=cum_cnt+my_rank_in_my_level*tree_order;
  46.118 -        end_index=start_index+tree_order-1;
  46.119 -
  46.120 -        /* don't go out of bounds at the end of the list */
  46.121 -        if( end_index >= num_nodes ) {
  46.122 -            end_index = num_nodes-1;
  46.123 -        }
  46.124 -
  46.125 -        if( start_index <= (num_nodes-1) ) {
  46.126 -            my_node->n_children=end_index-start_index+1;
  46.127 -        } else {
  46.128 -            my_node->n_children=0;
  46.129 -        }
  46.130 -
  46.131 -        my_node->children_ranks=NULL;
  46.132 -        if( 0 < my_node->n_children ) {
  46.133 -            my_node->children_ranks=
  46.134 -                (int *)malloc( sizeof(int)*my_node->n_children);
  46.135 -            if( NULL == my_node->children_ranks) {
  46.136 -                goto Error;
  46.137 -            }
  46.138 -            for (lvl= start_index ; lvl <= end_index ; lvl++ ) {
  46.139 -                my_node->children_ranks[lvl-start_index]=lvl;
  46.140 -            }
  46.141 -        } 
  46.142 -    }
  46.143 -    /* set node type */
  46.144 -    if( 0 == my_node->n_parents ) {
  46.145 -        my_node->my_node_type=ROOT_NODE;
  46.146 -    } else if ( 0 == my_node->n_children ) {
  46.147 -        my_node->my_node_type=LEAF_NODE;
  46.148 -    } else {
  46.149 -        my_node->my_node_type=INTERIOR_NODE;
  46.150 -    }
  46.151 -
  46.152 -
  46.153 -    /* successful return */
  46.154 -    return OMPI_SUCCESS;
  46.155 -
  46.156 -Error:
  46.157 -
  46.158 -    /* error return */
  46.159 -    return OMPI_ERROR;
  46.160 -}
  46.161 -
  46.162 -int mca_common_netpatterns_setup_narray_knomial_tree(
  46.163 -        int tree_order, int my_rank, int num_nodes,
  46.164 -        mca_common_netpatterns_narray_knomial_tree_node_t *my_node)
  46.165 -{
  46.166 -    /* local variables */
  46.167 -    int n_levels, result;
  46.168 -    int my_level_in_tree, cnt ;
  46.169 -    int lvl,cum_cnt, my_rank_in_my_level,n_lvls_in_tree;
  46.170 -    int start_index,end_index;
  46.171 -    int rc;
  46.172 -
  46.173 -    /* sanity check */
  46.174 -    if( 1 >= tree_order ) {
  46.175 -        goto Error;
  46.176 -    }
  46.177 -
  46.178 -    my_node->my_rank=my_rank;
  46.179 -    my_node->tree_size=num_nodes;
  46.180 -
  46.181 -    /* figure out number of levels in tree */
  46.182 -    n_levels=0;
  46.183 -    result=num_nodes-1;
  46.184 -    while (0 < result ) {
  46.185 -        result/=tree_order;
  46.186 -        n_levels++;
  46.187 -    };
  46.188 -
  46.189 -    /* figure out who my children and parents are */
  46.190 -    my_level_in_tree=-1;
  46.191 -    result=my_rank;
  46.192 -    /* cnt - number of ranks in given level */
  46.193 -    cnt=1;
  46.194 -    /*  cummulative count of ranks */
  46.195 -    while( 0 <= result ) {
  46.196 -        result-=cnt;
  46.197 -        cnt*=tree_order;
  46.198 -        my_level_in_tree++;
  46.199 -    };
  46.200 -    /* int my_level_in_tree, n_children, n_parents; */
  46.201 -
  46.202 -    if( 0 == my_rank ) {
  46.203 -        my_node->n_parents=0;
  46.204 -        my_node->parent_rank=-1;
  46.205 -        my_rank_in_my_level=0;
  46.206 -    } else {
  46.207 -        my_node->n_parents=1;
  46.208 -        cnt=1;
  46.209 -        cum_cnt=0;
  46.210 -        for (lvl = 0 ; lvl < my_level_in_tree ; lvl ++ ) {
  46.211 -            /* cummulative count up to this level */
  46.212 -            cum_cnt+=cnt;
  46.213 -            /* number of ranks in this level */
  46.214 -            cnt*=tree_order;
  46.215 -        }
  46.216 -
  46.217 -        my_node->rank_on_level = 
  46.218 -            my_rank_in_my_level = 
  46.219 -            my_rank-cum_cnt;
  46.220 -        my_node->level_size = cnt;
  46.221 -
  46.222 -        rc = mca_common_netpatterns_setup_recursive_knomial_tree_node(
  46.223 -                my_node->level_size, my_node->rank_on_level, 
  46.224 -                tree_order, &my_node->k_node);
  46.225 -        if (OMPI_SUCCESS != rc) {
  46.226 -            goto Error;
  46.227 -        }
  46.228 -
  46.229 -        /* tree_order consecutive ranks have the same parent */
  46.230 -        my_node->parent_rank=cum_cnt-cnt/tree_order+my_rank_in_my_level/tree_order;
  46.231 -    }
  46.232 -    
  46.233 -    /* figure out number of levels in the tree */
  46.234 -    n_lvls_in_tree=0;
  46.235 -    result=num_nodes;
  46.236 -    /* cnt - number of ranks in given level */
  46.237 -    cnt=1;
  46.238 -    /*  cummulative count of ranks */
  46.239 -    while( 0 < result ) {
  46.240 -        result-=cnt;
  46.241 -        cnt*=tree_order;
  46.242 -        n_lvls_in_tree++;
  46.243 -    };
  46.244 -
  46.245 -    if(result < 0) {
  46.246 -        /* reset the size on group */
  46.247 -        num_nodes = cnt / tree_order;
  46.248 -    }
  46.249 -
  46.250 -    my_node->children_ranks=(int *)NULL;
  46.251 -
  46.252 -    /* get list of children */
  46.253 -    if( my_level_in_tree == (n_lvls_in_tree -1 ) ) {
  46.254 -        /* last level has no children */
  46.255 -        my_node->n_children=0;
  46.256 -    } else {
  46.257 -        cum_cnt=0;
  46.258 -        cnt=1;
  46.259 -        for( lvl=0 ; lvl <= my_level_in_tree ; lvl++ ) {
  46.260 -            cum_cnt+=cnt;
  46.261 -            cnt*=tree_order;
  46.262 -        }
  46.263 -        start_index=cum_cnt+my_rank_in_my_level*tree_order;
  46.264 -        end_index=start_index+tree_order-1;
  46.265 -
  46.266 -        /* don't go out of bounds at the end of the list */
  46.267 -        if( end_index >= num_nodes ) {
  46.268 -            end_index = num_nodes-1;
  46.269 -        }
  46.270 -
  46.271 -        if( start_index <= (num_nodes-1) ) {
  46.272 -            my_node->n_children=end_index-start_index+1;
  46.273 -        } else {
  46.274 -            my_node->n_children=0;
  46.275 -        }
  46.276 -
  46.277 -        my_node->children_ranks=NULL;
  46.278 -        if( 0 < my_node->n_children ) {
  46.279 -            my_node->children_ranks=
  46.280 -                (int *)malloc( sizeof(int)*my_node->n_children);
  46.281 -            if( NULL == my_node->children_ranks) {
  46.282 -                goto Error;
  46.283 -            }
  46.284 -            for (lvl= start_index ; lvl <= end_index ; lvl++ ) {
  46.285 -                my_node->children_ranks[lvl-start_index]=lvl;
  46.286 -            }
  46.287 -        } 
  46.288 -    }
  46.289 -    /* set node type */
  46.290 -    if( 0 == my_node->n_parents ) {
  46.291 -        my_node->my_node_type=ROOT_NODE;
  46.292 -    } else if ( 0 == my_node->n_children ) {
  46.293 -        my_node->my_node_type=LEAF_NODE;
  46.294 -    } else {
  46.295 -        my_node->my_node_type=INTERIOR_NODE;
  46.296 -    }
  46.297 -
  46.298 -
  46.299 -    /* successful return */
  46.300 -    return OMPI_SUCCESS;
  46.301 -
  46.302 -Error:
  46.303 -
  46.304 -    /* error return */
  46.305 -    return OMPI_ERROR;
  46.306 -}
  46.307 -
  46.308 -/* calculate the nearest power of radix that is equal to or greater
  46.309 - * than size, with the specified radix.  The resulting tree is of
  46.310 - * depth n_lvls.
  46.311 - */
  46.312 -OMPI_DECLSPEC int roundup_to_power_radix ( int radix, int size, int *n_lvls )
  46.313 -{
  46.314 -    int n_levels=0, return_value=1;
  46.315 -    int result;
  46.316 -    if( 1 > size ) {
  46.317 -        return 0;
  46.318 -    }
  46.319 -
  46.320 -    result=size-1;
  46.321 -    while (0 < result ) {
  46.322 -        result/=radix;
  46.323 -        n_levels++;
  46.324 -        return_value*=radix;
  46.325 -    };
  46.326 -    *n_lvls=n_levels;
  46.327 -    return return_value;
  46.328 -}
  46.329 -
  46.330 -static int fill_in_node_data(int tree_order, int num_nodes, int my_node,
  46.331 -        mca_common_netpatterns_tree_node_t *nodes_data)
  46.332 -{
  46.333 -    /* local variables */
  46.334 -    int rc, num_ranks_per_child, num_children, n_extra;
  46.335 -    int child, rank, n_to_offset, n_ranks_to_child;
  46.336 -    
  46.337 -    /* split the num_nodes descendants among at most tree_order children */
  46.338 -    num_ranks_per_child=num_nodes/tree_order;
  46.339 -    if( num_ranks_per_child ) {
  46.340 -        num_children=tree_order;
  46.341 -        n_extra=num_nodes-num_ranks_per_child*tree_order;
  46.342 -    } else {
  46.343 -        num_children=num_nodes;
  46.344 -        /* fewer nodes than tree_order: every node becomes a direct child */
  46.345 -        n_extra=0;
  46.346 -        /* when there is a child, there is at least one
  46.347 -         * descendant */
  46.348 -        num_ranks_per_child=1;
  46.349 -    }
  46.350 -
  46.351 -    nodes_data[my_node].n_children=num_children;
  46.352 -    if( num_children ) {
  46.353 -        nodes_data[my_node].children_ranks=(int *)
  46.354 -            malloc(sizeof(int)*num_children);
  46.355 -        if(!nodes_data[my_node].children_ranks) {
  46.356 -            /* NOTE(review): the inner test repeats the outer NULL check */
  46.357 -            if ( NULL == nodes_data[my_node].children_ranks )
  46.358 -            {
  46.359 -                fprintf(stderr, "Cannot allocate memory for children_ranks.\n");
  46.360 -                rc = OMPI_ERR_OUT_OF_RESOURCE;
  46.361 -                goto error;
  46.362 -            }
  46.363 -        }
  46.364 -    }
  46.365 -
  46.366 -    rank = my_node;
  46.367 -    for( child=0 ; child < num_children ; child ++ ) {
  46.368 -
  46.369 -    /* NOTE(review): rank is my_node (first pass) or the previous child here */
  46.370 -        nodes_data[rank].n_parents=1;
  46.371 -        nodes_data[rank].parent_rank=my_node;
  46.372 -        if( n_extra ) {
  46.373 -            n_to_offset=child;
  46.374 -            if( n_to_offset > n_extra){
  46.375 -                n_to_offset=n_extra;
  46.376 -            }
  46.377 -        } else {
  46.378 -            n_to_offset=0;
  46.379 -        }
  46.380 -
  46.381 -        rank=my_node+1+child*num_ranks_per_child;
  46.382 -        rank+=n_to_offset;
  46.383 -
  46.384 -        /* set parent information */        
  46.385 -        nodes_data[rank].n_parents=1;
  46.386 -        nodes_data[rank].parent_rank=my_node;
  46.387 -
  46.388 -        n_ranks_to_child=num_ranks_per_child;
  46.389 -        if(n_extra && (child < n_extra) ) {
  46.390 -            n_ranks_to_child++;
  46.391 -        }
  46.392 -    
  46.393 -        /* set child information */
  46.394 -        nodes_data[my_node].children_ranks[child]=rank;
  46.395 -
  46.396 -        /* the child itself is not counted among its own descendants */
  46.397 -        n_ranks_to_child--;
  46.398 -        rc=fill_in_node_data(tree_order, n_ranks_to_child, rank, nodes_data);
  46.399 -        if( OMPI_SUCCESS != rc ) {
  46.400 -            goto error;
  46.401 -        }
  46.402 -
  46.403 -    }
  46.404 -
  46.405 -    /* return */
  46.406 -    return OMPI_SUCCESS;
  46.407 -
  46.408 -    /* Error */
  46.409 -error:
  46.410 -    return rc;
  46.411 -
  46.412 -}
  46.413 -
  46.414 -/*
  46.415 - * This routine sets up the array describing the communication tree for
  46.416 - * a k-ary tree where the children form a contiguous range of ranks at
  46.417 - * each level.  The assumption here is that rank 0 is always the root -
  46.418 - * ranks may be rotated based on who the actual root is, to obtain the
  46.419 - * appropriate communication pattern for such roots.
  46.420 - */
  46.421 -OMPI_DECLSPEC int mca_common_netpatterns_setup_narray_tree_contigous_ranks(
  46.422 -        int tree_order, int num_nodes, 
  46.423 -        mca_common_netpatterns_tree_node_t **tree_nodes)
  46.424 -{
  46.425 -    /* local variables */
  46.426 -    int num_descendent_ranks=num_nodes-1;
  46.427 -    int rc=OMPI_SUCCESS;
  46.428 -
  46.429 -    *tree_nodes=(mca_common_netpatterns_tree_node_t *)malloc(
  46.430 -            sizeof(mca_common_netpatterns_tree_node_t)*
  46.431 -            num_nodes);
  46.432 -    if(!(*tree_nodes) ) {
  46.433 -        fprintf(stderr, "Cannot allocate memory for tree_nodes.\n");
  46.434 -        rc = OMPI_ERR_OUT_OF_RESOURCE;
  46.435 -        return rc;
  46.436 -    }
  46.437 -
  46.438 -    (*tree_nodes)[0].n_parents=0;
  46.439 -    rc=fill_in_node_data(tree_order, 
  46.440 -            num_descendent_ranks, 0, *tree_nodes);
  46.441 -
  46.442 -    /* rc may be an error from fill_in_node_data; *tree_nodes is not freed here */
  46.443 -    return rc;
  46.444 -
  46.445 -}
    47.1 --- a/ompi/mca/sbgp/basesmsocket/Makefile.am	Tue Feb 19 22:36:41 2013 +0000
    47.2 +++ b/ompi/mca/sbgp/basesmsocket/Makefile.am	Tue Feb 19 22:50:56 2013 +0000
    47.3 @@ -35,8 +35,7 @@
    47.4  mcacomponent_LTLIBRARIES = $(component_install)
    47.5  mca_sbgp_basesmsocket_la_SOURCES = $(sources)
    47.6  mca_sbgp_basesmsocket_la_LDFLAGS = -module -avoid-version
    47.7 -mca_sbgp_basesmsocket_la_LIBADD = \
    47.8 -                                 $(top_ompi_builddir)/ompi/mca/common/commpatterns/libmca_common_commpatterns.la 
    47.9 +mca_sbgp_basesmsocket_la_LIBADD = 
   47.10  
   47.11  noinst_LTLIBRARIES = $(component_noinst)
   47.12  libmca_sbgp_basesmsocket_la_SOURCES =$(sources)
    48.1 --- a/ompi/mca/sbgp/basesmsocket/sbgp_basesmsocket_component.c	Tue Feb 19 22:36:41 2013 +0000
    48.2 +++ b/ompi/mca/sbgp/basesmsocket/sbgp_basesmsocket_component.c	Tue Feb 19 22:50:56 2013 +0000
    48.3 @@ -39,7 +39,7 @@
    48.4  #include "ompi/communicator/communicator.h"
    48.5  #include "sbgp_basesmsocket.h"
    48.6  
    48.7 -#include "ompi/mca/common/commpatterns/common_coll_ops.h"
    48.8 +#include "ompi/patterns/comm/coll_ops.h"
    48.9  
   48.10  
   48.11  /*
    49.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    49.2 +++ b/ompi/patterns/comm/Makefile.am	Tue Feb 19 22:50:56 2013 +0000
    49.3 @@ -0,0 +1,16 @@
    49.4 +# Copyright (c) 2013 Oak Ridge National Laboratory.  All rights reserved.
    49.5 +# $COPYRIGHT$
    49.6 +# 
    49.7 +# Additional copyrights may follow
    49.8 +# 
    49.9 +# $HEADER$
   49.10 +#
   49.11 +
   49.12 +headers += \
   49.13 +        patterns/comm/coll_ops.h \
   49.14 +        patterns/comm/commpatterns.h
   49.15 +
   49.16 +libmpi_la_SOURCES += \
   49.17 +        patterns/comm/allreduce.c \
   49.18 +        patterns/comm/allgather.c \
   49.19 +        patterns/comm/bcast.c
    50.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    50.2 +++ b/ompi/patterns/comm/allgather.c	Tue Feb 19 22:50:56 2013 +0000
    50.3 @@ -0,0 +1,288 @@
    50.4 +/*
    50.5 + * Copyright (c) 2009-2012 Mellanox Technologies.  All rights reserved.
    50.6 + * Copyright (c) 2009-2012 Oak Ridge National Laboratory.  All rights reserved.
    50.7 + * Copyright (c) 2012      Los Alamos National Security, LLC.
    50.8 + *                         All rights reserved.
    50.9 + * $COPYRIGHT$
   50.10 + * 
   50.11 + * Additional copyrights may follow
   50.12 + * 
   50.13 + * $HEADER$
   50.14 + */
   50.15 +/** @file */
   50.16 +
   50.17 +#include "ompi_config.h"
   50.18 +
   50.19 +#include "ompi/constants.h"
   50.20 +#include "ompi/op/op.h"
   50.21 +#include "ompi/datatype/ompi_datatype.h"
   50.22 +#include "ompi/communicator/communicator.h"
   50.23 +#include "opal/include/opal/sys/atomic.h"
   50.24 +#include "ompi/mca/pml/pml.h"
   50.25 +#include "ompi/patterns/net/netpatterns.h"
   50.26 +#include "coll_ops.h"
   50.27 +
   50.28 +/**
   50.29 + * All-gather - subgroup in communicator
   50.30 + */
   50.31 +OMPI_DECLSPEC int comm_allgather_pml(void *src_buf, void *dest_buf, int count,
   50.32 +        ompi_datatype_t *dtype, int my_rank_in_group,
   50.33 +        int n_peers, int *ranks_in_comm,ompi_communicator_t *comm)
   50.34 +{
   50.35 +    /* local variables */
   50.36 +    int rc=OMPI_SUCCESS,msg_cnt;
   50.37 +    int pair_rank,exchange,extra_rank, n_extra_nodes,n_extra;
   50.38 +    int proc_block,extra_start,extra_end,iovec_len;
   50.39 +    int remote_data_start_rank,remote_data_end_rank;
   50.40 +    int local_data_start_rank;
   50.41 +    netpatterns_pair_exchange_node_t my_exchange_node;
   50.42 +    size_t message_extent,current_data_extent,current_data_count;
   50.43 +    size_t dt_size;
   50.44 +    OPAL_PTRDIFF_TYPE dt_extent;
   50.45 +    char *src_buf_current;
   50.46 +    char *dest_buf_current;
   50.47 +    struct iovec send_iov[2] = {{0,0},{0,0}}, 
   50.48 +                 recv_iov[2] = {{0,0},{0,0}};
   50.49 +    ompi_request_t *requests[4];
   50.50 +
   50.51 +    /* get the size and extent of the datatype - the extent fixes the
   50.52 +     *   per-rank stride (message_extent) inside dest_buf
   50.53 +     */
   50.54 +    rc = ompi_datatype_type_size(dtype, &dt_size);
   50.55 +    if( OMPI_SUCCESS != rc ) {
   50.56 +        goto Error;
   50.57 +    }
   50.58 +
   50.59 +    rc = ompi_datatype_type_extent(dtype, &dt_extent);
   50.60 +    if( OMPI_SUCCESS != rc ) {
   50.61 +        goto Error;
   50.62 +    }
   50.63 +    message_extent = dt_extent*count;
   50.64 +
   50.65 +    /* place my data in the correct destination buffer */
   50.66 +    rc=ompi_datatype_copy_content_same_ddt(dtype,count,
   50.67 +            (char *)dest_buf+my_rank_in_group*message_extent,
   50.68 +            (char *)src_buf);
   50.69 +    if( OMPI_SUCCESS != rc ) {
   50.70 +        goto Error;
   50.71 +    }
   50.72 +
   50.73 +    /* 1 process special case */
   50.74 +    if(1 == n_peers) {
   50.75 +        return OMPI_SUCCESS;
   50.76 +    }
   50.77 +
   50.78 +    /* get my recursive doubling communication pattern */
   50.79 +    rc = netpatterns_setup_recursive_doubling_tree_node(n_peers, 
   50.80 +            my_rank_in_group, &my_exchange_node);
   50.81 +    if(OMPI_SUCCESS != rc){
   50.82 +        return rc;
   50.83 +    }
   50.84 +
   50.85 +    n_extra_nodes=n_peers-my_exchange_node.n_largest_pow_2;
   50.86 +
   50.87 +    /* get the data from the extra sources */
   50.88 +    if(0 < my_exchange_node.n_extra_sources)  {
   50.89 +
   50.90 +        if ( EXCHANGE_NODE == my_exchange_node.node_type ) {
   50.91 +
   50.92 +            /*
   50.93 +             ** Receive data from extra node
   50.94 +             */
   50.95 +
   50.96 +            extra_rank=my_exchange_node.rank_extra_source;
   50.97 +            /* receive the data into the correct location - will use 2
   50.98 +             * messages in the recursive doubling phase */
   50.99 +            dest_buf_current=(char *)dest_buf+message_extent*extra_rank;
  50.100 +            rc=MCA_PML_CALL(recv(dest_buf_current,
  50.101 +                    count,dtype,ranks_in_comm[extra_rank],
  50.102 +                    -OMPI_COMMON_TAG_ALLREDUCE,
  50.103 +                    comm, MPI_STATUSES_IGNORE));
  50.104 +            if( 0 > rc ) {
  50.105 +                goto  Error;
  50.106 +            }
  50.107 +
  50.108 +        } else {
  50.109 +
  50.110 +            /*
  50.111 +             ** Send data to "partner" node
  50.112 +             */
  50.113 +            extra_rank=my_exchange_node.rank_extra_source;
  50.114 +            src_buf_current=(char *)src_buf;
  50.115 +            rc=MCA_PML_CALL(send(src_buf_current,
  50.116 +                    count,dtype,ranks_in_comm[extra_rank],
  50.117 +                    -OMPI_COMMON_TAG_ALLREDUCE,
  50.118 +                    MCA_PML_BASE_SEND_STANDARD,
  50.119 +                    comm));
  50.120 +            if( 0 > rc ) {
  50.121 +                goto  Error;
  50.122 +            }
  50.123 +        }
  50.124 +    }
  50.125 +
  50.126 +    current_data_extent=message_extent;
  50.127 +    current_data_count=count;
  50.128 +    src_buf_current=(char *)dest_buf+my_rank_in_group*message_extent;
  50.129 +    proc_block=1;
  50.130 +    local_data_start_rank=my_rank_in_group;
  50.131 +    /* loop over data exchanges */
  50.132 +    for(exchange=0 ; exchange < my_exchange_node.n_exchanges ; exchange++) {
  50.133 +
  50.134 +        /* partner rank for this exchange step */
  50.135 +        pair_rank=my_exchange_node.rank_exchanges[exchange];
  50.136 +        msg_cnt=0;
  50.137 +
  50.138 +        /*
  50.139 +         * Power of 2 data segment 
  50.140 +         */
  50.141 +        /* post non-blocking receive */
  50.142 +        if(pair_rank > my_rank_in_group ){
  50.143 +            recv_iov[0].iov_base=src_buf_current+current_data_extent;
  50.144 +            recv_iov[0].iov_len=current_data_extent;
  50.145 +            iovec_len=1;
  50.146 +            remote_data_start_rank=local_data_start_rank+proc_block;
  50.147 +            remote_data_end_rank=remote_data_start_rank+proc_block-1;
  50.148 +        } else {
  50.149 +            recv_iov[0].iov_base=src_buf_current-current_data_extent;
  50.150 +            recv_iov[0].iov_len=current_data_extent;
  50.151 +            iovec_len=1;
  50.152 +            remote_data_start_rank=local_data_start_rank-proc_block;
  50.153 +            remote_data_end_rank=remote_data_start_rank+proc_block-1;
  50.154 +        }
  50.155 +        /* the data from the non power of 2 ranks */
  50.156 +        if(remote_data_start_rank<n_extra_nodes) {
  50.157 +            /* figure out how much data is at the remote rank */
  50.158 +            /* last rank with data */
  50.159 +            extra_start=remote_data_start_rank;
  50.160 +            extra_end=remote_data_end_rank;
  50.161 +            if(extra_end >= n_extra_nodes ) {
  50.162 +                /* if last rank exceeds the ranks with extra data,
  50.163 +                 * adjust this.
  50.164 +                 */
  50.165 +                extra_end=n_extra_nodes-1;
  50.166 +            }
  50.167 +            /* get the number of ranks whose data is to be grabbed */
  50.168 +            n_extra=extra_end-extra_start+1;
  50.169 +
  50.170 +            recv_iov[1].iov_base=(char *)dest_buf+
  50.171 +                (extra_start+my_exchange_node.n_largest_pow_2)*message_extent;
  50.172 +            recv_iov[1].iov_len=n_extra*count;
  50.173 +            iovec_len=2;
  50.174 +        }
  50.175 +
  50.176 +        rc=MCA_PML_CALL(irecv(recv_iov[0].iov_base,
  50.177 +                    current_data_count,dtype,ranks_in_comm[pair_rank],
  50.178 +                    -OMPI_COMMON_TAG_ALLREDUCE,
  50.179 +                    comm,&(requests[msg_cnt])));
  50.180 +        if( 0 > rc ) {
  50.181 +            goto Error;
  50.182 +        }
  50.183 +        msg_cnt++;
  50.184 +
  50.185 +        if(iovec_len > 1 ) {
  50.186 +            rc=MCA_PML_CALL(irecv(recv_iov[1].iov_base,
  50.187 +                        recv_iov[1].iov_len,dtype,ranks_in_comm[pair_rank],
  50.188 +                        -OMPI_COMMON_TAG_ALLREDUCE,
  50.189 +                        comm,&(requests[msg_cnt])));
  50.190 +            if( 0 > rc ) {
  50.191 +                goto Error;
  50.192 +            }
  50.193 +            msg_cnt++;
  50.194 +        }
  50.195 +
  50.196 +        /* post non-blocking send */
  50.197 +        send_iov[0].iov_base=src_buf_current;
  50.198 +        send_iov[0].iov_len=current_data_extent;
  50.199 +        iovec_len=1;
  50.200 +        /* the data from the non power of 2 ranks */
  50.201 +        if(local_data_start_rank<n_extra_nodes) {
  50.202 +            /* figure out how much extra data is held locally */
  50.203 +            /* last rank with data */
  50.204 +            extra_start=local_data_start_rank;
  50.205 +            extra_end=extra_start+proc_block-1;
  50.206 +            if(extra_end >= n_extra_nodes ) {
  50.207 +                /* if last rank exceeds the ranks with extra data,
  50.208 +                 * adjust this.
  50.209 +                 */
  50.210 +                extra_end=n_extra_nodes-1;
  50.211 +            }
  50.212 +            /* get the number of ranks whose data is to be sent */
  50.213 +            n_extra=extra_end-extra_start+1;
  50.214 +
  50.215 +            send_iov[1].iov_base=(char *)dest_buf+
  50.216 +                (extra_start+my_exchange_node.n_largest_pow_2)*message_extent;
  50.217 +            send_iov[1].iov_len=n_extra*count;
  50.218 +            iovec_len=2;
  50.219 +        }
  50.220 +
  50.221 +        rc=MCA_PML_CALL(isend(send_iov[0].iov_base,
  50.222 +                    current_data_count,dtype,ranks_in_comm[pair_rank],
  50.223 +                    -OMPI_COMMON_TAG_ALLREDUCE,MCA_PML_BASE_SEND_STANDARD,
  50.224 +                    comm,&(requests[msg_cnt])));
  50.225 +        if( 0 > rc ) {
  50.226 +            goto Error;
  50.227 +        }
  50.228 +        msg_cnt++;
  50.229 +        if( iovec_len > 1 ) { 
  50.230 +            rc=MCA_PML_CALL(isend(send_iov[1].iov_base,
  50.231 +                        send_iov[1].iov_len,dtype,ranks_in_comm[pair_rank],
  50.232 +                        -OMPI_COMMON_TAG_ALLREDUCE,MCA_PML_BASE_SEND_STANDARD,
  50.233 +                        comm,&(requests[msg_cnt])));
  50.234 +            if( 0 > rc ) {
  50.235 +                goto Error;
  50.236 +            }
  50.237 +            msg_cnt++;
  50.238 +        }
  50.239 +
  50.240 +        /* prepare the source buffer for the next iteration */
  50.241 +        if(pair_rank < my_rank_in_group ){
  50.242 +            src_buf_current-=current_data_extent;
  50.243 +            local_data_start_rank-=proc_block;
  50.244 +        } 
  50.245 +        proc_block*=2;
  50.246 +        current_data_extent*=2;
  50.247 +        current_data_count*=2;
  50.248 +
  50.249 +        /* wait on send and receive completion */
  50.250 +        ompi_request_wait_all(msg_cnt,requests,MPI_STATUSES_IGNORE);
  50.251 +    }
  50.252 +
  50.253 +    /* hand the full result to the "extra" rank, if there is one */
  50.254 +    if(0 < my_exchange_node.n_extra_sources)  {
  50.255 +
  50.256 +        if ( EXTRA_NODE == my_exchange_node.node_type ) {
  50.257 +            /* 
  50.258 +             ** receive the data 
  50.259 +             ** */
  50.260 +            extra_rank=my_exchange_node.rank_extra_source;
  50.261 +
  50.262 +            rc=MCA_PML_CALL(recv(dest_buf,
  50.263 +                    count*n_peers,dtype,ranks_in_comm[extra_rank],
  50.264 +                    -OMPI_COMMON_TAG_ALLREDUCE,
  50.265 +                    comm,MPI_STATUSES_IGNORE));
  50.266 +            if(0 > rc ) {
  50.267 +                goto  Error;
  50.268 +            }
  50.269 +        } else {
  50.270 +            /* send the data to the pair-rank outside of the power of 2 set
  50.271 +             ** of ranks
  50.272 +             */
  50.273 +
  50.274 +            extra_rank=my_exchange_node.rank_extra_source;
  50.275 +            rc=MCA_PML_CALL(send(dest_buf,
  50.276 +                    count*n_peers,dtype,ranks_in_comm[extra_rank],
  50.277 +                    -OMPI_COMMON_TAG_ALLREDUCE,
  50.278 +                    MCA_PML_BASE_SEND_STANDARD,
  50.279 +                    comm));
  50.280 +            if( 0 > rc ) {
  50.281 +                goto  Error;
  50.282 +            }
  50.283 +        }
  50.284 +    }
  50.285 +
  50.286 +    /* return */
  50.287 +    return OMPI_SUCCESS;
  50.288 +
  50.289 +Error:
  50.290 +    return rc;
  50.291 +}
    51.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    51.2 +++ b/ompi/patterns/comm/allreduce.c	Tue Feb 19 22:50:56 2013 +0000
    51.3 @@ -0,0 +1,255 @@
    51.4 +/*
    51.5 + * Copyright (c) 2009-2012 Mellanox Technologies.  All rights reserved.
    51.6 + * Copyright (c) 2009-2012 Oak Ridge National Laboratory.  All rights reserved.
    51.7 + * Copyright (c) 2012      Los Alamos National Security, LLC.
    51.8 + *                         All rights reserved.
    51.9 + * $COPYRIGHT$
   51.10 + * 
   51.11 + * Additional copyrights may follow
   51.12 + * 
   51.13 + * $HEADER$
   51.14 + */
   51.15 +/** @file */
   51.16 +
   51.17 +#include "ompi_config.h"
   51.18 +
   51.19 +#include "ompi/constants.h"
   51.20 +#include "ompi/op/op.h"
   51.21 +#include "ompi/datatype/ompi_datatype.h"
   51.22 +#include "ompi/communicator/communicator.h"
   51.23 +#include "opal/include/opal/sys/atomic.h"
   51.24 +#include "ompi/mca/pml/pml.h"
   51.25 +#include "ompi/patterns/net/netpatterns.h"
   51.26 +#include "coll_ops.h"
   51.27 +#include "commpatterns.h"
   51.28 +
   51.29 +/**
   51.30 + * All-reduce for contiguous primitive types
   51.31 + */
   51.32 +OMPI_DECLSPEC int comm_allreduce_pml(void *sbuf, void *rbuf, int count, 
   51.33 +        ompi_datatype_t *dtype, int my_rank_in_group,
   51.34 +        struct ompi_op_t *op, int n_peers,int *ranks_in_comm, 
   51.35 +        ompi_communicator_t *comm)
   51.36 +{
   51.37 +    /* local variables */
   51.38 +    int rc=OMPI_SUCCESS,n_dts_per_buffer,n_data_segments,stripe_number;
   51.39 +    int pair_rank,exchange,extra_rank;
   51.40 +    netpatterns_pair_exchange_node_t my_exchange_node;
   51.41 +    int count_processed,count_this_stripe;
   51.42 +    size_t dt_size,dt_extent;
   51.43 +    char scratch_bufers[2][MAX_TMP_BUFFER];
   51.44 +    int send_buffer=0,recv_buffer=1;
   51.45 +    char *sbuf_current, *rbuf_current;
   51.46 +    ompi_request_t *requests[2];
   51.47 +
   51.48 +    /* get size of data needed - same layout as user data, so that
   51.49 +     *   we can apply the reduction routines directly on these buffers
   51.50 +     */
   51.51 +    rc = opal_datatype_type_size((opal_datatype_t *)dtype, &dt_size);
   51.52 +    if( OMPI_SUCCESS != rc ) {
   51.53 +        goto Error;
   51.54 +    }
   51.55 +    rc = ompi_datatype_type_extent(dtype, (OPAL_PTRDIFF_TYPE *)&dt_extent);
   51.56 +    if( OMPI_SUCCESS != rc ) {
   51.57 +        goto Error;
   51.58 +    }
   51.59 +    
   51.60 +    /* 1 process special case */
   51.61 +    if(1 == n_peers) {
   51.62 +        /* place my data in the correct destination buffer */
   51.63 +        rc=ompi_datatype_copy_content_same_ddt(dtype,count,
   51.64 +                (char *)rbuf, (char *)sbuf);
   51.65 +        if( OMPI_SUCCESS != rc ) {
   51.66 +            goto Error;
   51.67 +        }
   51.68 +        return OMPI_SUCCESS;
   51.69 +    }
   51.70 +
   51.71 +    /* number of data types copies that the scratch buffer can hold */
   51.72 +    n_dts_per_buffer=((int) MAX_TMP_BUFFER)/dt_extent;
   51.73 +    if ( 0 == n_dts_per_buffer ) {
   51.74 +        rc=OMPI_ERROR;
   51.75 +        goto Error;
   51.76 +    }
   51.77 +
   51.78 +    /* compute number of stripes needed to process this collective */
   51.79 +    n_data_segments=(count+n_dts_per_buffer -1 ) / n_dts_per_buffer ;
   51.80 +
   51.81 +    /* get my reduction communication pattern */
   51.82 +    rc = netpatterns_setup_recursive_doubling_tree_node(n_peers,
   51.83 +            my_rank_in_group, &my_exchange_node);
   51.84 +    if(OMPI_SUCCESS != rc){
   51.85 +        return rc;
   51.86 +    }
   51.87 +
   51.88 +    count_processed=0;
   51.89 +
   51.90 +    /* process the data in stripes that fit in one scratch buffer */
   51.91 +    /* NOTE: starting with a rather synchronous approach */
   51.92 +    for( stripe_number=0 ; stripe_number < n_data_segments ; stripe_number++ ) {
   51.93 +
   51.94 +        /* get number of elements to process in this stripe */
   51.95 +        count_this_stripe=n_dts_per_buffer;
   51.96 +        if( count_processed + count_this_stripe > count )
   51.97 +            count_this_stripe=count-count_processed;
   51.98 +
   51.99 +        /* copy data from the input buffer into the temp buffer */
  51.100 +        sbuf_current=(char *)sbuf+count_processed*dt_extent;
  51.101 +        rc=ompi_datatype_copy_content_same_ddt(dtype,count_this_stripe,
  51.102 +                scratch_bufers[send_buffer], sbuf_current);
  51.103 +        if( OMPI_SUCCESS != rc ) {
  51.104 +            goto Error;
  51.105 +        }
  51.106 +
  51.107 +        /* copy data in from the "extra" source, if need be */
  51.108 +        if(0 < my_exchange_node.n_extra_sources)  {
  51.109 +
  51.110 +            if ( EXCHANGE_NODE == my_exchange_node.node_type ) {
  51.111 +                
  51.112 +                /*
  51.113 +                ** Receive data from extra node
  51.114 +                */
  51.115 +                extra_rank=my_exchange_node.rank_extra_source;
  51.116 +                rc=MCA_PML_CALL(recv(scratch_bufers[recv_buffer],
  51.117 +                            count_this_stripe,dtype,ranks_in_comm[extra_rank],
  51.118 +                            -OMPI_COMMON_TAG_ALLREDUCE, comm,
  51.119 +                            MPI_STATUSES_IGNORE));
  51.120 +                if( 0 > rc ) {
  51.121 +                    fprintf(stderr,"  first recv failed in comm_allreduce_pml \n");
  51.122 +                    fflush(stderr);
  51.123 +                    goto  Error;
  51.124 +                }
  51.125 +
  51.126 +
  51.127 +                /* combine my data with the contribution from the extra node */
  51.128 +                if( 0 < count_this_stripe ) {
  51.129 +                    ompi_op_reduce(op,
  51.130 +                            (void *)scratch_bufers[send_buffer],
  51.131 +                            (void *)scratch_bufers[recv_buffer],
  51.132 +                            count_this_stripe,dtype);
  51.133 +                }
  51.134 +
  51.135 +
  51.136 +            } else {
  51.137 +        
  51.138 +                /*
  51.139 +                ** Send data to "partner" node
  51.140 +                */
  51.141 +                extra_rank=my_exchange_node.rank_extra_source;
  51.142 +                rc=MCA_PML_CALL(send(scratch_bufers[send_buffer],
  51.143 +                            count_this_stripe,dtype,ranks_in_comm[extra_rank],
  51.144 +                            -OMPI_COMMON_TAG_ALLREDUCE, MCA_PML_BASE_SEND_STANDARD,
  51.145 +                            comm));
  51.146 +                if( 0 > rc ) {
  51.147 +                    fprintf(stderr,"  first send failed in comm_allreduce_pml \n");
  51.148 +                    fflush(stderr);
  51.149 +                    goto  Error;
  51.150 +                }
  51.151 +            }
  51.152 +
  51.153 +            /* change pointer to scratch buffer - this way we can send data
  51.154 +            ** that we have summed w/o a memory copy, and receive data into the
  51.155 +            ** other buffer, w/o fear of overwriting data that has not yet
  51.156 +            ** completed being sent
  51.157 +            */
  51.158 +            recv_buffer^=1;
  51.159 +            send_buffer^=1;
  51.160 +        }
  51.161 +
  51.162 +        /* loop over data exchanges */
  51.163 +        for(exchange=0 ; exchange < my_exchange_node.n_exchanges ; exchange++) {
  51.164 +
  51.165 +            /* partner rank for this exchange step */
  51.166 +            pair_rank=my_exchange_node.rank_exchanges[exchange];
  51.167 +
  51.168 +            /* post non-blocking receive */
  51.169 +            rc=MCA_PML_CALL(irecv(scratch_bufers[recv_buffer],
  51.170 +                        count_this_stripe,dtype,ranks_in_comm[pair_rank],
  51.171 +                        -OMPI_COMMON_TAG_ALLREDUCE,
  51.172 +                        comm,&(requests[0])));
  51.173 +            if( 0 > rc ) {
  51.174 +                fprintf(stderr,"  irecv failed in  comm_allreduce_pml at iterations %d \n",
  51.175 +                        exchange);
  51.176 +                fflush(stderr);
  51.177 +                goto Error;
  51.178 +            }
  51.179 +
  51.180 +            /* post non-blocking send */
  51.181 +            rc=MCA_PML_CALL(isend(scratch_bufers[send_buffer],
  51.182 +                        count_this_stripe,dtype, ranks_in_comm[pair_rank],
  51.183 +                        -OMPI_COMMON_TAG_ALLREDUCE,MCA_PML_BASE_SEND_STANDARD,
  51.184 +                        comm,&(requests[1])));
  51.185 +            if( 0 > rc ) {
  51.186 +                fprintf(stderr,"  isend failed in  comm_allreduce_pml at iterations %d \n",
  51.187 +                        exchange);
  51.188 +                fflush(stderr);
  51.189 +                goto Error;
  51.190 +            }
  51.191 +            /* wait on send and receive completion */
  51.192 +            ompi_request_wait_all(2,requests,MPI_STATUSES_IGNORE);
  51.193 +
  51.194 +            /* reduce the data */
  51.195 +            if( 0 < count_this_stripe ) {
  51.196 +                ompi_op_reduce(op,
  51.197 +                        (void *)scratch_bufers[send_buffer],
  51.198 +                        (void *)scratch_bufers[recv_buffer],
  51.199 +                        count_this_stripe,dtype);
  51.200 +            }
  51.201 +            /* get ready for next step */
  51.202 +            recv_buffer^=1;
  51.203 +            send_buffer^=1;
  51.204 +
  51.205 +        }
  51.206 +
  51.207 +        /* pass the final result to the "extra" rank, if there is one */
  51.208 +        if(0 < my_exchange_node.n_extra_sources)  {
  51.209 +
  51.210 +            if ( EXTRA_NODE == my_exchange_node.node_type ) {
  51.211 +                /* 
  51.212 +                ** receive the data 
  51.213 +                ** */
  51.214 +                extra_rank=my_exchange_node.rank_extra_source;
  51.215 +                rc=MCA_PML_CALL(recv(scratch_bufers[recv_buffer],
  51.216 +                            count_this_stripe,dtype,ranks_in_comm[extra_rank],
  51.217 +                            -OMPI_COMMON_TAG_ALLREDUCE, comm,
  51.218 +                            MPI_STATUSES_IGNORE));
  51.219 +                if( 0 > rc ) {
  51.220 +                    fprintf(stderr,"  last recv failed in comm_allreduce_pml \n");
  51.221 +                    fflush(stderr);
  51.222 +                    goto  Error;
  51.223 +                }
  51.224 +
  51.225 +                recv_buffer^=1;
  51.226 +                send_buffer^=1;
  51.227 +            } else {
  51.228 +                /* send the data to the pair-rank outside of the power of 2 set
  51.229 +                ** of ranks
  51.230 +                */
  51.231 +
  51.232 +                extra_rank=my_exchange_node.rank_extra_source;
  51.233 +                rc=MCA_PML_CALL(send((char *)scratch_bufers[send_buffer],
  51.234 +                            count_this_stripe,dtype,ranks_in_comm[extra_rank],
  51.235 +                            -OMPI_COMMON_TAG_ALLREDUCE, MCA_PML_BASE_SEND_STANDARD,
  51.236 +                            comm));
  51.237 +                if( 0 > rc ) {
  51.238 +                    fprintf(stderr,"  last send failed in comm_allreduce_pml \n");
  51.239 +                    fflush(stderr);
  51.240 +                    goto  Error;
  51.241 +                }
  51.242 +            }
  51.243 +        }
  51.244 +
  51.245 +        /* copy result to rbuf; NOTE(review): offsets use dt_size here but dt_extent for sbuf */
  51.246 +        rbuf_current = (char *) rbuf + count_processed * dt_size;
  51.247 +        memcpy(rbuf_current,scratch_bufers[send_buffer], count_this_stripe*dt_size);
  51.248 +    
  51.249 +        /* update the count of elements processed */
  51.250 +        count_processed += count_this_stripe;
  51.251 +    }
  51.252 +
  51.253 +    /* return */
  51.254 +    return OMPI_SUCCESS;
  51.255 +
  51.256 +Error:
  51.257 +    return rc;
  51.258 +}
    52.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    52.2 +++ b/ompi/patterns/comm/bcast.c	Tue Feb 19 22:50:56 2013 +0000
    52.3 @@ -0,0 +1,97 @@
    52.4 +/*
    52.5 + * Copyright (c) 2009-2012 Mellanox Technologies.  All rights reserved.
    52.6 + * Copyright (c) 2009-2012 Oak Ridge National Laboratory.  All rights reserved.
    52.7 + * Copyright (c) 2012      Los Alamos National Security, LLC.
    52.8 + *                         All rights reserved.
    52.9 + * $COPYRIGHT$
   52.10 + * 
   52.11 + * Additional copyrights may follow
   52.12 + * 
   52.13 + * $HEADER$
   52.14 + */
   52.15 +/** @file */
   52.16 +
   52.17 +#include "ompi_config.h"
   52.18 +
   52.19 +#include "ompi/constants.h"
   52.20 +#include "ompi/op/op.h"
   52.21 +#include "ompi/datatype/ompi_datatype.h"
   52.22 +#include "ompi/communicator/communicator.h"
   52.23 +#include "opal/include/opal/sys/atomic.h"
   52.24 +#include "ompi/mca/pml/pml.h"
   52.25 +#include "ompi/patterns/net/netpatterns.h"
   52.26 +#include "coll_ops.h"
   52.27 +
   52.28 +/**
   52.29 + * Bcast - subgroup in communicator
   52.30 + *  This is a very simple algorithm - binary tree, transmitting the full
   52.31 + *  message at each step.
   52.32 + */
   52.33 +OMPI_DECLSPEC int comm_bcast_pml(void *buffer, int root, int count,
   52.34 +        ompi_datatype_t *dtype, int my_rank_in_group,
   52.35 +        int n_peers, int *ranks_in_comm,ompi_communicator_t *comm)
   52.36 +{
   52.37 +    /* local variables */
   52.38 +    int rc=OMPI_SUCCESS,msg_cnt,i;
   52.39 +    ompi_request_t *requests[2];
   52.40 +    int node_rank, peer_rank;
   52.41 +    netpatterns_tree_node_t node_data;
   52.42 +
   52.43 +    /*
   52.44 +     * shift rank to root==0 tree
   52.45 +     */
   52.46 +    node_rank=(my_rank_in_group-root+n_peers)%n_peers;
   52.47 +
   52.48 +    /*
   52.49 +     * compute my communication pattern - binary tree
   52.50 +     */
   52.51 +    rc=netpatterns_setup_narray_tree(2, node_rank, n_peers,
   52.52 +            &node_data);
   52.53 +    if( OMPI_SUCCESS != rc ) {
   52.54 +        goto Error;
   52.55 +    }
   52.56 +
   52.57 +    /* 1 process special case */
   52.58 +    if(1 == n_peers) {
   52.59 +        return OMPI_SUCCESS;
   52.60 +    }
   52.61 +
   52.62 +    /* if I have parents - wait on the data to arrive */
   52.63 +    if(node_data.n_parents) {
   52.64 +        /* I will have only 1 parent */
   52.65 +        peer_rank=node_data.parent_rank;
   52.66 +        peer_rank=(peer_rank+root)%n_peers;
   52.67 +        /* shifted back from root==0 numbering; NOTE(review): ranks_in_comm is never applied */
   52.68 +        rc=MCA_PML_CALL(recv(buffer, count,dtype,peer_rank,
   52.69 +                    -OMPI_COMMON_TAG_BCAST, comm, MPI_STATUSES_IGNORE));
   52.70 +        if( 0 > rc ) {
   52.71 +            goto Error;
   52.72 +        }
   52.73 +    }
   52.74 +
   52.75 +    /* send the data to my children */
   52.76 +    msg_cnt=0;
   52.77 +    for(i=0 ; i < node_data.n_children ; i++ ) {
   52.78 +        peer_rank=node_data.children_ranks[i];
   52.79 +        peer_rank=(peer_rank+root)%n_peers;
   52.80 +        rc=MCA_PML_CALL(isend(buffer,
   52.81 +                    count,dtype,peer_rank,
   52.82 +                    -OMPI_COMMON_TAG_BCAST,MCA_PML_BASE_SEND_STANDARD,
   52.83 +                    comm,&(requests[msg_cnt])));
   52.84 +        if( 0 > rc ) {
   52.85 +            goto Error;
   52.86 +        }
   52.87 +        msg_cnt++;
   52.88 +    }
   52.89 +    /* wait for send completion */
   52.90 +    if(msg_cnt) {
   52.91 +        /* only the isend requests posted above are outstanding here */
   52.92 +        ompi_request_wait_all(msg_cnt,requests,MPI_STATUSES_IGNORE);
   52.93 +    }
   52.94 +
   52.95 +    /* return */
   52.96 +    return OMPI_SUCCESS;
   52.97 +
   52.98 +Error:
   52.99 +    return rc;
  52.100 +}
    53.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    53.2 +++ b/ompi/patterns/comm/coll_ops.h	Tue Feb 19 22:50:56 2013 +0000
    53.3 @@ -0,0 +1,51 @@
    53.4 +/*
    53.5 + * Copyright (c) 2009-2012 Mellanox Technologies.  All rights reserved.
    53.6 + * Copyright (c) 2009-2012 Oak Ridge National Laboratory.  All rights reserved.
    53.7 + * Copyright (c) 2012      Los Alamos National Security, LLC.
    53.8 + *                         All rights reserved.
    53.9 + * $COPYRIGHT$
   53.10 + *
   53.11 + * Additional copyrights may follow
   53.12 + *
   53.13 + * $HEADER$
   53.14 + */
   53.15 +
   53.16 +#ifndef COMM_COLL_OP_TYPES_H
   53.17 +#define COMM_COLL_OP_TYPES_H
   53.18 +
   53.19 +#include "ompi_config.h"
   53.20 +#include "ompi/communicator/communicator.h"
   53.21 +#include "ompi/datatype/ompi_datatype.h"
   53.22 +#include "ompi/proc/proc.h"
   53.23 +
   53.24 +BEGIN_C_DECLS
   53.25 +
   53.26 +#define OMPI_COMMON_TAG_ALLREDUCE 99
   53.27 +#define OMPI_COMMON_TAG_BCAST     98
   53.28 +
   53.29 +
   53.30 +
   53.31 +
   53.32 +OMPI_DECLSPEC int comm_allgather_pml(void *src_buf, void *dest_buf, int count,
   53.33 +        ompi_datatype_t *dtype, int my_rank_in_group, int n_peers, 
   53.34 +        int *ranks_in_comm,ompi_communicator_t *comm);
   53.35 +OMPI_DECLSPEC int comm_allreduce_pml(void *sbuf, void *rbuf, int count,
   53.36 +        ompi_datatype_t *dtype, int my_rank_in_group,
   53.37 +        struct ompi_op_t *op, int n_peers,int *ranks_in_comm,
   53.38 +        ompi_communicator_t *comm);
   53.39 +OMPI_DECLSPEC int comm_bcast_pml(void *buffer, int root, int count,
   53.40 +        ompi_datatype_t *dtype, int my_rank_in_group,
   53.41 +        int n_peers, int *ranks_in_comm,ompi_communicator_t
   53.42 +        *comm);
   53.43 +
   53.44 +/* reduction operations supported */
   53.45 +#define OP_SUM 1
   53.46 +#define OP_MAX 2
   53.47 +#define OP_MIN 3
   53.48 +
   53.49 +#define TYPE_INT4 1
   53.50 +
   53.51 +
   53.52 +END_C_DECLS
   53.53 +
   53.54 +#endif /* COMM_COLL_OP_TYPES_H */
    54.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    54.2 +++ b/ompi/patterns/comm/commpatterns.h	Tue Feb 19 22:50:56 2013 +0000
    54.3 @@ -0,0 +1,22 @@
    54.4 +/*
    54.5 + * Copyright (c) 2009-2012 Mellanox Technologies.  All rights reserved.
    54.6 + * Copyright (c) 2009-2012 Oak Ridge National Laboratory.  All rights reserved.
    54.7 + * $COPYRIGHT$
    54.8 + *
    54.9 + * Additional copyrights may follow
   54.10 + *
   54.11 + * $HEADER$
   54.12 + */
   54.13 +
   54.14 +#ifndef COMM_NETPATTERNS_H
   54.15 +#define COMM_NETPATTERNS_H
   54.16 +
   54.17 +#include "ompi_config.h"
   54.18 +
   54.19 +BEGIN_C_DECLS
   54.20 +
   54.21 +#define MAX_TMP_BUFFER            8192
   54.22 +
   54.23 +END_C_DECLS
   54.24 +
   54.25 +#endif /* COMM_NETPATTERNS_H */
    55.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    55.2 +++ b/ompi/patterns/net/Makefile.am	Tue Feb 19 22:50:56 2013 +0000
    55.3 @@ -0,0 +1,18 @@
    55.4 +# Copyright (c) 2013 Oak Ridge National Laboratory.  All rights reserved.
    55.5 +# $COPYRIGHT$
    55.6 +# 
    55.7 +# Additional copyrights may follow
    55.8 +# 
    55.9 +# $HEADER$
   55.10 +#
   55.11 +
   55.12 +headers += \
   55.13 +        patterns/net/netpatterns.h \
   55.14 +        patterns/net/netpatterns_knomial_tree.h \
   55.15 +        patterns/net/coll_ops.h
   55.16 +
   55.17 +libmpi_la_SOURCES += \
   55.18 +		patterns/net/netpatterns_base.c \
   55.19 +		patterns/net/netpatterns_multinomial_tree.c \
   55.20 +		patterns/net/netpatterns_nary_tree.c \
   55.21 +		patterns/net/netpatterns_knomial_tree.c
    56.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    56.2 +++ b/ompi/patterns/net/allreduce.c	Tue Feb 19 22:50:56 2013 +0000
    56.3 @@ -0,0 +1,347 @@
    56.4 +/*
    56.5 + * Copyright (c) 2009-2012 Mellanox Technologies.  All rights reserved.
    56.6 + * Copyright (c) 2009-2012 Oak Ridge National Laboratory.  All rights reserved.
    56.7 + * Copyright (c) 2012      Los Alamos National Security, LLC.
    56.8 + *                         All rights reserved.
    56.9 + * $COPYRIGHT$
   56.10 + * 
   56.11 + * Additional copyrights may follow
   56.12 + * 
   56.13 + * $HEADER$
   56.14 + */
   56.15 +/** @file */
   56.16 +
   56.17 +#include "ompi_config.h"
   56.18 +
   56.19 +#include "ompi/constants.h"
   56.20 +#include "coll_sm2.h"
   56.21 +#include "ompi/op/op.h"
   56.22 +#include "ompi/datatype/ompi_datatype.h"
   56.23 +#include "ompi/communicator/communicator.h"
   56.24 +#include "ompi/mca/rte/rte.h"
   56.25 +
   56.26 +void send_completion(int status, struct ompi_process_name_t* peer, struct iovec* msg,
   56.27 +                     int count, ompi_rml_tag_t tag, void* cbdata)
   56.28 +{
   56.29 +    /* cbdata is the caller's int completion flag - mark the send as done */
   56.30 +    *(int *)cbdata=1;
   56.31 +}
   56.32 +
   56.33 +
   56.34 +void recv_completion(int status, struct ompi_process_name_t* peer, struct iovec* msg,
   56.35 +                     int count, ompi_rml_tag_t tag, void* cbdata)
   56.36 +{
   56.37 +    /* memory barrier first, then raise the caller's int completion flag */
   56.38 +    MB();
   56.39 +    *(int *)cbdata=1;
   56.40 +}
   56.41 +
   56.42 +
   56.43 +/**
   56.44 + * Reduce in place: src_dest_buf[i] = src_dest_buf[i] op src_buf[i] for
   56.45 + * count elements.  Only OP_SUM on TYPE_INT4 is implemented.
   56.46 + * Returns OMPI_SUCCESS, or OMPI_ERROR (buffers untouched) when op_type or
   56.47 + * data_type is not supported.
   56.48 + */
   56.49 +static int op_reduce(int op_type, void *src_dest_buf, void *src_buf, int count,
   56.50 +        int data_type)
   56.51 +{
   56.52 +    int ret=OMPI_SUCCESS;
   56.53 +
   56.54 +    switch (op_type) {
   56.55 +        case OP_SUM:
   56.56 +            switch (data_type) {
   56.57 +                case TYPE_INT4: {
   56.58 +                    int *int_src_ptr=(int *)src_buf;
   56.59 +                    int *int_src_dst_ptr=(int *)src_dest_buf;
   56.60 +                    int cnt;
   56.61 +                    for(cnt=0 ; cnt < count ; cnt++ ) {
   56.62 +                        int_src_dst_ptr[cnt]+=int_src_ptr[cnt];
   56.63 +                    }
   56.64 +                    break;
   56.65 +                }
   56.66 +                default:
   56.67 +                    ret=OMPI_ERROR;
   56.68 +                    break;
   56.69 +            }
   56.70 +            break;
   56.71 +        default:
   56.72 +            ret=OMPI_ERROR;
   56.73 +            break;
   56.74 +    }
   56.75 +    return ret;
   56.76 +}
   56.77 +
   56.78 +/**
   56.79 + * All-reduce for contiguous primitive types.  NOTE(review): draft code - names such
   56.80 + * as cnt, ret, dt_extent and sm_module are used below but not declared in this function. */
   56.81 +static
   56.82 +comm_allreduce(void *sbuf, void *rbuf, int count, opal_datatype_t *dtype, 
   56.83 +        int op_type, opal_list_t *peers)
   56.84 +{
   56.85 +    /* local variables */
   56.86 +    int rc=OMPI_SUCCESS,n_dts_per_buffer,n_data_segments,stripe_number;
   56.87 +    int pair_rank,exchange,extra_rank;
   56.88 +    int index_read,index_write;
   56.89 +    netpatterns_pair_exchange_node_t my_exchange_node;
   56.90 +    int my_rank,count_processed,count_this_stripe;
   56.91 +    size_t n_peers,message_extent,len_data_buffer;
   56.92 +    size_t dt_size;
   56.93 +    long long tag, base_tag;
   56.94 +    sm_work_buffer_t *sm_buffer_desc;
   56.95 +    opal_list_item_t *item;
   56.96 +    char scratch_bufers[2][MAX_TMP_BUFFER];
   56.97 +    int send_buffer=0;recv_buffer=1;
   56.98 +    char *sbuf_current,*rbuf_current;
   56.99 +    ompi_proc_t **proc_array;
  56.100 +    struct iovec send_iov, recv_iov;
  56.101 +    volatile int *recv_done, *send_done;
  56.102 +    int recv_completion_flag, send_completion_flag;
  56.103 +    int data_type;
  56.104 +
  56.105 +    /* get size of data needed - same layout as user data, so that
  56.106 +     *   we can apply the reduction routines directly on these buffers
  56.107 +     */
  56.108 +    rc=opal_datatype_type_size(dtype, &dt_size);
  56.109 +    if( OMPI_SUCCESS != rc ) {
  56.110 +        goto Error;
  56.111 +    }
  56.112 +    message_extent=dt_extent*count;
  56.113 +
  56.114 +    /* length of control and data regions */
  56.115 +    len_data_buffer=sm_module->data_memory_per_proc_per_segment;
  56.116 +
  56.117 +    /* number of data types copies that the scratch buffer can hold */
  56.118 +    n_dts_per_buffer=((int) MAX_TMP_BUFFER)/dt_size;
  56.119 +    if ( 0 == n_dts_per_buffer ) {
  56.120 +        rc=OMPI_ERROR;
  56.121 +        goto Error;
  56.122 +    }
  56.123 +
  56.124 +    /* need a read and a write buffer for a pair-wise exchange of data */
  56.125 +    n_dts_per_buffer/=2;
  56.126 +    len_data_buffer=n_dts_per_buffer*dt_size;
  56.127 +
  56.128 +    /* compute number of stripes needed to process this collective */
  56.129 +    n_data_segments=(count+n_dts_per_buffer -1 ) / n_dts_per_buffer ;
  56.130 +
  56.131 +    /* number of peers in the list */
  56.132 +    n_peers=opal_list_get_size(peers);
  56.133 +
  56.134 +    /* get my rank in the list */
  56.135 +    my_rank=0;
  56.136 +    for (item = opal_list_get_first(peers) ;
  56.137 +            item != opal_list_get_end(peers) ;
  56.138 +            item = opal_list_get_next(peers)) {
  56.139 +        if(ompi_proc_local()==(ompi_proc_t *)item){
  56.140 +            /* this is the pointer to my proc structure */
  56.141 +            break;
  56.142 +        }
  56.143 +        my_rank++;
  56.144 +    }
  56.145 +    proc_array=(ompi_proc_t **)malloc(sizeof(ompi_proc_t *)*n_peers);
  56.146 +    if( NULL == proc_array) {
  56.147 +        goto Error;
  56.148 +    }
  56.149 +    cnt=0;
  56.150 +    for (item = opal_list_get_first(peers) ;
  56.151 +            item != opal_list_get_end(peers) ;
  56.152 +            item = opal_list_get_next(peers)) {
  56.153 +        proc_array[cnt]=(ompi_proc_t *)item;
  56.154 +        cnt++;
  56.155 +    }
  56.156 +
  56.157 +    /* get my reduction communication pattern */
  56.158 +    ret=netpatterns_setup_recursive_doubling_tree_node(n_peers,my_rank,&my_exchange_node);
  56.159 +    if(OMPI_SUCCESS != ret){
  56.160 +        return ret;
  56.161 +    }
  56.162 +
  56.163 +    /* setup flags for non-blocking communications */    
  56.164 +    recv_done=&recv_completion_flag;
  56.165 +    send_done=&send_completion_flag;
  56.166 +
  56.167 +    /* set data type; NOTE(review): data_type is left unset for any other dtype */
  56.168 +    if(&opal_datatype_int4==dtype) {
  56.169 +        data_type=TYPE_INT4;
  56.170 +    }
  56.171 +
  56.172 +    count_processed=0;
  56.173 +
  56.174 +    /* get a pointer to the shared-memory working buffer */
  56.175 +    /* NOTE: starting with a rather synchronous approach */
  56.176 +    for( stripe_number=0 ; stripe_number < n_data_segments ; stripe_number++ ) {
  56.177 +
  56.178 +        /* get number of elements to process in this stripe */
  56.179 +        count_this_stripe=n_dts_per_buffer;
  56.180 +        if( count_processed + count_this_stripe > count )
  56.181 +            count_this_stripe=count-count_processed;
  56.182 +
  56.183 +        /* copy data from the input buffer into the temp buffer */
  56.184 +        sbuf_current=(char *)sbuf+count_processed*dt_size;
  56.185 +        memcopy(scratch_bufers[send_buffer],sbuf_current,count_this_stripe*dt_size);
  56.186 +
  56.187 +        /* copy data in from the "extra" source, if need be */
  56.188 +        if(0 < my_exchange_node->n_extra_sources)  {
  56.189 +
  56.190 +            if ( EXCHANGE_NODE == my_exchange_node->node_type ) {
  56.191 +                
  56.192 +                /*
  56.193 +                ** Receive data from extra node
  56.194 +                */
  56.195 +                
  56.196 +                extra_rank=my_exchange_node.rank_extra_source;
  56.197 +                recv_iov.iov_base=scratch_bufers[recv_buffer];
  56.198 +                recv_iov.iov_len=count_this_stripe*dt_size;
  56.199 +                rc = ompi_rte_recv(&(proc_array[extra_rank]->proc_name), &recv_iov, 1,
  56.200 +                        OMPI_RML_TAG_ALLREDUCE , 0);
  56.201 +                if(OMPI_SUCCESS != rc ) {
  56.202 +                    goto  Error;
  56.203 +                }
  56.204 +
  56.205 +                /* apply collective operation to first half of the data */
  56.206 +                if( 0 < count_this_stripe ) {
  56.207 +                    op_reduce(op_type,(void *)scratch_bufers[recv_buffer],
  56.208 +                            (void *)scratch_bufers[send_buffer], n_my_count,TYPE_INT4);
  56.209 +                }
  56.210 +
  56.211 +
  56.212 +            } else {
  56.213 +        
  56.214 +                /*
  56.215 +                ** Send data to "partner" node
  56.216 +                */
  56.217 +                extra_rank=my_exchange_node.rank_extra_source;
  56.218 +                send_iov.iov_base=scratch_bufers[send_buffer];
  56.219 +                send_iov.iov_len=count_this_stripe*dt_size;
  56.220 +                rc = ompi_rte_send(&(proc_array[extra_rank]->proc_name), &send_iov, 1,
  56.221 +                        OMPI_RML_TAG_ALLREDUCE , 0);
  56.222 +                if(OMPI_SUCCESS != rc ) {
  56.223 +                    goto  Error;
  56.224 +                }
  56.225 +            }
  56.226 +
  56.227 +            /* change pointer to scratch buffer - this way we can send data
  56.228 +            ** that we have summed w/o a memory copy, and receive data into the
  56.229 +            ** other buffer, w/o fear of overwriting data that has not yet
  56.230 +            ** completed being sent
  56.231 +            */
  56.232 +            recv_buffer^=1;
  56.233 +            send_buffer^=1;
  56.234 +        }
  56.235 +
  56.236 +        MB();
  56.237 +        /*
  56.238 +         * Signal parent that data is ready
  56.239 +         */
  56.240 +        tag=base_tag+1;
  56.241 +        my_ctl_pointer->flag=tag;
  56.242 +
  56.243 +        /* loop over data exchanges */
  56.244 +        for(exchange=0 ; exchange < my_exchange_node->n_exchanges ; exchange++) {
  56.245 +
  56.246 +            /* debug 
  56.247 +            t4=opal_sys_timer_get_cycles();
  56.248 +             end debug */
  56.249 +
  56.250 +
  56.251 +            my_write_pointer=my_tmp_data_buffer[index_write];
  56.252 +            my_read_pointer=my_tmp_data_buffer[index_read];
  56.253 +
  56.254 +            /* partner rank for this exchange step; NOTE(review): pair_rank is not read below */
  56.255 +            pair_rank=my_exchange_node->rank_exchanges[exchange];
  56.256 +
  56.257 +            *recv_done=0; 
  56.258 +            *send_done=0;
  56.259 +            MB();
  56.260 +
  56.261 +            /* post non-blocking receive; NOTE(review): targets scratch_bufers[send_buffer] and extra_rank */
  56.262 +            recv_iov.iov_base=scratch_bufers[send_buffer];
  56.263 +            recv_iov.iov_len=count_this_stripe*dt_size;
  56.264 +            rc = ompi_rte_recv_nb(&(proc_array[extra_rank]->proc_name), recv_iov, 1,
  56.265 +                        OMPI_RML_TAG_ALLREDUCE , 0, recv_completion, recv_done);
  56.266 +
  56.267 +            /* post non-blocking send */
  56.268 +            send_iov.iov_base=scratch_bufers[send_buffer];
  56.269 +            send_iov.iov_len=count_this_stripe*dt_size;
  56.270 +            rc = ompi_rte_send_nb(&(proc_array[extra_rank]->proc_name), send_iov, 1,
  56.271 +                        OMPI_RML_TAG_ALLREDUCE , 0, send_completion, send_done);
  56.272 +
  56.273 +            /* wait on receive completion */
  56.274 +            while(!(*recv_done) ) {
  56.275 +                opal_progress();
  56.276 +            }
  56.277 +                
  56.278 +            /* reduce the data */
  56.279 +            if( 0 < count_this_stripe ) {
  56.280 +                op_reduce(op_type,(void *)scratch_bufers[recv_buffer],
  56.281 +                        (void *)scratch_bufers[send_buffer], n_my_count,TYPE_INT4);
  56.282 +            }
  56.283 +
  56.284 +            
  56.285 +            /* get ready for next step */
  56.286 +            index_read=(exchange&1);
  56.287 +            index_write=((exchange+1)&1);
  56.288 +
  56.289 +            /* wait on send completion */
  56.290 +            while(!(*send_done) ) {
  56.291 +                opal_progress();
  56.292 +            }
  56.293 +                
  56.294 +        }
  56.295 +
  56.296 +        /* copy data in from the "extra" source, if need be */
  56.297 +        if(0 < my_exchange_node->n_extra_sources)  {
  56.298 +
  56.299 +            if ( EXTRA_NODE == my_exchange_node->node_type ) {
  56.300 +                /* 
  56.301 +                ** receive the data 
  56.302 +                ** */
  56.303 +                extra_rank=my_exchange_node->rank_extra_source;
  56.304 +
  56.305 +                recv_iov.iov_base=scratch_bufers[recv_buffer];
  56.306 +                recv_iov.iov_len=count_this_stripe*dt_size;
  56.307 +                rc = ompi_rte_recv(&(proc_array[extra_rank]->proc_name), &recv_iov, 1,
  56.308 +                        OMPI_RML_TAG_ALLREDUCE , 0);
  56.309 +                if(OMPI_SUCCESS != rc ) {
  56.310 +                    goto  Error;
  56.311 +                }
  56.312 +
  56.313 +            } else {
  56.314 +                /* send the data to the pair-rank outside of the power of 2 set
  56.315 +                ** of ranks.  NOTE(review): the call below is ompi_rte_recv, not a send
  56.316 +                */
  56.317 +
  56.318 +                extra_rank=my_exchange_node->rank_extra_source;
  56.319 +                send_iov.iov_base=scratch_bufers[recv_buffer];
  56.320 +                send_iov.iov_len=count_this_stripe*dt_size;
  56.321 +                rc = ompi_rte_recv(&(proc_array[extra_rank]->proc_name), &send_iov, 1,
  56.322 +                        OMPI_RML_TAG_ALLREDUCE , 0);
  56.323 +                if(OMPI_SUCCESS != rc ) {
  56.324 +                    goto  Error;
  56.325 +                }
  56.326 +            }
  56.327 +        }
  56.328 +
  56.329 +        /* copy data into the destination buffer */
  56.330 +        rc=ompi_datatype_copy_content_same_ddt(dtype, count_this_stripe,
  56.331 +                (char *)((char *)rbuf+dt_extent*count_processed),
  56.332 +                (char *)my_write_pointer);
  56.333 +        if( 0 != rc ) {
  56.334 +            return OMPI_ERROR;
  56.335 +        }
  56.336 +
  56.337 +        /* copy data from the temp buffer into the output buffer; NOTE(review): confirm memcopy argument order */
  56.338 +        rbuf_current=(char *)rbuf+count_processed*dt_size;
  56.339 +        memcopy(scratch_bufers[recv_buffer],rbuf_current,count_this_stripe*dt_size);
  56.340 +    
  56.341 +        /* update the count of elements processed */
  56.342 +        count_processed+=count_this_stripe;
  56.343 +    }
  56.344 +
  56.345 +    /* return */
  56.346 +    return rc;
  56.347 +
  56.348 +Error:
  56.349 +    return rc;
  56.350 +}
    57.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    57.2 +++ b/ompi/patterns/net/coll_ops.h	Tue Feb 19 22:50:56 2013 +0000
    57.3 @@ -0,0 +1,29 @@
    57.4 +/*
    57.5 + * Copyright (c) 2009-2012 Mellanox Technologies.  All rights reserved.
    57.6 + * Copyright (c) 2009-2012 Oak Ridge National Laboratory.  All rights reserved.
    57.7 + * $COPYRIGHT$
    57.8 + *
    57.9 + * Additional copyrights may follow
   57.10 + *
   57.11 + * $HEADER$
   57.12 + */
   57.13 +
   57.14 +#ifndef COMM_OP_TYPES_H
   57.15 +#define COMM_OP_TYPES_H
   57.16 +
   57.17 +#include "ompi_config.h"
   57.18 +
   57.19 +BEGIN_C_DECLS
   57.20 +
   57.21 +int comm_allreduce(void *sbuf, void *rbuf, int count, opal_datatype_t *dtype,
   57.22 +                int op, opal_list_t *peers);
   57.23 +
   57.24 +/* reduction operations supported */
   57.25 +#define OP_SUM 1
   57.26 +
   57.27 +#define TYPE_INT4 1
   57.28 +
   57.29 +
   57.30 +END_C_DECLS
   57.31 +
   57.32 +#endif /* COMM_OP_TYPES_H */
    58.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    58.2 +++ b/ompi/patterns/net/netpatterns.h	Tue Feb 19 22:50:56 2013 +0000
    58.3 @@ -0,0 +1,152 @@
    58.4 +/*
    58.5 + * Copyright (c) 2009-2012 Mellanox Technologies.  All rights reserved.
    58.6 + * Copyright (c) 2009-2012 Oak Ridge National Laboratory.  All rights reserved.
    58.7 + * Copyright (c) 2012      Los Alamos National Security, LLC.
    58.8 + *                         All rights reserved.
    58.9 +  * $COPYRIGHT$
   58.10 + *
   58.11 + * Additional copyrights may follow
   58.12 + *
   58.13 + * $HEADER$
   58.14 + */
   58.15 +
   58.16 +#ifndef COMM_PATTERNS_H
   58.17 +#define COMM_PATTERNS_H
   58.18 +
   58.19 +#include "ompi_config.h"
   58.20 +#include "orte/util/proc_info.h"
   58.21 +#include "orte/runtime/orte_globals.h"
   58.22 +#include "orte/util/name_fns.h"
   58.23 +
   58.24 +#include "netpatterns_knomial_tree.h"
   58.25 +
   58.26 +BEGIN_C_DECLS
   58.27 +
   58.28 +int netpatterns_base_err(const char* fmt, ...);
   58.29 +int netpatterns_register_mca_params(void);
   58.30 +
   58.31 +#if OPAL_ENABLE_DEBUG
   58.32 +extern int netpatterns_base_verbose; /* disabled by default */
   58.33 +OMPI_DECLSPEC extern int netpatterns_base_err(const char*, ...) __opal_attribute_format__(__printf__, 1, 2);
   58.34 +#define NETPATTERNS_VERBOSE(args)                                \
   58.35 +    do { /* usage: NETPATTERNS_VERBOSE(("n=%d", n)); */          \
   58.36 +        if(netpatterns_base_verbose > 0) {           \
   58.37 +            netpatterns_base_err("[%s]%s[%s:%d:%s] ",\
   58.38 +                    orte_process_info.nodename,                  \
   58.39 +                    ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),          \
   58.40 +                    __FILE__, __LINE__, __func__);               \
   58.41 +            netpatterns_base_err args;               \
   58.42 +            netpatterns_base_err("\n");              \
   58.43 +        }                                                        \
   58.44 +    } while(0) /* no ';' here: the caller's ';' ends the statement */
   58.45 +#else
   58.46 +#define NETPATTERNS_VERBOSE(args)
   58.47 +#endif
   58.48 +
   58.49 +#define FIND_BASE(base,myid,level,k)                                     \
   58.50 +    do { /* base = myid rounded down to a multiple of k^level */        \
   58.51 +        int fb_pow = 1; /* becomes k^level */                            \
   58.52 +        int fb_lvl;                                                      \
   58.53 +        int fb_cnt;                                                      \
   58.54 +                                                                         \
   58.55 +        (base) = 0;                                                      \
   58.56 +        for( fb_lvl = 0; fb_lvl < (level); fb_lvl++) {                   \
   58.57 +            fb_pow *= (k);                                               \
   58.58 +        }                                                                \
   58.59 +        fb_cnt = 1;                                                      \
   58.60 +        while((myid) >= fb_cnt*fb_pow){                                  \
   58.61 +            fb_cnt++;                                                    \
   58.62 +        }                                                                \
   58.63 +        (base) = fb_cnt*fb_pow - fb_pow; /* 0 when myid < k^level */     \
   58.64 +    } while(0)
   58.65 +
   58.66 +
   58.67 +
   58.68 +
   58.69 +/* enum for node type */
   58.70 +enum {
   58.71 +    ROOT_NODE,
   58.72 +    LEAF_NODE,
   58.73 +    INTERIOR_NODE
   58.74 +};
   58.75 +
   58.76 +
   58.77 +/*
   58.78 + * N-order tree node description: one rank's view of its parent and children
   58.79 + */
   58.80 +struct netpatterns_tree_node_t {
   58.81 +    /* my rank within the group */
   58.82 +    int my_rank;
   58.83 +    /* my node type - root, leaf, or interior */
   58.84 +    int my_node_type;
   58.85 +    /* number of nodes in the tree */
   58.86 +    int tree_size;
   58.87 +    /* number of parents (0/1) */
   58.88 +    int n_parents;
   58.89 +    /* number of children */
   58.90 +    int n_children;
   58.91 +    /* parent rank within the group */
   58.92 +    int parent_rank;
   58.93 +    /* children ranks within the group (indexed 0 .. n_children-1 by users) */
   58.94 +    int *children_ranks;
   58.95 +};
   58.96 +typedef struct netpatterns_tree_node_t netpatterns_tree_node_t;
   58.97 +
   58.98 +struct netpatterns_k_exchange_node_t;
   58.99 +/*
  58.100 + * N-order + k-nomial tree node description
  58.101 + */
  58.102 +struct netpatterns_narray_knomial_tree_node_t {
  58.103 +    /* my rank within the group */
  58.104 +    int my_rank;
  58.105 +    /* my node type - root, leaf, or interior */
  58.106 +    int my_node_type;
  58.107 +    /* number of nodes in the tree */
  58.108 +    int tree_size;
  58.109 +    /* number of parents (0/1) */
  58.110 +    int n_parents;
  58.111 +    /* number of children */
  58.112 +    int n_children;
  58.113 +    /* parent rank within the group */
  58.114 +    int parent_rank;
  58.115 +    /* children ranks within the group */
  58.116 +    int *children_ranks;
  58.117 +    /* Total number of ranks on this specific level */
  58.118 +    int level_size;
  58.119 +    /* Rank on this node inside of level */
  58.120 +    int rank_on_level;
  58.121 +    /* Knomial recursive gather information */
  58.122 +    struct netpatterns_k_exchange_node_t k_node;
  58.123 +};
  58.124 +typedef struct netpatterns_narray_knomial_tree_node_t 
  58.125 +netpatterns_narray_knomial_tree_node_t;
  58.126 +
  58.127 +
  58.128 +/* Init code for common_netpatterns */
  58.129 +OMPI_DECLSPEC int netpatterns_init(void);
  58.130 +
  58.131 +/* setup an n-array tree */
  58.132 +OMPI_DECLSPEC int netpatterns_setup_narray_tree(int tree_order, int my_rank, int num_nodes,
  58.133 +        netpatterns_tree_node_t *my_node);
  58.134 +/* setup an n-array tree with k-nomial levels */
  58.135 +OMPI_DECLSPEC int netpatterns_setup_narray_knomial_tree( int tree_order, int my_rank, int num_nodes,
  58.136 +        netpatterns_narray_knomial_tree_node_t *my_node);
  58.137 +
  58.138 +/* setup a multi-nomial tree - for each node in the tree
  58.139 + *  this returns its parent and its children
  58.140 + */
  58.141 +OMPI_DECLSPEC int netpatterns_setup_multinomial_tree(int tree_order, int num_nodes,
  58.142 +        netpatterns_tree_node_t *tree_nodes);
  58.143 +
  58.144 +OMPI_DECLSPEC int netpatterns_setup_narray_tree_contigous_ranks(int tree_order,
  58.145 +        int num_nodes, netpatterns_tree_node_t **tree_nodes);
  58.146 +
  58.147 +/* calculate the nearest power of radix that is equal to or greater
  58.148 + * than size, with the specified radix.  The resulting tree is of
  58.149 + * depth n_lvls.
  58.150 + */
  58.151 +OMPI_DECLSPEC int roundup_to_power_radix( int radix, int size, int *n_lvls );
  58.152 +
  58.153 +END_C_DECLS
  58.154 +
  58.155 +#endif /* COMM_PATTERNS_H */
    59.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    59.2 +++ b/ompi/patterns/net/netpatterns_base.c	Tue Feb 19 22:50:56 2013 +0000
    59.3 @@ -0,0 +1,53 @@
    59.4 +/*
    59.5 + *
    59.6 + * Copyright (c) 2009-2012 Mellanox Technologies.  All rights reserved.
    59.7 + * Copyright (c) 2009-2012 Oak Ridge National Laboratory.  All rights reserved.
    59.8 + * $COPYRIGHT$
    59.9 + * 
   59.10 + * Additional copyrights may follow
   59.11 + * 
   59.12 + * $HEADER$
   59.13 + */
   59.14 +#include "opal/mca/base/mca_base_param.h"
   59.15 +#include "ompi/include/ompi/constants.h"
   59.16 +#include "netpatterns.h"
   59.17 +
   59.18 +int netpatterns_base_verbose = 0; /* disabled by default */
   59.19 +
   59.20 +int netpatterns_register_mca_params(void)
   59.21 +{
   59.22 +    /* Register the "common" int parameter "netpatterns_base_verbose" with
   59.23 +     * default 0, passing &netpatterns_base_verbose as the value location.
   59.24 +     * The registration result is not inspected. */
   59.25 +    (void) mca_base_param_reg_int_name("common", "netpatterns_base_verbose",
   59.26 +                                       "Verbosity level of the NETPATTERNS framework",
   59.27 +                                       false, false, 0,
   59.28 +                                       &netpatterns_base_verbose);
   59.29 +    return OMPI_SUCCESS;
   59.30 +}
   59.31 +
   59.32 +int netpatterns_base_err(const char* fmt, ...)
   59.33 +{
   59.34 +    /* printf-style: format to stderr, hand back vfprintf's return value */
   59.35 +    int n_written;
   59.36 +    va_list ap;
   59.37 +    va_start(ap, fmt);
   59.38 +    n_written = vfprintf(stderr, fmt, ap);
   59.39 +    va_end(ap);
   59.40 +    return n_written;
   59.41 +}
   59.42 +
   59.43 +int netpatterns_init(void)
   59.44 +{
   59.45 +    /* netpatterns has no component of its own, so every component that uses
   59.46 +     * it calls this; the static flag makes each call after the first return
   59.47 +     * OMPI_SUCCESS without registering again (the flag is a plain int). */
   59.48 +    static int already_initialized = 0;
   59.49 +
   59.50 +    if (0 != already_initialized) {
   59.51 +        return OMPI_SUCCESS;
   59.52 +    }
   59.53 +
   59.54 +    already_initialized = 1;
   59.55 +    return netpatterns_register_mca_params();
   59.56 +}
    60.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    60.2 +++ b/ompi/patterns/net/netpatterns_knomial_tree.c	Tue Feb 19 22:50:56 2013 +0000
    60.3 @@ -0,0 +1,932 @@
    60.4 +/*
    60.5 + * Copyright (c) 2009-2012 Mellanox Technologies.  All rights reserved.
    60.6 + * Copyright (c) 2009-2012 Oak Ridge National Laboratory.  All rights reserved.
    60.7 +* $COPYRIGHT$
    60.8 + *
    60.9 + * Additional copyrights may follow
   60.10 + *
   60.11 + * $HEADER$
   60.12 + */
   60.13 +
   60.14 +#include "ompi_config.h"
   60.15 +#ifdef HAVE_UNISTD_H
   60.16 +#include <unistd.h>
   60.17 +#endif
   60.18 +#include <sys/types.h>
   60.19 +#ifdef HAVE_SYS_MMAN_H
   60.20 +#include <sys/mman.h>
   60.21 +#endif
   60.22 +#include <fcntl.h>
   60.23 +#include <stdlib.h>
   60.24 +#include <assert.h>
   60.25 +
   60.26 +#include "ompi/constants.h"
   60.27 +#include "netpatterns.h"
   60.28 +
   60.29 +/* setup recursive doubling tree node */
   60.30 +
   60.31 +OMPI_DECLSPEC int netpatterns_setup_recursive_knomial_allgather_tree_node(
   60.32 +        int num_nodes, int node_rank, int tree_order, int *hier_ranks,
   60.33 +        netpatterns_k_exchange_node_t *exchange_node)
   60.34 +{
   60.35 +    /* local variables */
   60.36 +    int i, j, cnt, i_temp;
   60.37 +    int knt,knt2,kk, ex_node, stray;
   60.38 +    int n_levels,pow_k;
   60.39 +    int k_temp1;
   60.40 +    int k_temp2;
   60.41 +    int myid, reindex_myid = 0;
   60.42 +    int base, peer_base,base_temp;
   60.43 +    int peer; 
   60.44 +    int *prev_data = NULL;
   60.45 +    int *current_data = NULL;
   60.46 +    int *group_info = NULL;
   60.47 +
   60.48 +
   60.49 +    NETPATTERNS_VERBOSE(
   60.50 +            ("Enter netpatterns_setup_recursive_knomial_tree_node(num_nodes=%d, node_rank=%d, tree_order=%d)",
   60.51 +                num_nodes, node_rank, tree_order));
   60.52 +
   60.53 +    assert(num_nodes > 1);
   60.54 +    assert(tree_order > 1);
   60.55 +    if (tree_order > num_nodes) {
   60.56 +        tree_order = num_nodes;
   60.57 +    }
   60.58 +
   60.59 +    /* k-nomial radix */
   60.60 +    exchange_node->tree_order = tree_order;
   60.61 +
   60.62 +    /* Calculate the number of levels in the tree for 
   60.63 +     * the largest power of tree_order less than or 
   60.64 +     * equal to the group size
   60.65 +     */
   60.66 +    n_levels = 0;
   60.67 +    cnt=1;
   60.68 +    while ( num_nodes > cnt ) {
   60.69 +        cnt *= tree_order;
   60.70 +        n_levels++;
   60.71 +    }
   60.72 +    /* this is the actual number of recursive k-ing steps
   60.73 +     * we will perform, the last step may not be a full 
   60.74 +     * step depending on the outcome of the next conditional
   60.75 +     */
   60.76 +    pow_k = n_levels;
   60.77 +
   60.78 +    /* figure out the largest power of tree_order that is less than or equal to
   60.79 +     * num_nodes */
   60.80 +    if ( cnt > num_nodes) {
   60.81 +        cnt /= tree_order;
   60.82 +        n_levels--;
   60.83 +    }
   60.84 +
   60.85 +    /*exchange_node->log_tree_order = n_levels;*/
   60.86 +    exchange_node->log_tree_order = pow_k;
   60.87 +    exchange_node->n_largest_pow_tree_order = cnt;
   60.88 +
   60.89 +    
   60.90 +    /* find the number of complete groups of size tree_order, tree_order^2, tree_order^3,...,tree_order^pow_k */
   60.91 +    /* I don't think we need to cache this info, i.e. this group_info array */
   60.92 +    group_info = (int *) calloc(pow_k , sizeof(int));
   60.93 +    group_info[0] = num_nodes/tree_order;
   60.94 +    /*fprintf(stderr,"Number of complete groups of power 1 is %d\n",group_info[0]);*/
   60.95 +    for ( i = 1; i < pow_k; i ++) {
   60.96 +        group_info[i] = group_info[i-1]/tree_order;
   60.97 +        /*fprintf(stderr,"Number of complete groups of power %d is %d\n",i+1,group_info[i]);*/
   60.98 +
   60.99 +    }
  60.100 +
  60.101 +    /* find number of incomplete groups and number of ranks belonging to those groups */
  60.102 +    knt=0;
  60.103 +    while (knt <= (pow_k - 1) && group_info[knt] > 0) {
  60.104 +        knt++;
  60.105 +    }
  60.106 +    knt--;
  60.107 +    /*fprintf(stderr,"Maximal power of k is %d and the number of incomplete groups is %d \n", knt+1 ,tree_order - group_info[knt] );*/
  60.108 +    
  60.109 +    /* k_temp is a synonym for cnt which is the largest full power of k group */
  60.110 +    /* now, start the calculation to find the first stray rank aka "extra" rank */ 
  60.111 +    stray = 0;
  60.112 +    /*fprintf(stderr,"Maximal power of k %d, first stragler rank is %d and the number of straglers is %d\n",cnt, 
  60.113 +                                                                           cnt*group_info[knt],
  60.114 +                                                                           num_nodes - cnt*group_info[knt]);*/
  60.115 +
  60.116 +
  60.117 +    /* cache this info, it's muy importante */
  60.118 +    stray = cnt*group_info[knt];
  60.119 +    exchange_node->k_nomial_stray = stray;
  60.120 +
  60.121 +
  60.122 +
  60.123 +    /* before we do this, we need to first reindex */
  60.124 +    /* reindexing phase */
  60.125 +     /* this is the reindex phase */
  60.126 +    exchange_node->reindex_map = (int *) malloc(num_nodes*sizeof(int));
  60.127 +    /* this is the inverse map */
  60.128 +    exchange_node->inv_reindex_map = (int *) malloc(num_nodes*sizeof(int));
  60.129 +    /*int reindex_myid;*/
  60.130 +    /* reindex */
  60.131 +    if( stray < num_nodes ) {
  60.132 +        /* find the first proxy rank */
  60.133 +        peer = stray - cnt;
  60.134 +        /* fix all ranks prior to this rank */
  60.135 +        for( i = 0; i < peer; i++){
  60.136 +            exchange_node->reindex_map[i] = i;
  60.137 +        }
  60.138 +        /* now, start the swap */
  60.139 +        exchange_node->reindex_map[peer] = peer;
  60.140 +        for( i = (peer+1); i < (peer + (num_nodes - stray)+1); i++) {
  60.141 +            exchange_node->reindex_map[i] = exchange_node->reindex_map[i-1] + 2;
  60.142 +        }
  60.143 +        i_temp = i;
  60.144 +        for( i = i_temp; i < stray; i++) {
  60.145 +            exchange_node->reindex_map[i] = exchange_node->reindex_map[i-1] + 1;
  60.146 +        }
  60.147 +        /* now, finish it off */
  60.148 +        exchange_node->reindex_map[stray] = peer + 1;
  60.149 +        for( i = (stray+1); i < num_nodes; i++) {
  60.150 +            exchange_node->reindex_map[i] = exchange_node->reindex_map[i-1] + 2;
  60.151 +        }
  60.152 +        /* debug print */
  60.153 +        /*
  60.154 +        for( i = 0; i < np; i++){
  60.155 +            fprintf(stderr,"%d ",reindex_map[i]);
  60.156 +        }
  60.157 +        fprintf(stderr,"\n");
  60.158 +        */
  60.159 +    } else {
  60.160 +        /* we have no extras, trivial reindexing */
  60.161 +        for( i = 0; i < num_nodes; i++){
  60.162 +            exchange_node->reindex_map[i] = i;
  60.163 +        }
  60.164 +    }
  60.165 +    /* finished reindexing */
  60.166 +
  60.167 +    /* Now, I need to get my rank in the new indexing */
  60.168 +    for( i = 0; i < num_nodes; i++ ){
  60.169 +        if( node_rank == exchange_node->reindex_map[i] ){
  60.170 +            exchange_node->reindex_myid = i;
  60.171 +            break;
  60.172 +        }
  60.173 +    }
  60.174 +    /* Now, let's compute the inverse mapping here */
  60.175 +    for( i = 0; i < num_nodes; i++){
  60.176 +        j = 0;
  60.177 +        while(exchange_node->reindex_map[j] != i ){
  60.178 +            j++;
  60.179 +        }
  60.180 +        exchange_node->inv_reindex_map[i] = j;
  60.181 +    }
  60.182 +
  60.183 +
  60.184 +    /* Now we get the data sizes we should expect at each level */
  60.185 +    /* now get the size of the data I am to receive from each peer */
  60.186 +    /*int **payload_info;*/
  60.187 +    prev_data = (int *) malloc( num_nodes*sizeof(int) );
  60.188 +    if( NULL == prev_data ) {
  60.189 +        goto Error;
  60.190 +    }
  60.191 +
  60.192 +    current_data = (int *) malloc( num_nodes*sizeof(int) );
  60.193 +    if( NULL == current_data ) {
  60.194 +        goto Error;
  60.195 +    }
  60.196 +
  60.197 +
  60.198 +    exchange_node->payload_info = (netpatterns_payload_t **) malloc(sizeof(netpatterns_payload_t *)*pow_k);
  60.199 +    if( NULL == exchange_node->payload_info) {
  60.200 +        goto Error;
  60.201 +    }
  60.202 +
  60.203 +    for(i = 0; i < pow_k; i++){
  60.204 +        exchange_node->payload_info[i] = (netpatterns_payload_t *) malloc(sizeof(netpatterns_payload_t)*(tree_order-1));
  60.205 +        if( NULL == exchange_node->payload_info[i]) {
  60.206 +            goto Error;
  60.207 +        }
  60.208 +
  60.209 +    }
  60.210 +    /* initialize the payload array
  60.211 +       This is the money struct, just need to initialize this with 
  60.212 +       the subgroup information */ 
  60.213 +    /*
  60.214 +    for(i = 0; i < num_nodes; i++){
  60.215 +        prev_data[i] = 1;
  60.216 +        current_data[i] = 1;
  60.217 +    }
  60.218 +    */
  60.219 +
  60.220 +    for(i = 0; i < num_nodes; i++){
  60.221 +        prev_data[i] = hier_ranks[i];
  60.222 +        current_data[i] = hier_ranks[i];
  60.223 +    }
  60.224 +
  60.225 +    /* everyone will need to do this loop over all ranks 
  60.226 +     * Phase I calculate the contribution from the extra ranks 
  60.227 +     */
  60.228 +    for( myid = 0; myid < num_nodes; myid++) {
  60.229 +        /* get my new rank */
  60.230 +        for( j = 0; j < num_nodes; j++ ){
  60.231 +            /* this will be satisfied for one of the indices */
  60.232 +            if( myid == exchange_node->reindex_map[j] ){
  60.233 +                reindex_myid = j;
  60.234 +                break;
  60.235 +            }
  60.236 +        }
  60.237 +
  60.238 +        for( j = stray; j < num_nodes; j++) {
  60.239 +            if(reindex_myid == ( j - cnt )) {
  60.240 +                /* then this is a proxy rank */
  60.241 +                prev_data[myid] += prev_data[exchange_node->reindex_map[j]];
  60.242 +                break;
  60.243 +            }
  60.244 +
  60.245 +        }
  60.246 +    }
  60.247 +
  60.248 +    /* Phase II calculate the contribution from each recursive k - ing level
  60.249 +     *
  60.250 +     */
  60.251 +    k_temp1 = tree_order; /* k^1 */
  60.252 +    k_temp2 = 1;   /* k^0 */
  60.253 +    peer_base = 0;
  60.254 +    base_temp = 0;
  60.255 +    for( i = 0; i < pow_k; i++) {
  60.256 +        /* get my new rank */
  60.257 +        for( myid = 0; myid < num_nodes; myid++){
  60.258 +            current_data[myid] = prev_data[myid];
  60.259 +            /*fprintf(stderr,"my current data at level %d is %d\n",i+1,current_data[myid]);*/
  60.260 +            for( j = 0; j < num_nodes; j++ ){
  60.261 +                if( myid == exchange_node->reindex_map[j] ){
  60.262 +                    reindex_myid = j;
  60.263 +                    break;
  60.264 +                }
  60.265 +            }
  60.266 +            if( reindex_myid < stray ) { 
  60.267 +                /* now start the actual algorithm */
  60.268 +                FIND_BASE(base,reindex_myid,i+1,tree_order);
  60.269 +                for( j = 0; j < ( tree_order - 1 ); j ++ ) {
  60.270 +                    peer = base + (reindex_myid + k_temp2*(j+1))%k_temp1;
  60.271 +                    if( peer < stray ) {
  60.272 +                        /*fprintf(stderr,"getting %d bytes \n",prev_data[reindex_map[peer]]);*/
  60.273 +                        /* then get the data */
  60.274 +                        if( node_rank == myid ){
  60.275 +                            exchange_node->payload_info[i][j].r_len = prev_data[exchange_node->reindex_map[peer]];
  60.276 +                            /*fprintf(stderr,"exchange_node->payload_info[%d][%d].r_len %d\n",i,j,prev_data[exchange_node->reindex_map[peer]]);*/
  60.277 +                            if( i > 0 ) {
  60.278 +                                
  60.279 +                                /* find my len and offset */
  60.280 +                                FIND_BASE(peer_base,peer,i,tree_order);
  60.281 +                                /* I do not want to mess with this, but it seems that I have no choice */
  60.282 +                               ex_node = exchange_node->reindex_map[peer_base];
  60.283 +                               /* now, find out how far down the line this guy really is */
  60.284 +                               knt2 =0;
  60.285 +                               for(kk = 0; kk < ex_node; kk++){ 
  60.286 +                                   knt2 += hier_ranks[kk];
  60.287 +                               }
  60.288 +                                exchange_node->payload_info[i][j].r_offset = knt2; 
  60.289 +                                /*fprintf(stderr,"exchange_node->payload_info[%d][%d].r_offset %d\n",i,j,exchange_node->payload_info[i][j].r_offset);*/
  60.290 +                                
  60.291 +                                FIND_BASE(base_temp,reindex_myid,i,tree_order);
  60.292 +                                ex_node = exchange_node->reindex_map[base_temp];
  60.293 +                                knt2 = 0;
  60.294 +                                for( kk = 0; kk < ex_node; kk++){
  60.295 +                                    knt2 += hier_ranks[kk];
  60.296 +                                }
  60.297 +                                exchange_node->payload_info[i][j].s_offset =
  60.298 +                                                                  knt2; /* exchange_node->reindex_map[base_temp]; */
  60.299 +                                /*fprintf(stderr,"exchange_node->payload_info[%d][%d].s_offset %d\n",i,j,exchange_node->payload_info[i][j].s_offset);*/
  60.300 +                            } else {
  60.301 +                                ex_node = exchange_node->reindex_map[peer];
  60.302 +                                knt2 =0;
  60.303 +                                for(kk = 0; kk < ex_node; kk++){
  60.304 +                                    knt2 += hier_ranks[kk];
  60.305 +                                }
  60.306 +                                exchange_node->payload_info[i][j].r_offset =
  60.307 +                                    knt2; /*exchange_node->reindex_map[peer]; */
  60.308 +                                /*fprintf(stderr,"exchange_node->payload_info[%d][%d].r_offset %d\n",i,j,exchange_node->payload_info[i][j].r_offset);*/
  60.309 +                                knt2 = 0;
  60.310 +                                for(kk = 0; kk < myid; kk++){
  60.311 +                                    knt2 += hier_ranks[kk];
  60.312 +                                }
  60.313 +                                exchange_node->payload_info[i][j].s_offset = knt2; 
  60.314 +                                /*fprintf(stderr,"exchange_node->payload_info[%d][%d].s_offset %d\n",i,j, exchange_node->payload_info[i][j].s_offset);*/
  60.315 +                            }
  60.316 +                            /* how much I am to receive from this peer on this level */
  60.317 +                            /* how much I am to send to this peer on this level */
  60.318 +                            exchange_node->payload_info[i][j].s_len = prev_data[node_rank];
  60.319 +                            /*fprintf(stderr,"exchange_node->payload_info[%d][%d].s_len %d\n",i,j,prev_data[node_rank]);*/
  60.320 +                            /*fprintf(stderr,"I am rank %d receiveing %d bytes from rank %d at level %d\n",node_rank,
  60.321 +                                                                        prev_data[exchange_node->reindex_map[peer]],
  60.322 +                                                                        exchange_node->reindex_map[peer], i+1);*/
  60.323 +                            /*fprintf(stderr,"I am rank %d sending %d bytes to rank %d at level %d\n",node_rank,prev_data[myid],
  60.324 +                                      exchange_node->reindex_map[peer],i+1);*/
  60.325 +                        }
  60.326 +
  60.327 +                        current_data[myid] += prev_data[exchange_node->reindex_map[peer]];
  60.328 +                    }
  60.329 +                }
  60.330 +            }
  60.331 +
  60.332 +
  60.333 +        }
  60.334 +        k_temp1 *= tree_order;
  60.335 +        k_temp2 *= tree_order;
  60.336 +        /* debug print */
  60.337 +       /* fprintf(stderr,"Level %d current data ",i+1);*/
  60.338 +        for( j = 0; j < num_nodes; j++){
  60.339 +           /* fprintf(stderr,"%d ",current_data[j]); */
  60.340 +            prev_data[j] = current_data[j];
  60.341 +        }
  60.342 +       /* fprintf(stderr,"\n");*/
  60.343 +        
  60.344 +    }
  60.345 +
  60.346 +
  60.347 +    /* this is the natural way to do recursive k-ing */
  60.348 +    /* should never have more than one extra rank per proxy */
  60.349 +    if( exchange_node->reindex_myid >= stray ){
  60.350 +        /*fprintf(stderr,"Rank %d is mapped onto proxy rank %d \n",exchange_node->reindex_myid,exchange_node->reindex_myid - cnt);*/
  60.351 +        exchange_node->node_type = EXTRA_NODE;
  60.352 +    } else {
  60.353 +        exchange_node->node_type = EXCHANGE_NODE;
  60.354 +    }
  60.355 +
  60.356 +    /* set node characteristics - node that is not within the largest
  60.357 +     * power of tree_order will just send its data to node that will participate
  60.358 +     * in the recursive k-ing, and get the result back at the end.
  60.359 +     * set the initial and final data exchanges - those that are not
  60.360 +     * part of the recursive k-ing.
  60.361 +     */
  60.362 +    if (EXCHANGE_NODE == exchange_node->node_type)  {
  60.363 +        exchange_node->n_extra_sources = 0;
  60.364 +        for( i = stray; i < num_nodes; i++) {
  60.365 +            if(exchange_node->reindex_myid == ( i - cnt )) {
  60.366 +                /* then I am a proxy rank and there is only a 
  60.367 +                 * single extra source
  60.368 +                 */
  60.369 +                exchange_node->n_extra_sources = 1;
  60.370 +                break;
  60.371 +            }
  60.372 +        }
  60.373 +
  60.374 +        if (exchange_node->n_extra_sources > 0) {
  60.375 +            exchange_node->rank_extra_sources_array = (int *) malloc
  60.376 +                (exchange_node->n_extra_sources * sizeof(int));
  60.377 +            if( NULL == exchange_node->rank_extra_sources_array ) {
  60.378 +                goto Error;
  60.379 +            }
  60.380 +            /* you broke above */
  60.381 +            exchange_node->rank_extra_sources_array[0] = exchange_node->reindex_map[i];
  60.382 +        } else {
  60.383 +            exchange_node->rank_extra_sources_array = NULL;
  60.384 +        }
  60.385 +    } else {
  60.386 +        /* I am an extra rank, find my proxy rank */
  60.387 +        exchange_node->n_extra_sources = 1;
  60.388 +
  60.389 +        exchange_node->rank_extra_sources_array = (int *) malloc
  60.390 +            (exchange_node->n_extra_sources * sizeof(int));
  60.391 +        if( NULL == exchange_node->rank_extra_sources_array ) {
  60.392 +            goto Error;
  60.393 +        }
  60.394 +        exchange_node->rank_extra_sources_array[0] = exchange_node->reindex_map[exchange_node->reindex_myid - cnt];
  60.395 +    }
  60.396 +
  60.397 +
  60.398 +    /* set the exchange pattern */
  60.399 +    if (EXCHANGE_NODE == exchange_node->node_type) {
  60.400 +        /* yep, that's right PLUS 1 */
  60.401 +        exchange_node->n_exchanges = n_levels + 1;
  60.402 +        /* initialize this */
  60.403 +        exchange_node->n_actual_exchanges = 0;
  60.404 +        /* Allocate 2-dimensional array that keeps
  60.405 +         rank exchange information for each step*/ 
  60.406 +        exchange_node->rank_exchanges = (int **) malloc
  60.407 +            (exchange_node->n_exchanges * sizeof(int *));
  60.408 +        if(NULL == exchange_node->rank_exchanges) {
  60.409 +            goto Error;
  60.410 +        }
  60.411 +        for (i = 0; i < exchange_node->n_exchanges; i++) {
  60.412 +            exchange_node->rank_exchanges[i] = (int *) malloc
  60.413 +                ((tree_order - 1) * sizeof(int));
  60.414 +            if( NULL == exchange_node->rank_exchanges ) {
  60.415 +                goto Error;
  60.416 +            }
  60.417 +        }
  60.418 +        k_temp1 = tree_order;
  60.419 +        k_temp2 = 1;
  60.420 +        /* fill in exchange partners */
  60.421 +        /* Ok, now we start with the actual algorithm */
  60.422 +        for( i = 0; i < exchange_node->n_exchanges; i ++) {
  60.423 +            /*fprintf(stderr,"Starting Level %d\n",i+1);*/
  60.424 +
  60.425 +            FIND_BASE(base,exchange_node->reindex_myid,i+1,tree_order);
  60.426 +            /*fprintf(stderr,"Myid %d base %d\n",node_rank,base);*/
  60.427 +            for( j = 0; j < (tree_order-1); j ++ ) {
  60.428 +                peer = base + (exchange_node->reindex_myid + k_temp2*(j+1))%k_temp1;
  60.429 +                if ( peer < stray ) {
  60.430 +                    exchange_node->rank_exchanges[i][j] = exchange_node->reindex_map[peer];
  60.431 +                    /* an actual exchange occurs, bump the counter */
  60.432 +                   
  60.433 +                } else {
  60.434 +                    /* out of range, skip it - do not bump the n_actual_exchanges counter */
  60.435 +                    exchange_node->rank_exchanges[i][j] = -1;
  60.436 +                }
  60.437 +            
  60.438 +            }
  60.439 +            k_temp1 *= tree_order;
  60.440 +            k_temp2 *= tree_order;
  60.441 +        }
  60.442 +        for(i = 0; i < pow_k; i++){
  60.443 +            for(j = 0; j < (tree_order-1); j++){
  60.444 +                if(-1 != exchange_node->rank_exchanges[i][j]){
  60.445 +                    /* then bump the counter */
  60.446 +                    exchange_node->n_actual_exchanges++;
  60.447 +                }
  60.448 +            }
  60.449 +        }
  60.450 +
  60.451 +    } else {
  60.452 +        /* we are extra ranks and we don't participate in the exchange :( */
  60.453 +        exchange_node->n_exchanges=0;
  60.454 +        exchange_node->rank_exchanges=NULL;
  60.455 +    }
  60.456 +
  60.457 +
  60.458 +    /* set the number of tags needed per stripe - this must be the
  60.459 +     *   same across all procs in the communicator.
  60.460 +     */
  60.461 +    /* do we need this one */
  60.462 +    exchange_node->n_tags = tree_order * n_levels + 1;
  60.463 +    
  60.464 +    free(prev_data);
  60.465 +    free(current_data);
  60.466 +    free(group_info);
  60.467 +
  60.468 +    /* successful return */
  60.469 +    return OMPI_SUCCESS;
  60.470 +
  60.471 +Error:
  60.472 +
  60.473 +    if (NULL != exchange_node->rank_extra_sources_array) {
  60.474 +        free(exchange_node->rank_extra_sources_array);
  60.475 +    }
  60.476 +
  60.477 +    if (NULL != exchange_node->rank_exchanges) {
  60.478 +        for (i = 0; i < exchange_node->n_exchanges; i++) {
  60.479 +            if (NULL != exchange_node->rank_exchanges[i]) {
  60.480 +                free(exchange_node->rank_exchanges[i]);
  60.481 +            }
  60.482 +        }
  60.483 +        free(exchange_node->rank_exchanges);
  60.484 +    }
  60.485 +
  60.486 +    if (NULL != prev_data ){
  60.487 +        free(prev_data);
  60.488 +    }
  60.489 +
  60.490 +    if(NULL != current_data) {
  60.491 +        free(current_data);
  60.492 +    }
  60.493 +
  60.494 +    if(NULL != group_info) {
  60.495 +        free(group_info);
  60.496 +    }
  60.497 +
  60.498 +    /* error return */
  60.499 +    return OMPI_ERROR;
  60.500 +}
  60.501 +
  60.502 +
  60.503 +OMPI_DECLSPEC int netpatterns_setup_recursive_knomial_tree_node(
  60.504 +        int num_nodes, int node_rank, int tree_order,
  60.505 +        netpatterns_k_exchange_node_t *exchange_node)
  60.506 +{
  60.507 +    /* local variables */
  60.508 +    int i, j, tmp, cnt;
  60.509 +    int n_levels;
  60.510 +    int k_base, kpow_num, peer; 
  60.511 +
  60.512 +    NETPATTERNS_VERBOSE(
  60.513 +            ("Enter netpatterns_setup_recursive_knomial_tree_node(num_nodes=%d, node_rank=%d, tree_order=%d)",
  60.514 +                num_nodes, node_rank, tree_order));
  60.515 +
  60.516 +    assert(num_nodes > 1);
  60.517 +    assert(tree_order > 1);
  60.518 +    if (tree_order > num_nodes) {
  60.519 +        tree_order = num_nodes;
  60.520 +    }
  60.521 +
  60.522 +    exchange_node->tree_order = tree_order;
  60.523 +
  60.524 +    /* figure out number of levels in the tree */
  60.525 +    n_levels = 0;
  60.526 +    /* cnt - number of ranks in given level */
  60.527 +    cnt=1;
  60.528 +    while ( num_nodes > cnt ) {
  60.529 +        cnt *= tree_order;
  60.530 +        n_levels++;
  60.531 +    };
  60.532 +
  60.533 +    /* figure out the largest power of tree_order that is less than or equal to
  60.534 +     * num_nodes */
  60.535 +    if ( cnt > num_nodes) {
  60.536 +        cnt /= tree_order;
  60.537 +        n_levels--;
  60.538 +    }
  60.539 +
  60.540 +    exchange_node->log_tree_order = n_levels;
  60.541 +    exchange_node->n_largest_pow_tree_order = cnt;
  60.542 +
  60.543 +    /* set node characteristics - node that is not within the largest
  60.544 +     *  power of tree_order will just send its data to node that will participate
  60.545 +     *  in the recursive doubling, and get the result back at the end.
  60.546 +     */
  60.547 +    if (node_rank + 1 > cnt) {
  60.548 +        exchange_node->node_type = EXTRA_NODE;
  60.549 +    } else {
  60.550 +        exchange_node->node_type = EXCHANGE_NODE;
  60.551 +    }
  60.552 +
  60.553 +
  60.554 +    /* set the initial and final data exchanges - those that are not
  60.555 +     *   part of the recursive doubling.
  60.556 +     */
  60.557 +    if (EXCHANGE_NODE == exchange_node->node_type)  {
  60.558 +        exchange_node->n_extra_sources = 0;
  60.559 +        for (i = 0, tmp = node_rank * (tree_order - 1) + cnt + i;
  60.560 +                tmp < num_nodes && i < tree_order - 1;
  60.561 +                ++i, ++tmp) {
  60.562 +            ++exchange_node->n_extra_sources;
  60.563 +        }
  60.564 +
  60.565 +        assert(exchange_node->n_extra_sources < tree_order);
  60.566 +
  60.567 +        if (exchange_node->n_extra_sources > 0) {
  60.568 +            exchange_node->rank_extra_sources_array = (int *) malloc
  60.569 +                (exchange_node->n_extra_sources * sizeof(int));
  60.570 +            if( NULL == exchange_node->rank_extra_sources_array ) {
  60.571 +                goto Error;
  60.572 +            }
  60.573 +            for (i = 0, tmp = node_rank * (tree_order - 1) + cnt;
  60.574 +                    i < tree_order - 1 && tmp < num_nodes; ++i, ++tmp) {
  60.575 +                NETPATTERNS_VERBOSE(("extra_source#%d = %d", i, tmp));
  60.576 +                exchange_node->rank_extra_sources_array[i] = tmp;
  60.577 +            }
  60.578 +        } else {
  60.579 +            exchange_node->rank_extra_sources_array = NULL;
  60.580 +        }
  60.581 +    } else {
  60.582 +        exchange_node->n_extra_sources = 1;
  60.583 +        exchange_node->rank_extra_sources_array = (int *) malloc (sizeof(int));
  60.584 +        if( NULL == exchange_node->rank_extra_sources_array ) {
  60.585 +            goto Error;
  60.586 +        }
  60.587 +        exchange_node->rank_extra_sources_array[0] = (node_rank - cnt) / (tree_order - 1);
  60.588 +        NETPATTERNS_VERBOSE(("extra_source#%d = %d", 0,
  60.589 +                    exchange_node->rank_extra_sources_array[0] ));
  60.590 +    }
  60.591 +
  60.592 +    /* set the exchange pattern */
  60.593 +    if (EXCHANGE_NODE == exchange_node->node_type) {
  60.594 +        exchange_node->n_exchanges = n_levels;
  60.595 +        /* Allocate 2-dimensional array that keeps
  60.596 +         rank exchange information for each step*/ 
  60.597 +        exchange_node->rank_exchanges = (int **) malloc
  60.598 +            (exchange_node->n_exchanges * sizeof(int *));
  60.599 +        if(NULL == exchange_node->rank_exchanges) {
  60.600 +            goto Error;
  60.601 +        }
  60.602 +        for (i = 0; i < exchange_node->n_exchanges; i++) {
  60.603 +            exchange_node->rank_exchanges[i] = (int *) malloc
  60.604 +                ((tree_order - 1) * sizeof(int));
  60.605 +            if( NULL == exchange_node->rank_exchanges ) {
  60.606 +                goto Error;
  60.607 +            }
  60.608 +        }
  60.609 +        /* fill in exchange partners */
  60.610 +        for(i = 0, kpow_num = 1; i < exchange_node->n_exchanges; 
  60.611 +                                      i++, kpow_num *= tree_order) {
  60.612 +            k_base = node_rank / (kpow_num * tree_order);
  60.613 +            for(j = 1; j < tree_order; j++) {
  60.614 +                peer = node_rank + kpow_num * j;
  60.615 +                if (k_base != peer/(kpow_num * tree_order)) {
  60.616 +                    /* Wraparound the number */
  60.617 +                    peer = k_base * (kpow_num * tree_order)  + 
  60.618 +                        peer % (kpow_num * tree_order);
  60.619 +                }
  60.620 +                exchange_node->rank_exchanges[i][j - 1] = peer;
  60.621 +                NETPATTERNS_VERBOSE(("rank_exchanges#(%d,%d)/%d = %d", 
  60.622 +                            i, j, tree_order, peer));
  60.623 +            }
  60.624 +        }
  60.625 +    } else {
  60.626 +        exchange_node->n_exchanges=0;
  60.627 +        exchange_node->rank_exchanges=NULL;
  60.628 +    }
  60.629 +
  60.630 +    /* set the number of tags needed per stripe - this must be the
  60.631 +     *   same across all procs in the communicator.
  60.632 +     */
  60.633 +    /* do we need this one */
  60.634 +    exchange_node->n_tags = tree_order * n_levels + 1;
  60.635 +
  60.636 +    /* successful return */
  60.637 +    return OMPI_SUCCESS;
  60.638 +
  60.639 +Error:
  60.640 +
  60.641 +    if (NULL != exchange_node->rank_extra_sources_array) {
  60.642 +        free(exchange_node->rank_extra_sources_array);
  60.643 +    }
  60.644 +
  60.645 +    if (NULL != exchange_node->rank_exchanges) {
  60.646 +        for (i = 0; i < exchange_node->n_exchanges; i++) {
  60.647 +            if (NULL != exchange_node->rank_exchanges[i]) {
  60.648 +                free(exchange_node->rank_exchanges[i]);
  60.649 +            }
  60.650 +        }
  60.651 +        free(exchange_node->rank_exchanges);
  60.652 +    }
  60.653 +
  60.654 +    /* error return */
  60.655 +    return OMPI_ERROR;
  60.656 +}
  60.657 +
  60.658 +#if 1 
  60.659 +OMPI_DECLSPEC int netpatterns_setup_recursive_doubling_n_tree_node(int num_nodes, int node_rank, int tree_order,
  60.660 +        netpatterns_pair_exchange_node_t *exchange_node)
  60.661 +{
  60.662 +    /* local variables */
  60.663 +    int i, tmp, cnt;
  60.664 +    int n_levels;
  60.665 +    int shift, mask;
  60.666 +
  60.667 +    NETPATTERNS_VERBOSE(("Enter netpatterns_setup_recursive_doubling_n_tree_node(num_nodes=%d, node_rank=%d, tree_order=%d)", num_nodes, node_rank, tree_order));
  60.668 +
  60.669 +    assert(num_nodes > 1);
  60.670 +    while (tree_order > num_nodes) {
  60.671 +        tree_order /= 2;
  60.672 +    }
  60.673 +
  60.674 +    exchange_node->tree_order = tree_order;
  60.675 +    /* We support only tree_order that are power of two */
  60.676 +    assert(0 == (tree_order & (tree_order - 1)));
  60.677 +
  60.678 +    /* figure out number of levels in the tree */
  60.679 +    n_levels = 0;
  60.680 +    /* cnt - number of ranks in given level */
  60.681 +    cnt=1;
  60.682 +    while ( num_nodes > cnt ) {
  60.683 +        cnt *= tree_order;
  60.684 +        n_levels++;
  60.685 +    };
  60.686 +
  60.687 +    /* figure out the largest power of tree_order that is less than or equal to
  60.688 +     * num_nodes */
  60.689 +    if ( cnt > num_nodes) {
  60.690 +        cnt /= tree_order;
  60.691 +        n_levels--;
  60.692 +    }
  60.693 +    exchange_node->log_tree_order = n_levels;
  60.694 +    if (2 == tree_order) {
  60.695 +        exchange_node->log_2 = exchange_node->log_tree_order;
  60.696 +    }
  60.697 +
  60.698 +    tmp=1;
  60.699 +    for (i=0 ; i < n_levels ; i++ ) {
  60.700 +        tmp *= tree_order;
  60.701 +    }
  60.702 +    /* Ishai: I see no reason for calculating tmp. Add an assert before deleting it */
  60.703 +    assert(tmp == cnt);
  60.704 +
  60.705 +    exchange_node->n_largest_pow_tree_order = tmp;
  60.706 +    if (2 == tree_order) {
  60.707 +        exchange_node->n_largest_pow_2 = exchange_node->n_largest_pow_tree_order;
  60.708 +    }
  60.709 +
  60.710 +    /* set node characteristics - node that is not within the largest
  60.711 +     *  power of tree_order will just send its data to node that will participate
  60.712 +     *  in the recursive doubling, and get the result back at the end.
  60.713 +     */
  60.714 +    if ( node_rank + 1 > cnt ) {
  60.715 +        exchange_node->node_type = EXTRA_NODE;
  60.716 +    } else {
  60.717 +        exchange_node->node_type = EXCHANGE_NODE;
  60.718 +    }
  60.719 +
  60.720 +    /* set the initial and final data exchanges - those that are not
  60.721 +     *   part of the recursive doubling.
  60.722 +     */
  60.723 +    if ( EXCHANGE_NODE == exchange_node->node_type ) {
  60.724 +        exchange_node->n_extra_sources = 0;
  60.725 +        for (tmp = node_rank + cnt; tmp < num_nodes; tmp += cnt) {
  60.726 +            ++exchange_node->n_extra_sources;
  60.727 +        }
  60.728 +        if (exchange_node->n_extra_sources > 0) {
  60.729 +            exchange_node->rank_extra_sources_array = (int *) malloc
  60.730 +                (exchange_node->n_extra_sources * sizeof(int));
  60.731 +            if( NULL == exchange_node->rank_extra_sources_array ) {
  60.732 +                goto Error;
  60.733 +            }
  60.734 +            for (i = 0, tmp = node_rank + cnt; tmp < num_nodes; ++i, tmp += cnt) {
  60.735 +                NETPATTERNS_VERBOSE(("extra_source#%d = %d", i, tmp));
  60.736 +                exchange_node->rank_extra_sources_array[i] = tmp;
  60.737 +            }
  60.738 +        } else {
  60.739 +            exchange_node->rank_extra_sources_array = NULL;
  60.740 +        }
  60.741 +    } else {
  60.742 +        exchange_node->n_extra_sources = 1;
  60.743 +        exchange_node->rank_extra_sources_array = (int *) malloc (sizeof(int));
  60.744 +        if( NULL == exchange_node->rank_extra_sources_array ) {
  60.745 +            goto Error;
  60.746 +        }
  60.747 +        exchange_node->rank_extra_sources_array[0] = node_rank & (cnt - 1);
  60.748 +        NETPATTERNS_VERBOSE(("extra_source#%d = %d", 0, node_rank & (cnt - 1)));
  60.749 +    }
  60.750 +
  60.751 +    /* Ishai: To be compatable with the old structure - should be remoived later */
  60.752 +    if (1 == exchange_node->n_extra_sources) {
  60.753 +        exchange_node->rank_extra_source = exchange_node->rank_extra_sources_array[0];
  60.754 +    } else {
  60.755 +        exchange_node->rank_extra_source = -1;
  60.756 +    }
  60.757 +
  60.758 +    /* set the exchange pattern */
  60.759 +    if ( EXCHANGE_NODE == exchange_node->node_type ) {
  60.760 +        exchange_node->n_exchanges = n_levels * (tree_order - 1);
  60.761 +        exchange_node->rank_exchanges = (int *) malloc
  60.762 +            (exchange_node->n_exchanges * sizeof(int));
  60.763 +        if( NULL == exchange_node->rank_exchanges ) {
  60.764 +            goto Error;
  60.765 +        }
  60.766 +
  60.767 +        /* fill in exchange partners */
  60.768 +        for ( i = 0, shift = 1 ; i < exchange_node->n_exchanges ; shift *= tree_order ) {
  60.769 +            for ( mask = 1 ; mask < tree_order ; ++mask, ++i ) {
  60.770 +                exchange_node->rank_exchanges[i] = node_rank ^ (mask * shift);
  60.771 +                NETPATTERNS_VERBOSE(("rank_exchanges#%d/%d = %d", i, tree_order, node_rank ^ (mask * shift)));
  60.772 +            }
  60.773 +        }
  60.774 +
  60.775 +    } else {
  60.776 +
  60.777 +        exchange_node->n_exchanges=0;
  60.778 +        exchange_node->rank_exchanges=NULL;
  60.779 +
  60.780 +    }
  60.781 +
  60.782 +    /* set the number of tags needed per stripe - this must be the
  60.783 +     *   same across all procs in the communicator.
  60.784 +     */
  60.785 +    /* Ishai: Need to find out what is n_tags */
  60.786 +    exchange_node->n_tags = tree_order * n_levels + 1;
  60.787 +
  60.788 +    /* successful return */
  60.789 +    return OMPI_SUCCESS;
  60.790 +
  60.791 +Error:
  60.792 +    if (exchange_node->rank_extra_sources_array != NULL) {
  60.793 +        free(exchange_node->rank_extra_sources_array);
  60.794 +    }
  60.795 +
  60.796 +    /* error return */
  60.797 +    return OMPI_ERROR;
  60.798 +}
  60.799 +
  60.800 +OMPI_DECLSPEC void netpatterns_free_recursive_doubling_tree_node(
  60.801 +    netpatterns_pair_exchange_node_t *exchange_node)
  60.802 +{   /* Releases the two heap arrays held by exchange_node; the struct itself is not freed and the pointers are left as-is. */
  60.803 +    NETPATTERNS_VERBOSE(("About to release rank_extra_sources_array and rank_exchanges"));
  60.804 +    if (exchange_node->rank_extra_sources_array != NULL) {
  60.805 +        free(exchange_node->rank_extra_sources_array);
  60.806 +    }
  60.807 +
  60.808 +    if (exchange_node->rank_exchanges != NULL) {
  60.809 +        free(exchange_node->rank_exchanges);
  60.810 +    }
  60.811 +}
  60.812 +#endif
  60.813 +
  60.814 +OMPI_DECLSPEC int netpatterns_setup_recursive_doubling_tree_node(int num_nodes, int node_rank,
  60.815 +        netpatterns_pair_exchange_node_t *exchange_node)
  60.816 +{   /* Radix-2 wrapper: forwards to the n-tree setup with tree_order fixed at 2 and returns its status unchanged. */
  60.817 +    return netpatterns_setup_recursive_doubling_n_tree_node(num_nodes, node_rank, 2, exchange_node);
  60.818 +}
  60.819 +
  60.820 +#if 0 
  60.821 +/*OMPI_DECLSPEC int old_netpatterns_setup_recursive_doubling_tree_node(int num_nodes, int node_rank,*/
  60.822 +OMPI_DECLSPEC int netpatterns_setup_recursive_doubling_n_tree_node(int num_nodes, int node_rank,int tree_order,
  60.823 +        netpatterns_pair_exchange_node_t *exchange_node)
  60.824 +{
  60.825 +    /* local variables */
  60.826 +    /*int tree_order;*/
  60.827 +    int i,tmp,cnt,result,n_extra_nodes;
  60.828 +    int n_exchanges;
  60.829 +
  60.830 +    /* figure out number of levels in the tree */
  60.831 +
  60.832 +    n_exchanges=0;
  60.833 +    result=num_nodes;
  60.834 +/*    tree_order=2;*/
  60.835 +    /* cnt - number of ranks in given level */
  60.836 +    cnt=1;
  60.837 +    while( num_nodes > cnt ) {
  60.838 +        cnt*=tree_order;
  60.839 +        n_exchanges++;
  60.840 +    };
  60.841 +
  60.842 +    /* figure out the largest power of 2 that is less than or equal to
  60.843 +     * num_nodes */
  60.844 +    if( cnt > num_nodes) {
  60.845 +        cnt/=tree_order;
  60.846 +        n_exchanges--;
  60.847 +    }
  60.848 +    exchange_node->log_2=n_exchanges;
  60.849 +
  60.850 +    tmp=1;
  60.851 +    for(i=0 ; i < n_exchanges ; i++ ) {
  60.852 +        tmp*=2;
  60.853 +    }
  60.854 +    exchange_node->n_largest_pow_2=tmp;
  60.855 +
  60.856 +    /* set node characteristics - node that is not within the largest
  60.857 +     *  power of 2 will just send its data to a node that will participate
  60.858 +     *  in the recursive doubling, and get the result back at the end.
  60.859 +     */
  60.860 +    if( node_rank+1 > cnt ) {
  60.861 +        exchange_node->node_type=EXTRA_NODE;
  60.862 +    } else {
  60.863 +        exchange_node->node_type=EXCHANGE_NODE;
  60.864 +    }
  60.865 +
  60.866 +    /* set the initial and final data exchanges - those that are not
  60.867 +     *   part of the recursive doubling.
  60.868 +     */
  60.869 +    n_extra_nodes=num_nodes-cnt;
  60.870 +
  60.871 +    if ( EXCHANGE_NODE == exchange_node->node_type ) {
  60.872 +
  60.873 +        if( node_rank < n_extra_nodes ) {
  60.874 +            exchange_node->n_extra_sources=1;
  60.875 +            exchange_node->rank_extra_source=cnt+node_rank;
  60.876 +        } else {
  60.877 +            exchange_node->n_extra_sources=0;
  60.878 +            exchange_node->rank_extra_source=-1;
  60.879 +        }
  60.880 +
  60.881 +    } else {
  60.882 +            exchange_node->n_extra_sources=1;
  60.883 +            exchange_node->rank_extra_source=node_rank-cnt;
  60.884 +    }
  60.885 +
  60.886 +    /* set the exchange pattern */
  60.887 +    if( EXCHANGE_NODE == exchange_node->node_type ) {
  60.888 +
  60.889 +        exchange_node->n_exchanges=n_exchanges;
  60.890 +        exchange_node->rank_exchanges=(int *) malloc
  60.891 +            (n_exchanges*sizeof(int));
  60.892 +        if( NULL == exchange_node->rank_exchanges ) {
  60.893 +            goto Error;
  60.894 +        }
  60.895 +
  60.896 +        /* fill in exchange partners */
  60.897 +        result=1;
  60.898 +        tmp=node_rank;
  60.899 +        for( i=0 ; i < n_exchanges ; i++ ) {
  60.900 +            if(tmp & 1 ) {
  60.901 +                exchange_node->rank_exchanges[i]=
  60.902 +                    node_rank-result;
  60.903 +            } else {
  60.904 +                exchange_node->rank_exchanges[i]=
  60.905 +                    node_rank+result;
  60.906 +            }
  60.907 +            result*=2;
  60.908 +            tmp/=2;
  60.909 +        }
  60.910 +
  60.911 +    } else {
  60.912 +
  60.913 +        exchange_node->n_exchanges=0;
  60.914 +        exchange_node->rank_exchanges=NULL;
  60.915 +
  60.916 +    }
  60.917 +
  60.918 +    /* set the number of tags needed per stripe - this must be the
  60.919 +     *   same across all procs in the communicator.
  60.920 +     */
  60.921 +    exchange_node->n_tags=2*n_exchanges+1;
  60.922 +
  60.923 +    /* Ishai: to make sure free will work also for people that call this function */
  60.924 +    exchange_node->rank_extra_sources_array = NULL;
  60.925 +
  60.926 +    /* successful return */
  60.927 +    return OMPI_SUCCESS;
  60.928 +
  60.929 +Error:
  60.930 +
  60.931 +    /* error return */
  60.932 +    return OMPI_ERROR;
  60.933 +}
  60.934 +#endif
  60.935 +
    61.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    61.2 +++ b/ompi/patterns/net/netpatterns_knomial_tree.h	Tue Feb 19 22:50:56 2013 +0000
    61.3 @@ -0,0 +1,254 @@
    61.4 +/*
    61.5 + * Copyright (c) 2009-2012 Mellanox Technologies.  All rights reserved.
    61.6 + * Copyright (c) 2009-2012 Oak Ridge National Laboratory.  All rights reserved.
    61.7 + * Copyright (c) 2012      Los Alamos National Security, LLC.
    61.8 + *                         All rights reserved.
    61.9 + * $COPYRIGHT$
   61.10 + *
   61.11 + * Additional copyrights may follow
   61.12 + *
   61.13 + * $HEADER$
   61.14 + */
   61.15 +
   61.16 +#ifndef COMM_PATTERNS_KNOMIAL_TREE_H
   61.17 +#define COMM_PATTERNS_KNOMIAL_TREE_H
   61.18 +
   61.19 +#include "ompi_config.h"
   61.20 +
   61.21 +BEGIN_C_DECLS
   61.22 +
   61.23 +
   61.24 +/*
   61.25 + * Pair-wise data exchange
   61.26 + */
   61.27 +
   61.28 +/* node_type values: EXCHANGE_NODE takes part in the pairwise exchange; EXTRA_NODE lies outside the largest power of tree_order (as assigned by the recursive-doubling setup) */
   61.29 +enum {
   61.30 +    EXCHANGE_NODE,
   61.31 +    EXTRA_NODE
   61.32 +};
   61.33 +
   61.34 +struct netpatterns_pair_exchange_node_t {
   61.35 +
   61.36 +    /* Order of a node in the tree - usually 2 */
   61.37 +    int tree_order;
   61.38 +
   61.39 +    /* number of nodes this node will exchange data with */
   61.40 +    int n_exchanges;
   61.41 +
   61.42 +    /* ranks of nodes involved in data exchange */
   61.43 +    int *rank_exchanges;
   61.44 +
   61.45 +    /* number of extra sources of data - outside largest power of 2 in
   61.46 +     *  this group */
   61.47 +    int n_extra_sources;
   61.48 +    
   61.49 +    /* rank of the extra source: the scalar is the deprecated form, the array holds n_extra_sources ranks */
   61.50 +    /* deprecated */ int rank_extra_source;
   61.51 +    int *rank_extra_sources_array;
   61.52 +
   61.53 +    /* number of tags needed per stripe */
   61.54 +    int n_tags;
   61.55 +
   61.56 +    /* log 2 of largest full power of 2 for this node set */
   61.57 +    /* deprecated */ int log_2;
   61.58 +    int log_tree_order;
   61.59 +
   61.60 +    /* largest power of 2 that fits in this group */
   61.61 +    /* deprecated */ int n_largest_pow_2;
   61.62 +    int n_largest_pow_tree_order;
   61.63 +
   61.64 +    /* node type (EXCHANGE_NODE or EXTRA_NODE) */
   61.65 +    int node_type;
   61.66 +
   61.67 +};
   61.68 +typedef struct netpatterns_pair_exchange_node_t netpatterns_pair_exchange_node_t;
   61.69 +
   61.70 +struct netpatterns_payload_t {   /* NOTE(review): presumably send/receive lengths and offsets for one exchange - confirm against users */
   61.71 +    int s_len;
   61.72 +    int r_len;
   61.73 +    int s_offset;
   61.74 +    int r_offset;
   61.75 +};
   61.76 +typedef struct netpatterns_payload_t netpatterns_payload_t;
   61.77 +
   61.78 +struct netpatterns_k_exchange_node_t {
   61.79 +    /* Order of a node in the tree - usually 2 */
   61.80 +    int tree_order;
   61.81 +    /* number of nodes this node will exchange data with */
   61.82 +    int n_exchanges;
   61.83 +    /* total number of exchanges that I actually participate in */
   61.84 +    int n_actual_exchanges;
   61.85 +    /* ranks of nodes involved in data exchange */
   61.86 +    int **rank_exchanges;
   61.87 +    /* number of extra sources of data - outside largest power of 2 in
   61.88 +     *  this group */
   61.89 +    int n_extra_sources;
   61.90 +    /* rank(s) of the extra source(s) */
   61.91 +    int *rank_extra_sources_array;
   61.92 +    /* number of tags needed per stripe */
   61.93 +    int n_tags;
   61.94 +    /* log k of largest full power of k for this node set */
   61.95 +    int log_tree_order;
   61.96 +    /* largest power of k that fits in this group */
   61.97 +    int n_largest_pow_tree_order;
   61.98 +    /* node type */
   61.99 +    int node_type;
  61.100 +    /* start of extra ranks k_nomial */
  61.101 +    int k_nomial_stray;
  61.102 +    /* reindex map */
  61.103 +    int *reindex_map;
  61.104 +    /* inverse of reindex map, i.e. given a reindexed id find out its actual rank */
  61.105 +    int *inv_reindex_map;
  61.106 +    /* reindexed node_rank */
  61.107 +    int reindex_myid;
  61.108 +    /* 2-d array that holds payload info for each level of recursive k-ing */
  61.109 +    netpatterns_payload_t **payload_info;
  61.110 +};
  61.111 +typedef struct netpatterns_k_exchange_node_t
  61.112 +               netpatterns_k_exchange_node_t;
  61.113 +
  61.114 +OMPI_DECLSPEC int netpatterns_setup_recursive_doubling_n_tree_node(int num_nodes, int node_rank, int tree_order,
  61.115 +    netpatterns_pair_exchange_node_t *exchange_node);
  61.116 +
  61.117 +OMPI_DECLSPEC void netpatterns_free_recursive_doubling_tree_node(
  61.118 +    netpatterns_pair_exchange_node_t *exchange_node);
  61.119 +
  61.120 +OMPI_DECLSPEC int netpatterns_setup_recursive_doubling_tree_node(int num_nodes, int node_rank,
  61.121 +    netpatterns_pair_exchange_node_t *exchange_node);
  61.122 +
  61.123 +OMPI_DECLSPEC int netpatterns_setup_recursive_knomial_tree_node(
  61.124 +   int num_nodes, int node_rank, int tree_order,
  61.125 +   netpatterns_k_exchange_node_t *exchange_node);
  61.126 +
  61.127 +OMPI_DECLSPEC int netpatterns_setup_recursive_knomial_allgather_tree_node(
  61.128 +        int num_nodes, int node_rank, int tree_order, int *hier_ranks,
  61.129 +        netpatterns_k_exchange_node_t *exchange_node);
  61.130 +
  61.131 +
  61.132 +/* Input: k_exchange_node structure 
  61.133 +      Output: index in rank_exchanges array that points 
  61.134 +      to the "start_point" for outgoing send. 
  61.135 +
  61.136 +      Please see below example of usage:
  61.137 +      for (i = start_point ; i > 0; i--) 
  61.138 +          for (k = 0; k < tree_radix; k++) 
  61.139 +              send messages to exchange_node->rank_exchanges[i][k];
  61.140 +*/
  61.141 +
  61.142 +static inline __opal_attribute_always_inline__ 
  61.143 +int netpatterns_get_knomial_level(
  61.144 +    int my_rank, int src_rank, 
  61.145 +    int radix,   int size,
  61.146 +    int *k_level)
  61.147 +{   /* Stores in *k_level the largest power of radix not exceeding the distance (1 if none); returns the step count minus one, i.e. -1 when distance < radix. */
  61.148 +    int distance, 
  61.149 +        pow_k;
  61.150 +    int logk_level = 0;
  61.151 +
  61.152 +    /* Calculate distance from source of data */
  61.153 +    distance = src_rank - my_rank; 
  61.154 +
  61.155 +    /* Wrap around */
  61.156 +    if (0 > distance) {
  61.157 +        distance += size;
  61.158 +    }
  61.159 +
  61.160 +    pow_k = 1;
  61.161 +    while(distance / (pow_k * radix)) {
  61.162 +        pow_k *= radix;
  61.163 +        ++logk_level;
  61.164 +    }
  61.165 +    --logk_level;
  61.166 +
  61.167 +    *k_level = pow_k;
  61.168 +    return logk_level;
  61.169 +}
  61.170 +
  61.171 +/* Input: my_rank, root, radix, size
  61.172 + * Output: returns the source of the data; *k_level receives level/radix (a power of radix), *logk_level the number of loop steps taken
  61.173 + */
  61.174 +static inline __opal_attribute_always_inline__ 
  61.175 +int netpatterns_get_knomial_data_source(
  61.176 +    int my_rank, int root, int radix, int size,
  61.177 +    int *k_level, int *logk_level)
  61.178 +{
  61.179 +    int level = radix;
  61.180 +    int step = 0;
  61.181 +
  61.182 +    /* Calculate source of the data: grow level by radix while (root - my_rank) is a multiple of it and level <= size */
  61.183 +    while((0 == (root - my_rank) % level) 
  61.184 +            && (level <= size)) {
  61.185 +        level *= radix;
  61.186 +        ++step;
  61.187 +    }   
  61.188 +    
  61.189 +    *k_level = level/radix;
  61.190 +    *logk_level = step;
  61.191 +    return my_rank - (my_rank % level - root % level);
  61.192 +}
  61.193 +
  61.194 +/* Input: my_rank, radix,
  61.195 + *        k_level - that you get from netpatterns_get_knomial_data_source
  61.196 + *        k_step - some integer
  61.197 + * Output: peer - next child in the tree
  61.198 + * Usage: 
  61.199 + *         src = netpatterns_get_knomial_data_source(
  61.200 + *                  my_rank, root, radix, size,
  61.201 + *                  &k_level, &logk_level)
  61.202 + *         recv_from(src......);
  61.203 + *
  61.204 + *         MCA_COMMON_NETPATTERNS_GET_NEXT_KNOMIAL_INIT(step_info, k_level, my_rank);
  61.205 + *         while(MCA_COMMON_NETPATTERNS_GET_NEXT_KNOMIAL_PEER_CHECK_LEVEL(step_info)) {
  61.206 + *              MCA_COMMON_NETPATTERNS_GET_NEXT_KNOMIAL_PEER(my_rank, radix, step_info, peer);
  61.207 + *              send_to(peer....);
  61.208 + *         }
  61.209 + * For more examples, grep the ptpcoll bcol bcast files.
  61.210 + */
  61.211 +
  61.212 +typedef struct netpatterns_knomial_step_info_t {   /* iteration state read and updated by the GET_NEXT_KNOMIAL_* macros */
  61.213 +    int k_step;
  61.214 +    int k_level;
  61.215 +    int k_tmp_peer;
  61.216 +} netpatterns_knomial_step_info_t;
  61.217 +
  61.218 +#define MCA_COMMON_NETPATTERNS_GET_NEXT_KNOMIAL_UPDATE_LEVEL_FOR_BCAST(step_info, radix)\
  61.219 +do {  /* divide k_level by radix unless k_step is 1 */                                  \
  61.220 +    if (1 != step_info.k_step) {                                                        \
  61.221 +        step_info.k_level /= radix;                                                     \
  61.222 +    }                                                                                   \
  61.223 +} while (0)
  61.224 +
  61.225 +#define MCA_COMMON_NETPATTERNS_GET_NEXT_KNOMIAL_INIT(step_info, in_k_level, in_peer)\
  61.226 +do {  /* start at step 1 with the caller's level and peer */                        \
  61.227 +    step_info.k_step  = 1;                                                          \
  61.228 +    step_info.k_level = in_k_level;                                                 \
  61.229 +    step_info.k_tmp_peer = in_peer;                                                 \
  61.230 +} while (0)
  61.231 +
  61.232 +#define MCA_COMMON_NETPATTERNS_GET_NEXT_KNOMIAL_PEER_CHECK_LEVEL(step_info) \
  61.233 +                                                    (step_info.k_level > 1) /* loop condition from the usage example: non-zero while k_level exceeds 1 */
  61.234 +
  61.235 +#define MCA_COMMON_NETPATTERNS_GET_NEXT_KNOMIAL_PEER(my_rank, radix, step_info, peer)           \
  61.236 +do {  /* peer = k_tmp_peer + k_level/radix, pulled back by k_level if it leaves my_rank's block */ \
  61.237 +    int rank_radix_base = my_rank/step_info.k_level;                                            \
  61.238 +                                                                                                \
  61.239 +    peer = step_info.k_tmp_peer + step_info.k_level/radix;                                      \
  61.240 +    if (rank_radix_base != peer/step_info.k_level) {                                            \
  61.241 +        /* Wrap around: peer fell outside my_rank's k_level-sized block */                      \
  61.242 +        peer -= step_info.k_level;                                                              \
  61.243 +        assert(peer >=0);                                                                       \
  61.244 +    }                                                                                           \
  61.245 +    ++step_info.k_step;                                                                         \
  61.246 +    if (radix == step_info.k_step) {                                                            \
  61.247 +        step_info.k_level /= radix;                                                             \
  61.248 +        step_info.k_step = 1;                                                                   \
  61.249 +        step_info.k_tmp_peer = my_rank;                                                         \
  61.250 +    } else {                                                                                    \
  61.251 +        step_info.k_tmp_peer = peer;                                                            \
  61.252 +    }                                                                                           \
  61.253 +                                                                                                \
  61.254 +} while (0)
  61.255 +
  61.256 +END_C_DECLS
  61.257 +#endif
    62.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    62.2 +++ b/ompi/patterns/net/netpatterns_multinomial_tree.c	Tue Feb 19 22:50:56 2013 +0000
    62.3 @@ -0,0 +1,190 @@
    62.4 +/*
    62.5 + * Copyright (c) 2009-2012 Mellanox Technologies.  All rights reserved.
    62.6 + * Copyright (c) 2009-2012 Oak Ridge National Laboratory.  All rights reserved.
    62.7 + * $COPYRIGHT$
    62.8 + *
    62.9 + * Additional copyrights may follow
   62.10 + *
   62.11 + * $HEADER$
   62.12 + */
   62.13 +
   62.14 +#include "ompi_config.h"
   62.15 +#ifdef HAVE_UNISTD_H
   62.16 +#include <unistd.h>
   62.17 +#endif
   62.18 +#include <sys/types.h>
   62.19 +#ifdef HAVE_SYS_MMAN_H
   62.20 +#include <sys/mman.h>
   62.21 +#endif
   62.22 +#include <fcntl.h>
   62.23 +#include <stdlib.h>
   62.24 +
   62.25 +#include "ompi/constants.h"
   62.26 +#include "netpatterns.h"
   62.27 +
   62.28 +
   62.29 +/* Set up a multi-nomial tree over ranks 0..num_nodes-1: fills tree_nodes[i] with rank i's parent, children and node type.
   62.30 + *  Returns OMPI_SUCCESS, or OMPI_ERROR if tree_order <= 1 or an allocation fails (children arrays allocated so far are freed). */
   62.31 +
   62.32 +OMPI_DECLSPEC int netpatterns_setup_multinomial_tree(int tree_order, int num_nodes,
   62.33 +        netpatterns_tree_node_t *tree_nodes)
   62.34 +{
   62.35 +    /* local variables - node_index starts at -1 so the Error path frees nothing before the first node is set up */
   62.36 +    int i,result;
   62.37 +    int cnt, n_nodes_in_this_level,node_index=-1;
   62.38 +    int n_cum_nodes,current_level,node,n_nodes_prev_level,rank,parent_rank;
   62.39 +    int n_nodes_in_last_level,n_full_stripes,n_in_partial_stipe,n_children;
   62.40 +    int n_lvls_in_tree;
   62.41 +
   62.42 +    /* sanity check */
   62.43 +    if( 1 >= tree_order ) {
   62.44 +        goto Error;
   62.45 +    }
   62.46 +
   62.47 +
   62.48 +    /* figure out number of levels in the tree */
   62.49 +
   62.50 +    n_lvls_in_tree=0;
   62.51 +    result=num_nodes;
   62.52 +    /* cnt - number of ranks in given level */
   62.53 +    cnt=1;
   62.54 +    /*  cumulative count of ranks */
   62.55 +    while( 0 < result ) {
   62.56 +        result-=cnt;
   62.57 +        cnt*=tree_order; 
   62.58 +        n_lvls_in_tree++;
   62.59 +    };  
   62.60 +
   62.61 +    /* loop over tree levels */
   62.62 +    n_nodes_in_this_level=1;
   62.63 +    node_index=-1;
   62.64 +    n_cum_nodes=0;
   62.65 +    for( current_level = 0 ; current_level < n_lvls_in_tree ; current_level++) {
   62.66 +
   62.67 +        /* loop over nodes in current level */
   62.68 +        for ( node=0 ; node < n_nodes_in_this_level ; node++ ) {
   62.69 +            /* get node index */
   62.70 +            node_index++;
   62.71 +            
   62.72 +            /* break if reach group size */
   62.73 +            if( node_index == num_nodes) {
   62.74 +                break;
   62.75 +            }
   62.76 +
   62.77 +            tree_nodes[node_index].my_rank=node_index;
   62.78 +            tree_nodes[node_index].children_ranks=NULL;
   62.79 +
   62.80 +            /*
   62.81 +             *  Parents
   62.82 +             */
   62.83 +            if( 0 == current_level ) {
   62.84 +                tree_nodes[node_index].n_parents=0;
   62.85 +                /* get parent index */
   62.86 +                tree_nodes[node_index].parent_rank=-1;
   62.87 +            } else {
   62.88 +                tree_nodes[node_index].n_parents=1;
   62.89 +                /* get parent index */
   62.90 +                n_nodes_prev_level=n_nodes_in_this_level/tree_order;
   62.91 +                if( current_level == n_lvls_in_tree -1 ) {
   62.92 +                    /* load balance the lowest level */
   62.93 +                    parent_rank=node-
   62.94 +                        (node/n_nodes_prev_level)*n_nodes_prev_level;
   62.95 +                    parent_rank=n_cum_nodes-n_nodes_prev_level+
   62.96 +                        parent_rank;
   62.97 +                    tree_nodes[node_index].parent_rank=parent_rank;
   62.98 +                } else {
   62.99 +                    tree_nodes[node_index].parent_rank=
  62.100 +                        (n_cum_nodes-n_nodes_prev_level)+node/tree_order;
  62.101 +                }
  62.102 +            }
  62.103 +
  62.104 +            /*
  62.105 +             * Children
  62.106 +             */
  62.107 +
  62.108 +            /* get number of children */
  62.109 +            if( (n_lvls_in_tree-1) == current_level ) {
  62.110 +                /* leaves have no children */
  62.111 +                tree_nodes[node_index].n_children=0;
  62.112 +                tree_nodes[node_index].children_ranks=NULL;
  62.113 +            } else {
  62.114 +                /* take into account last level being incomplete */
  62.115 +                if( (n_lvls_in_tree-2) == current_level ) {
  62.116 +                    /* last level is load balanced */
  62.117 +                    n_nodes_in_last_level=num_nodes-
  62.118 +                        (n_cum_nodes+n_nodes_in_this_level);
  62.119 +                    n_full_stripes=n_nodes_in_last_level/n_nodes_in_this_level;
  62.120 +                    n_in_partial_stipe=n_nodes_in_last_level-
  62.121 +                        n_full_stripes*n_nodes_in_this_level;
  62.122 +                    n_children=n_full_stripes;
  62.123 +                    if( n_full_stripes < tree_order ) {
  62.124 +                        if( node <= n_in_partial_stipe-1 ) {
  62.125 +                            n_children++;
  62.126 +                        }
  62.127 +                    }
  62.128 +                    tree_nodes[node_index].n_children=n_children;
  62.129 +                    if( 0 < n_children ) {
  62.130 +                        tree_nodes[node_index].children_ranks=(int *)
  62.131 +                            malloc(sizeof(int)*n_children);
  62.132 +                        if( NULL == tree_nodes[node_index].children_ranks) {
  62.133 +                            goto Error;
  62.134 +                        }
  62.135 +                    } else {
  62.136 +                        tree_nodes[node_index].children_ranks=NULL;
  62.137 +                    }
  62.138 +                    /* fill in list */
  62.139 +                    for( rank=0 ; rank < n_children ; rank++ ) {
  62.140 +                        tree_nodes[node_index].children_ranks[rank]=
  62.141 +                            node+rank*n_nodes_in_this_level;
  62.142 +                        tree_nodes[node_index].children_ranks[rank]+=
  62.143 +                            (n_cum_nodes+n_nodes_in_this_level);
  62.144 +                    }
  62.145 +                } else {
  62.146 +                    n_children=tree_order;
  62.147 +                    tree_nodes[node_index].n_children=tree_order;
  62.148 +                    tree_nodes[node_index].children_ranks=(int *)
  62.149 +                        malloc(sizeof(int)*n_children);
  62.150 +                    if( NULL == tree_nodes[node_index].children_ranks) {
  62.151 +                        goto Error;
  62.152 +                    }
  62.153 +                    for( rank=0 ; rank < n_children ; rank++ ) {
  62.154 +                        tree_nodes[node_index].children_ranks[rank]=
  62.155 +                            rank+tree_order*node;
  62.156 +                        tree_nodes[node_index].children_ranks[rank]+=
  62.157 +                            (n_cum_nodes+n_nodes_in_this_level);
  62.158 +                    }
  62.159 +                }
  62.160 +            }
  62.161 +
  62.162 +        } /* end node loop */
  62.163 +
  62.164 +        /* update helper counters */
  62.165 +        n_cum_nodes+=n_nodes_in_this_level;
  62.166 +        n_nodes_in_this_level*=tree_order;
  62.167 +    }
  62.168 +
  62.169 +    /* set node type */
  62.170 +    for(i=0 ; i < num_nodes ; i++ ) {
  62.171 +        if( 0 == tree_nodes[i].n_parents ) {
  62.172 +            tree_nodes[i].my_node_type=ROOT_NODE;
  62.173 +        } else if ( 0 == tree_nodes[i].n_children ) {
  62.174 +            tree_nodes[i].my_node_type=LEAF_NODE;
  62.175 +        } else {
  62.176 +            tree_nodes[i].my_node_type=INTERIOR_NODE;
  62.177 +        }
  62.178 +    }
  62.179 +
  62.180 +    /* successful return */
  62.181 +    return OMPI_SUCCESS;
  62.182 +
  62.183 +Error:
  62.184 +    /* free only the arrays of nodes set up before the failure; entries from node_index on hold no allocation or were never initialized */
  62.185 +    for( i=0 ; i < node_index ; i++ ) {
  62.186 +        /* free(NULL) is a no-op, so entries without children need no guard */
  62.187 +        free(tree_nodes[i].children_ranks);
  62.188 +        tree_nodes[i].children_ranks=NULL;
  62.189 +    }
  62.190 +
  62.191 +    /* error return */
  62.192 +    return OMPI_ERROR;
  62.193 +}
    63.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    63.2 +++ b/ompi/patterns/net/netpatterns_nary_tree.c	Tue Feb 19 22:50:56 2013 +0000
    63.3 @@ -0,0 +1,443 @@
    63.4 +/*
    63.5 + * Copyright (c) 2009-2012 Mellanox Technologies.  All rights reserved.
    63.6 + * Copyright (c) 2009-2012 Oak Ridge National Laboratory.  All rights reserved.
    63.7 + * $COPYRIGHT$
    63.8 + *
    63.9 + * Additional copyrights may follow
   63.10 + *
   63.11 + * $HEADER$
   63.12 + */
   63.13 +
   63.14 +#include "ompi_config.h"
   63.15 +#ifdef HAVE_UNISTD_H
   63.16 +#include <unistd.h>
   63.17 +#endif
   63.18 +#include <sys/types.h>
   63.19 +#ifdef HAVE_SYS_MMAN_H
   63.20 +#include <sys/mman.h>
   63.21 +#endif
   63.22 +#include <fcntl.h>
   63.23 +#include <errno.h>
   63.24 +#include <stdlib.h>
   63.25 +#include <stdio.h>
   63.26 +
   63.27 +#include "ompi/constants.h"
   63.28 +#include "netpatterns.h"
   63.29 +
   63.30 +/*
   63.31 + * Create mmaped shared file
   63.32 + */
   63.33 +
   63.34 +/* setup an n-array tree */
   63.35 +
   63.36 +int netpatterns_setup_narray_tree(int tree_order, int my_rank, int num_nodes,
   63.37 +        netpatterns_tree_node_t *my_node)
   63.38 +{
   63.39 +    /* local variables */
   63.40 +    int n_levels, result;
   63.41 +    int my_level_in_tree, cnt;
   63.42 +    int lvl,cum_cnt, my_rank_in_my_level,n_lvls_in_tree;
   63.43 +    int start_index,end_index;
   63.44 +
   63.45 +    /* sanity check */
   63.46 +    if( 1 >= tree_order ) {
   63.47 +        goto Error;
   63.48 +    }
   63.49 +
   63.50 +    my_node->my_rank=my_rank;
   63.51 +    my_node->tree_size=num_nodes;
   63.52 +
   63.53 +    /* figure out number of levels in tree */
   63.54 +    n_levels=0;
   63.55 +    result=num_nodes-1;
   63.56 +    while (0 < result ) {
   63.57 +        result/=tree_order;
   63.58 +        n_levels++;
   63.59 +    };
   63.60 +
   63.61 +    /* figure out who my children and parents are */
   63.62 +    my_level_in_tree=-1;
   63.63 +    result=my_rank;
   63.64 +    /* cnt - number of ranks in given level */
   63.65 +    cnt=1;
   63.66 +    /*  cummulative count of ranks */
   63.67 +    while( 0 <= result ) {
   63.68 +        result-=cnt;
   63.69 +        cnt*=tree_order;
   63.70 +        my_level_in_tree++;
   63.71 +    };
   63.72 +    /* int my_level_in_tree, n_children, n_parents; */
   63.73 +
   63.74 +    if( 0 == my_rank ) {
   63.75 +        my_node->n_parents=0;
   63.76 +        my_node->parent_rank=-1;
   63.77 +        my_rank_in_my_level=0;
   63.78 +    } else {
   63.79 +        my_node->n_parents=1;
   63.80 +        cnt=1;
   63.81 +        cum_cnt=0;
   63.82 +        for (lvl = 0 ; lvl < my_level_in_tree ; lvl ++ ) {
   63.83 +            /* cummulative count up to this level */
   63.84 +            cum_cnt+=cnt;
   63.85 +            /* number of ranks in this level */
   63.86 +            cnt*=tree_order;
   63.87 +        }
   63.88 +        my_rank_in_my_level=my_rank-cum_cnt;
   63.89 +        /* tree_order consecutive ranks have the same parent */
   63.90 +        my_node->parent_rank=cum_cnt-cnt/tree_order+my_rank_in_my_level/tree_order;
   63.91 +    }
   63.92 +    
   63.93 +    /* figure out number of levels in the tree */
   63.94 +    n_lvls_in_tree=0;
   63.95 +    result=num_nodes;
   63.96 +    /* cnt - number of ranks in given level */
   63.97 +    cnt=1;
   63.98 +    /*  cummulative count of ranks */
   63.99 +    while( 0 < result ) {
  63.100 +        result-=cnt;
  63.101 +        cnt*=tree_order;
  63.102 +        n_lvls_in_tree++;
  63.103 +    };
  63.104 +
  63.105 +    my_node->children_ranks=(int *)NULL;
  63.106 +
  63.107 +    /* get list of children */
  63.108 +    if( my_level_in_tree == (n_lvls_in_tree -1 ) ) {
  63.109 +        /* last level has no children */
  63.110 +        my_node->n_children=0;
  63.111 +    } else {
  63.112 +        cum_cnt=0;
  63.113 +        cnt=1;
  63.114 +        for( lvl=0 ; lvl <= my_level_in_tree ; lvl++ ) {
  63.115 +            cum_cnt+=cnt;
  63.116 +            cnt*=tree_order;
  63.117 +        }
  63.118 +        start_index=cum_cnt+my_rank_in_my_level*tree_order;
  63.119 +        end_index=start_index+tree_order-1;
  63.120 +
  63.121 +        /* don't go out of bounds at the end of the list */
  63.122 +        if( end_index >= num_nodes ) {
  63.123 +            end_index = num_nodes-1;
  63.124 +        }
  63.125 +
  63.126 +        if( start_index <= (num_nodes-1) ) {
  63.127 +            my_node->n_children=end_index-start_index+1;
  63.128 +        } else {
  63.129 +            my_node->n_children=0;
  63.130 +        }
  63.131 +
  63.132 +        my_node->children_ranks=NULL;
  63.133 +        if( 0 < my_node->n_children ) {
  63.134 +            my_node->children_ranks=
  63.135 +                (int *)malloc( sizeof(int)*my_node->n_children);
  63.136 +            if( NULL == my_node->children_ranks) {
  63.137 +                goto Error;
  63.138 +            }
  63.139 +            for (lvl= start_index ; lvl <= end_index ; lvl++ ) {
  63.140 +                my_node->children_ranks[lvl-start_index]=lvl;
  63.141 +            }
  63.142 +        } 
  63.143 +    }
  63.144 +    /* set node type */
  63.145 +    if( 0 == my_node->n_parents ) {
  63.146 +        my_node->my_node_type=ROOT_NODE;
  63.147 +    } else if ( 0 == my_node->n_children ) {
  63.148 +        my_node->my_node_type=LEAF_NODE;
  63.149 +    } else {
  63.150 +        my_node->my_node_type=INTERIOR_NODE;
  63.151 +    }
  63.152 +
  63.153 +
  63.154 +    /* successful return */
  63.155 +    return OMPI_SUCCESS;
  63.156 +
  63.157 +Error:
  63.158 +
  63.159 +    /* error return */
  63.160 +    return OMPI_ERROR;
  63.161 +}
  63.162 +
  63.163 +int netpatterns_setup_narray_knomial_tree(
  63.164 +        int tree_order, int my_rank, int num_nodes,
  63.165 +        netpatterns_narray_knomial_tree_node_t *my_node)
  63.166 +{
  63.167 +    /* local variables */
  63.168 +    int n_levels, result;
  63.169 +    int my_level_in_tree, cnt ;
  63.170 +    int lvl,cum_cnt, my_rank_in_my_level,n_lvls_in_tree;
  63.171 +    int start_index,end_index;
  63.172 +    int rc;
  63.173 +
  63.174 +    /* sanity check */
  63.175 +    if( 1 >= tree_order ) {
  63.176 +        goto Error;
  63.177 +    }
  63.178 +
  63.179 +    my_node->my_rank=my_rank;
  63.180 +    my_node->tree_size=num_nodes;
  63.181 +
  63.182 +    /* figure out number of levels in tree */
  63.183 +    n_levels=0;
  63.184 +    result=num_nodes-1;
  63.185 +    while (0 < result ) {
  63.186 +        result/=tree_order;
  63.187 +        n_levels++;
  63.188 +    };
  63.189 +
  63.190 +    /* figure out who my children and parents are */
  63.191 +    my_level_in_tree=-1;
  63.192 +    result=my_rank;
  63.193 +    /* cnt - number of ranks in given level */
  63.194 +    cnt=1;
  63.195 +    /*  cummulative count of ranks */
  63.196 +    while( 0 <= result ) {
  63.197 +        result-=cnt;
  63.198 +        cnt*=tree_order;
  63.199 +        my_level_in_tree++;
  63.200 +    };
  63.201 +    /* int my_level_in_tree, n_children, n_parents; */
  63.202 +
  63.203 +    if( 0 == my_rank ) {
  63.204 +        my_node->n_parents=0;
  63.205 +        my_node->parent_rank=-1;
  63.206 +        my_rank_in_my_level=0;
  63.207 +    } else {
  63.208 +        my_node->n_parents=1;
  63.209 +        cnt=1;
  63.210 +        cum_cnt=0;
  63.211 +        for (lvl = 0 ; lvl < my_level_in_tree ; lvl ++ ) {
  63.212 +            /* cummulative count up to this level */
  63.213 +            cum_cnt+=cnt;
  63.214 +            /* number of ranks in this level */
  63.215 +            cnt*=tree_order;
  63.216 +        }
  63.217 +
  63.218 +        my_node->rank_on_level = 
  63.219 +            my_rank_in_my_level = 
  63.220 +            my_rank-cum_cnt;
  63.221 +        my_node->level_size = cnt;
  63.222 +
  63.223 +        rc = netpatterns_setup_recursive_knomial_tree_node(
  63.224 +                my_node->level_size, my_node->rank_on_level, 
  63.225 +                tree_order, &my_node->k_node);
  63.226 +        if (OMPI_SUCCESS != rc) {
  63.227 +            goto Error;
  63.228 +        }
  63.229 +
  63.230 +        /* tree_order consecutive ranks have the same parent */
  63.231 +        my_node->parent_rank=cum_cnt-cnt/tree_order+my_rank_in_my_level/tree_order;
  63.232 +    }
  63.233 +    
  63.234 +    /* figure out number of levels in the tree */
  63.235 +    n_lvls_in_tree=0;
  63.236 +    result=num_nodes;
  63.237 +    /* cnt - number of ranks in given level */
  63.238 +    cnt=1;
  63.239 +    /*  cummulative count of ranks */
  63.240 +    while( 0 < result ) {
  63.241 +        result-=cnt;
  63.242 +        cnt*=tree_order;
  63.243 +        n_lvls_in_tree++;
  63.244 +    };
  63.245 +
  63.246 +    if(result < 0) {
  63.247 +        /* reset the size on group */
  63.248 +        num_nodes = cnt / tree_order;
  63.249 +    }
  63.250 +
  63.251 +    my_node->children_ranks=(int *)NULL;
  63.252 +
  63.253 +    /* get list of children */
  63.254 +    if( my_level_in_tree == (n_lvls_in_tree -1 ) ) {
  63.255 +        /* last level has no children */
  63.256 +        my_node->n_children=0;
  63.257 +    } else {
  63.258 +        cum_cnt=0;
  63.259 +        cnt=1;
  63.260 +        for( lvl=0 ; lvl <= my_level_in_tree ; lvl++ ) {
  63.261 +            cum_cnt+=cnt;
  63.262 +            cnt*=tree_order;
  63.263 +        }
  63.264 +        start_index=cum_cnt+my_rank_in_my_level*tree_order;
  63.265 +        end_index=start_index+tree_order-1;
  63.266 +
  63.267 +        /* don't go out of bounds at the end of the list */
  63.268 +        if( end_index >= num_nodes ) {
  63.269 +            end_index = num_nodes-1;
  63.270 +        }
  63.271 +
  63.272 +        if( start_index <= (num_nodes-1) ) {
  63.273 +            my_node->n_children=end_index-start_index+1;
  63.274 +        } else {
  63.275 +            my_node->n_children=0;
  63.276 +        }
  63.277 +
  63.278 +        my_node->children_ranks=NULL;
  63.279 +        if( 0 < my_node->n_children ) {
  63.280 +            my_node->children_ranks=
  63.281 +                (int *)malloc( sizeof(int)*my_node->n_children);
  63.282 +            if( NULL == my_node->children_ranks) {
  63.283 +                goto Error;
  63.284 +            }
  63.285 +            for (lvl= start_index ; lvl <= end_index ; lvl++ ) {
  63.286 +                my_node->children_ranks[lvl-start_index]=lvl;
  63.287 +            }
  63.288 +        } 
  63.289 +    }
  63.290 +    /* set node type */
  63.291 +    if( 0 == my_node->n_parents ) {
  63.292 +        my_node->my_node_type=ROOT_NODE;
  63.293 +    } else if ( 0 == my_node->n_children ) {
  63.294 +        my_node->my_node_type=LEAF_NODE;
  63.295 +    } else {
  63.296 +        my_node->my_node_type=INTERIOR_NODE;
  63.297 +    }
  63.298 +
  63.299 +
  63.300 +    /* successful return */
  63.301 +    return OMPI_SUCCESS;
  63.302 +
  63.303 +Error:
  63.304 +
  63.305 +    /* error return */
  63.306 +    return OMPI_ERROR;
  63.307 +}
  63.308 +
  63.309 +/* calculate the nearest power of radix that is equal to or greater
  63.310 + * than size, with the specified radix.  The resulting tree is of
  63.311 + * depth n_lvls.
  63.312 + */
  63.313 +OMPI_DECLSPEC int roundup_to_power_radix ( int radix, int size, int *n_lvls )
  63.314 +{
  63.315 +    int n_levels=0, return_value=1;
  63.316 +    int result;
  63.317 +    if( 1 > size ) {
  63.318 +        return 0;
  63.319 +    }
  63.320 +
  63.321 +    result=size-1;
  63.322 +    while (0 < result ) {
  63.323 +        result/=radix;
  63.324 +        n_levels++;
  63.325 +        return_value*=radix;
  63.326 +    };
  63.327 +    *n_lvls=n_levels;
  63.328 +    return return_value;
  63.329 +}
  63.330 +
  63.331 +static int fill_in_node_data(int tree_order, int num_nodes, int my_node,
  63.332 +        netpatterns_tree_node_t *nodes_data)
  63.333 +{
  63.334 +    /* local variables */
  63.335 +    int rc, num_ranks_per_child, num_children, n_extra;
  63.336 +    int child, rank, n_to_offset, n_ranks_to_child;
  63.337 +    
  63.338 +    /* figure out who are my children */
  63.339 +    num_ranks_per_child=num_nodes/tree_order;
  63.340 +    if( num_ranks_per_child ) {
  63.341 +        num_children=tree_order;
  63.342 +        n_extra=num_nodes-num_ranks_per_child*tree_order;
  63.343 +    } else {
  63.344 +        num_children=num_nodes;
  63.345 +        /* each child has the same number of descendents - 1 */
  63.346 +        n_extra=0;
  63.347 +        /* when there is a child, there is at least one
  63.348 +         * descendent */
  63.349 +        num_ranks_per_child=1;
  63.350 +    }
  63.351 +
  63.352 +    nodes_data[my_node].n_children=num_children;
  63.353 +    if( num_children ) {
  63.354 +        nodes_data[my_node].children_ranks=(int *)
  63.355 +            malloc(sizeof(int)*num_children);
  63.356 +        if(!nodes_data[my_node].children_ranks) {
  63.357 +
  63.358 +            if ( NULL == nodes_data[my_node].children_ranks )
  63.359 +            {
  63.360 +                fprintf(stderr, "Cannot allocate memory for children_ranks.\n");
  63.361 +                rc = OMPI_ERR_OUT_OF_RESOURCE;
  63.362 +                goto error;
  63.363 +            }
  63.364 +        }
  63.365 +    }
  63.366 +
  63.367 +    rank = my_node;
  63.368 +    for( child=0 ; child < num_children ; child ++ ) {
  63.369 +
  63.370 +    /* set parent information */
  63.371 +        nodes_data[rank].n_parents=1;
  63.372 +        nodes_data[rank].parent_rank=my_node;
  63.373 +        if( n_extra ) {
  63.374 +            n_to_offset=child;
  63.375 +            if( n_to_offset > n_extra){
  63.376 +                n_to_offset=n_extra;
  63.377 +            }
  63.378 +        } else {
  63.379 +            n_to_offset=0;
  63.380 +        }
  63.381 +
  63.382 +        rank=my_node+1+child*num_ranks_per_child;
  63.383 +        rank+=n_to_offset;
  63.384 +
  63.385 +        /* set parent information */        
  63.386 +        nodes_data[rank].n_parents=1;
  63.387 +        nodes_data[rank].parent_rank=my_node;
  63.388 +
  63.389 +        n_ranks_to_child=num_ranks_per_child;
  63.390 +        if(n_extra && (child < n_extra) ) {
  63.391 +            n_ranks_to_child++;
  63.392 +        }
  63.393 +    
  63.394 +        /* set child information */
  63.395 +        nodes_data[my_node].children_ranks[child]=rank;
  63.396 +
  63.397 +        /* remove the child from the list of ranks */
  63.398 +        n_ranks_to_child--;
  63.399 +        rc=fill_in_node_data(tree_order, n_ranks_to_child, rank, nodes_data);
  63.400 +        if( OMPI_SUCCESS != rc ) {
  63.401 +            goto error;
  63.402 +        }
  63.403 +
  63.404 +    }
  63.405 +
  63.406 +    /* return */
  63.407 +    return OMPI_SUCCESS;
  63.408 +
  63.409 +    /* Error */
  63.410 +error:
  63.411 +    return rc;
  63.412 +
  63.413 +}
  63.414 +
  63.415 +/*
  63.416 + * This routine sets up the array describing the communication tree for
  63.417 + * a k-ary tree where the children form a contiguous range of ranks at
  63.418 + * each level.  The assumption here is that rank 0 is always the root -
  63.419 + * ranks may be rotated based on who the actual root is, to obtain the
  63.420 + * appropriate communication pattern for such roots.
  63.421 + */
  63.422 +OMPI_DECLSPEC int netpatterns_setup_narray_tree_contigous_ranks(
  63.423 +        int tree_order, int num_nodes, 
  63.424 +        netpatterns_tree_node_t **tree_nodes)
  63.425 +{
  63.426 +    /* local variables */
  63.427 +    int num_descendent_ranks=num_nodes-1;
  63.428 +    int rc=OMPI_SUCCESS;
  63.429 +
  63.430 +    *tree_nodes=(netpatterns_tree_node_t *)malloc(
  63.431 +            sizeof(netpatterns_tree_node_t)*
  63.432 +            num_nodes);
  63.433 +    if(!(*tree_nodes) ) {
  63.434 +        fprintf(stderr, "Cannot allocate memory for tree_nodes.\n");
  63.435 +        rc = OMPI_ERR_OUT_OF_RESOURCE;
  63.436 +        return rc;
  63.437 +    }
  63.438 +
  63.439 +    (*tree_nodes)[0].n_parents=0;
  63.440 +    rc=fill_in_node_data(tree_order, 
  63.441 +            num_descendent_ranks, 0, *tree_nodes);
  63.442 +
  63.443 +    /* successful return */
  63.444 +    return rc;
  63.445 +
  63.446 +}