Open MPI logo

Open MPI Development Mailing List Archives

  |   Home   |   Support   |   FAQ   |   all Development mailing list

Subject: [OMPI devel] [PATCH] openib btl: extensable cpc selection enablement
From: Jon Mason (jon_at_[hidden])
Date: 2008-01-09 18:37:54


The new cpc selection framework is now in place. The patch below allows
for dynamic selection of cpc methods based on what is available. It
also allows for inclusion/exclusion of methods. It even further allows
for modifying the priorities of certain cpc methods to better determine
the optimal cpc method.

This patch also contains XRC compile time disablement (per Jeff's
patch).

At a high level, the cpc selection works by walking through each cpc
and allowing it to test to see if it is permissible to run on this
mpirun. It returns a priority if it is permissible or a -1 if not. All
of the cpc names and priorities are rolled into a string. This string
is then encapsulated in a message and passed around all the ompi
processes. Once received and unpacked, the list received is compared
to a local copy of the list. The connection method is chosen by
comparing the lists passed around to all nodes via modex with the list
generated locally. Any non-negative number is a potentially valid
connection method. The method below of determining the optimal
connection method is to take the intersection of the two lists. The
highest single value (and the other side being non-negative) is selected
as the cpc method.

Please test it out. The tree can be found at
https://svn.open-mpi.org/svn/ompi/tmp-public/openib-cpc/

This patch has been tested with IB and iWARP adapters on a 2 node system
(with it correctly choosing to use oob and happily ignoring iWARP
adapters). It needs XRC testing and testing of larger node systems.

Many thanks to Jeff for all of his help.

Thanks,
Jon

Index: ompi/mca/btl/openib/btl_openib_component.c
===================================================================
--- ompi/mca/btl/openib/btl_openib_component.c (revision 17101)
+++ ompi/mca/btl/openib/btl_openib_component.c (working copy)
@@ -155,30 +155,70 @@
  */
 static int btl_openib_modex_send(void)
 {
- int rc, i;
- size_t size;
- mca_btl_openib_port_info_t *ports = NULL;
+ int rc, i;
+ char *message, *offset;
+ uint32_t size, size_save;
+ size_t msg_size;
 
- size = mca_btl_openib_component.ib_num_btls * sizeof (mca_btl_openib_port_info_t);
- if (size != 0) {
- ports = (mca_btl_openib_port_info_t *)malloc (size);
- if (NULL == ports) {
- BTL_ERROR(("Failed malloc: %s:%d\n", __FILE__, __LINE__));
- return OMPI_ERR_OUT_OF_RESOURCE;
- }
+ /* The message is packed into 2 parts:
+ * 1. a uint32_t indicating the number of ports in the message
+ * 2. for each port:
+ * a. the port data
+ * b. a uint32_t indicating a string length
+ * c. the string cpc list for that port, length specified by 2b.
+ */
+ msg_size = sizeof(uint32_t) + mca_btl_openib_component.ib_num_btls * (sizeof(uint32_t) + sizeof(mca_btl_openib_port_info_t));
+ for (i = 0; i < mca_btl_openib_component.ib_num_btls; i++) {
+ msg_size += strlen(mca_btl_openib_component.openib_btls[i]->port_info.cpclist);
+ }
 
- for (i = 0; i < mca_btl_openib_component.ib_num_btls; i++) {
- mca_btl_openib_module_t *btl = mca_btl_openib_component.openib_btls[i];
- ports[i] = btl->port_info;
+ if (0 == msg_size) {
+ return 0;
+ }
+
+ message = malloc(msg_size);
+ if (NULL == message) {
+ BTL_ERROR(("Failed malloc: %s:%d\n", __FILE__, __LINE__));
+ return OMPI_ERR_OUT_OF_RESOURCE;
+ }
+
+ /* Pack the number of ports */
+ size = mca_btl_openib_component.ib_num_btls;
 #if !defined(WORDS_BIGENDIAN) && OMPI_ENABLE_HETEROGENEOUS_SUPPORT
- MCA_BTL_OPENIB_PORT_INFO_HTON(ports[i]);
+ size = htonl(size);
 #endif
- }
+ memcpy(message, &size, sizeof(size));
+ offset = message + sizeof(size);
+
+ /* Pack each of the ports */
+ for (i = 0; i < mca_btl_openib_component.ib_num_btls; i++) {
+ /* Pack the port struct */
+ memcpy(offset, &mca_btl_openib_component.openib_btls[i]->port_info, sizeof(mca_btl_openib_port_info_t));
+#if !defined(WORDS_BIGENDIAN) && OMPI_ENABLE_HETEROGENEOUS_SUPPORT
+ MCA_BTL_OPENIB_PORT_INFO_HTON(*(mca_btl_openib_port_info_t *)offset);
+#endif
+ offset += sizeof(mca_btl_openib_port_info_t);
+
+ /* Pack the strlen of the cpclist */
+ size = size_save =
+ strlen(mca_btl_openib_component.openib_btls[i]->port_info.cpclist);
+#if !defined(WORDS_BIGENDIAN) && OMPI_ENABLE_HETEROGENEOUS_SUPPORT
+ size = htonl(size);
+#endif
+ memcpy(offset, &size, sizeof(size));
+ offset += sizeof(size);
+
+ /* Pack the string */
+ memcpy(offset,
+ mca_btl_openib_component.openib_btls[i]->port_info.cpclist,
+ size_save);
+ offset += size_save;
     }
- rc = ompi_modex_send (&mca_btl_openib_component.super.btl_version, ports, size);
- if (NULL != ports) {
- free (ports);
- }
+
+ rc = ompi_modex_send(&mca_btl_openib_component.super.btl_version,
+ message, msg_size);
+ free(message);
+
     return rc;
 }
 
@@ -357,6 +397,8 @@
             lid < ib_port_attr->lid + lmc; lid++){
         for(i = 0; i < mca_btl_openib_component.btls_per_lid; i++){
             char param[40];
+ int rc;
+
             openib_btl = malloc(sizeof(mca_btl_openib_module_t));
             if(NULL == openib_btl) {
                 BTL_ERROR(("Failed malloc: %s:%d\n", __FILE__, __LINE__));
@@ -383,6 +425,11 @@
                 openib_btl->port_info.lid = lid;
             }
 #endif
+ rc = ompi_btl_openib_connect_base_query(&openib_btl->port_info.cpclist, hca);
+ if (rc != OMPI_SUCCESS) {
+ continue;
+ }
+
             openib_btl->ib_reg[MCA_BTL_TAG_BTL].cbfunc = btl_openib_control;
             openib_btl->ib_reg[MCA_BTL_TAG_BTL].cbdata = NULL;
 
@@ -1295,10 +1342,6 @@
             return NULL;
      }
 
- /* Setup connect module */
- if (OMPI_SUCCESS != ompi_btl_openib_connect_base_select()) {
- return NULL;
- }
     btl_openib_modex_send();
 
     *num_btl_modules = mca_btl_openib_component.ib_num_btls;
Index: ompi/mca/btl/openib/btl_openib_proc.c
===================================================================
--- ompi/mca/btl/openib/btl_openib_proc.c (revision 17101)
+++ ompi/mca/btl/openib/btl_openib_proc.c (working copy)
@@ -100,17 +100,19 @@
 mca_btl_openib_proc_t* mca_btl_openib_proc_create(ompi_proc_t* ompi_proc)
 {
     mca_btl_openib_proc_t* module_proc = NULL;
- size_t size;
+ size_t msg_size;
+ uint32_t size;
 #if !defined(WORDS_BIGENDIAN) && OMPI_ENABLE_HETEROGENEOUS_SUPPORT
     size_t i;
 #endif
     int rc;
+ void *message;
+ char *offset;
     
     /* Check if we have already created a IB proc
      * structure for this ompi process */
     module_proc = mca_btl_openib_proc_lookup_ompi(ompi_proc);
-
- if(module_proc != NULL) {
+ if (NULL != module_proc) {
         /* Gotcha! */
         return module_proc;
     }
@@ -126,48 +128,67 @@
      * size) to represent the proc */
     module_proc->proc_guid = ompi_proc->proc_name;
 
-
     /* query for the peer address info */
- rc = ompi_modex_recv(
- &mca_btl_openib_component.super.btl_version,
- ompi_proc,
- (void*)&module_proc->proc_ports,
- &size
- );
-
-
-
- if(OMPI_SUCCESS != rc) {
+ rc = ompi_modex_recv(&mca_btl_openib_component.super.btl_version,
+ ompi_proc,
+ &message,
+ &msg_size);
+ if (OMPI_SUCCESS != rc) {
         BTL_ERROR(("[%s:%d] ompi_modex_recv failed for peer %s",
                    __FILE__, __LINE__,
                    ORTE_NAME_PRINT(&ompi_proc->proc_name)));
         OBJ_RELEASE(module_proc);
         return NULL;
     }
-
- if((size % sizeof(mca_btl_openib_port_info_t)) != 0) {
- BTL_ERROR(("[%s:%d] invalid module address for peer %s",
- __FILE__, __LINE__,
- ORTE_NAME_PRINT(&ompi_proc->proc_name)));
- OBJ_RELEASE(module_proc);
+ if (0 == msg_size) {
         return NULL;
     }
 
- module_proc->proc_port_count = size/sizeof(mca_btl_openib_port_info_t);
+ /* Message was packed in btl_openib_component.c; the format is
+ listed in a comment in that file */
+ /* Unpack the number of ports in the message */
+ offset = message;
+ memcpy(&(module_proc->proc_port_count), offset, sizeof(uint32_t));
+#if !defined(WORDS_BIGENDIAN) && OMPI_ENABLE_HETEROGENEOUS_SUPPORT
+ module_proc->proc_port_count = ntohl(module_proc->proc_port_count);
+#endif
+ module_proc->proc_ports = (mca_btl_openib_port_info_t *)malloc(sizeof(mca_btl_openib_port_info_t) * module_proc->proc_port_count);
+ offset += sizeof(uint32_t);
 
+ /* Loop over unpacking all the ports */
+ for (i = 0; i < module_proc->proc_port_count; i++) {
+ /* Unpack the port */
+ memcpy(&module_proc->proc_ports[i], offset,
+ sizeof(mca_btl_openib_port_info_t));
+#if !defined(WORDS_BIGENDIAN) && OMPI_ENABLE_HETEROGENEOUS_SUPPORT
+ MCA_BTL_OPENIB_PORT_INFO_NTOH(module_proc->proc_ports[i]);
+#endif
+ offset += sizeof(mca_btl_openib_port_info_t);
+
+ /* Unpack the string length */
+ memcpy(&size, offset, sizeof(size));
+#if !defined(WORDS_BIGENDIAN) && OMPI_ENABLE_HETEROGENEOUS_SUPPORT
+ size = ntohl(size);
+#endif
+ offset += sizeof(size);
+
+ /* Unpack the string */
+ module_proc->proc_ports[i].cpclist = malloc(size + 1);
+ if (NULL == module_proc->proc_ports[i].cpclist) {
+ /* JMS some error */
+ }
+ memcpy(module_proc->proc_ports[i].cpclist, offset, size);
+ module_proc->proc_ports[i].cpclist[size] = '\0';
+ offset += size;
+ }
+
     if (0 == module_proc->proc_port_count) {
         module_proc->proc_endpoints = NULL;
     } else {
         module_proc->proc_endpoints = (mca_btl_base_endpoint_t**)
             malloc(module_proc->proc_port_count * sizeof(mca_btl_base_endpoint_t*));
     }
-#if !defined(WORDS_BIGENDIAN) && OMPI_ENABLE_HETEROGENEOUS_SUPPORT
- for(i=0; i < module_proc->proc_port_count; ++i) {
- MCA_BTL_OPENIB_PORT_INFO_NTOH(module_proc->proc_ports[i]);
- }
-#endif
-
- if(NULL == module_proc->proc_endpoints) {
+ if (NULL == module_proc->proc_endpoints) {
         OBJ_RELEASE(module_proc);
         return NULL;
     }
Index: ompi/mca/btl/openib/connect/connect.h
===================================================================
--- ompi/mca/btl/openib/connect/connect.h (revision 17102)
+++ ompi/mca/btl/openib/connect/connect.h (working copy)
@@ -62,16 +62,23 @@
  * main openib BTL will start sending out fragments that were queued
  * while the connection was establing, etc.).
  */
-
 #ifndef BTL_OPENIB_CONNECT_H
 #define BTL_OPENIB_CONNECT_H
 
 BEGIN_C_DECLS
 
+#define BCF_MAX_NAME 64
+
 /**
+ * Must forward declare mca_btl_openib_hca_t; it's defined in
+ * btl_openib.h, but that file includes this file.
+ */
+struct mca_btl_openib_hca_t;
+
+/**
  * Function to register MCA params in the connect functions
  */
-typedef int (*ompi_btl_openib_connect_base_func_open_t)(void);
+typedef void (*ompi_btl_openib_connect_base_func_open_t)(void);
 
 /**
  * Function to intialize the connection functions (i.e., it's been
@@ -86,12 +93,15 @@
     (struct mca_btl_base_endpoint_t *e);
 
 /**
+ * Query the CPC to see if it wants to run on a specific HCA
+ */
+typedef int (*ompi_btl_openib_connect_base_func_query_t)(struct mca_btl_openib_hca_t *hca);
+
+/**
  * Function to finalize the connection functions
  */
 typedef int (*ompi_btl_openib_connect_base_func_finalize_t)(void);
 
-#define BCF_MAX_NAME 64
-
 struct ompi_btl_openib_connect_base_funcs_t {
     /** Name of this set of connection functions */
     char bcf_name[BCF_MAX_NAME];
@@ -105,8 +115,11 @@
     /** Connect function */
     ompi_btl_openib_connect_base_func_start_connect_t bcf_start_connect;
 
+ /** Query function */
+ ompi_btl_openib_connect_base_func_query_t bcf_query;
+
     /** Finalize function */
- ompi_btl_openib_connect_base_func_open_t bcf_finalize;
+ ompi_btl_openib_connect_base_func_finalize_t bcf_finalize;
 };
 typedef struct ompi_btl_openib_connect_base_funcs_t ompi_btl_openib_connect_base_funcs_t;
 
Index: ompi/mca/btl/openib/connect/base.h
===================================================================
--- ompi/mca/btl/openib/connect/base.h (revision 17102)
+++ ompi/mca/btl/openib/connect/base.h (working copy)
@@ -28,7 +28,8 @@
 /*
  * Select function
  */
-int ompi_btl_openib_connect_base_select(void);
+int ompi_btl_openib_connect_base_select(char*, char*);
+int ompi_btl_openib_connect_base_query(char**, mca_btl_openib_hca_t*);
 
 END_C_DECLS
 
Index: ompi/mca/btl/openib/connect/btl_openib_connect_base.c
===================================================================
--- ompi/mca/btl/openib/connect/btl_openib_connect_base.c (revision 17102)
+++ ompi/mca/btl/openib/connect/btl_openib_connect_base.c (working copy)
@@ -33,7 +33,9 @@
  */
 static ompi_btl_openib_connect_base_funcs_t *all[] = {
     &ompi_btl_openib_connect_oob,
+#if HAVE_XRC
     &ompi_btl_openib_connect_xoob,
+#endif
     &ompi_btl_openib_connect_rdma_cm,
     &ompi_btl_openib_connect_ibcm,
     NULL
@@ -42,7 +44,8 @@
 /*
  * MCA parameter value
  */
-static char *param = NULL;
+static char *cpc_include = NULL;
+static char *cpc_exclude = NULL;
 
 /*
  * Register MCA parameters
@@ -60,38 +63,26 @@
     a = opal_argv_join(temp, ',');
     opal_argv_free(temp);
     asprintf(&b,
- "Method used to make OpenFabrics connections (valid values: %s)",
+ "Method used to select OpenFabrics connections (valid values: %s)",
              a);
 
- /* For XRC qps we must to use XOOB connection manager */
- if (mca_btl_openib_component.num_xrc_qps > 0) {
- mca_base_param_reg_string(&mca_btl_openib_component.super.btl_version,
- "connect",
- b, false, false,
- "xoob", &param);
- if (0 != strcmp("xoob", param)) {
- opal_show_help("help-mpi-btl-openib.txt",
- "XRC with wrong OOB", true,
- orte_system_info.nodename,
- mca_btl_openib_component.num_xrc_qps);
- return OMPI_ERROR;
- }
- } else { /* For all others we should use OOB */
- mca_base_param_reg_string(&mca_btl_openib_component.super.btl_version,
- "connect",
- b, false, false,
- "oob", &param);
- if (0 != strcmp("oob", param)) {
- opal_show_help("help-mpi-btl-openib.txt",
- "SRQ or PP with wrong OOB", true,
- orte_system_info.nodename,
- mca_btl_openib_component.num_srq_qps,
- mca_btl_openib_component.num_pp_qps);
- return OMPI_ERROR;
- }
- }
+ mca_base_param_reg_string(&mca_btl_openib_component.super.btl_version,
+ "cpc_include", b, false, false, NULL, &cpc_include);
+ free(a);
+ free(b);
 
- /* Call the open function on all the connect modules */
+ asprintf(&b,
+ "Method used to exclude OpenFabrics connections (valid values: %s)",
+ a);
+
+ mca_base_param_reg_string(&mca_btl_openib_component.super.btl_version,
+ "cpc_exclude", b, false, false, NULL, &cpc_exclude);
+ free(a);
+ free(b);
+
+ /* Call the open function on all the connect modules so that they
+ * may setup any MCA params specific to the connection type
+ */
     for (i = 0; NULL != all[i]; ++i) {
         if (NULL != all[i]->bcf_open) {
             all[i]->bcf_open();
@@ -101,33 +92,134 @@
     return OMPI_SUCCESS;
 }
 
+/*
+ * The connection method is chosen by comparing the lists passed around
+ * to all nodes via modex with the list generated locally. Any
+ * non-negative number is a potentially valid connection method. The
+ * method below of determining the optimal connection method is to take
+ * the cross-section of the two lists. The highest single value (and
+ * the other side being non-negative) is selected as the cpc method.
+ */
+int ompi_btl_openib_connect_base_select(char *remotelist, char *locallist)
+{
+ int i, j, max = -1;
+ char **localist_formatted, **remotelist_formatted;
+ char *name;
 
-int ompi_btl_openib_connect_base_select(void)
+ BTL_VERBOSE(("remotelist = %s locallist = %s\n", remotelist, locallist));
+
+ localist_formatted = opal_argv_split(locallist, ',');
+ remotelist_formatted = opal_argv_split(remotelist, ',');
+
+ for (i = 0; NULL != localist_formatted[i] && NULL != localist_formatted[i+1]; i+=2) {
+ for (j = 0; NULL != remotelist_formatted[j] && NULL != remotelist_formatted[j+1]; j+=2) {
+ int local_val, remote_val;
+
+ local_val = atoi(localist_formatted[i+1]);
+ remote_val = atoi(remotelist_formatted[j+1]);
+
+ if (0 == strcmp(localist_formatted[i], remotelist_formatted[j]) &&
+ (-1 != local_val && -1 != remote_val)) {
+ if (local_val > max) {
+ max = local_val;
+ name = localist_formatted[i];
+ }
+ if (remote_val > max) {
+ max = remote_val;
+ name = remotelist_formatted[j];
+ }
+ }
+ }
+ }
+
+ if (-1 == max) {
+ BTL_ERROR(("Failed to find any working connections"));
+ return OMPI_ERROR;
+ }
+
+ for (i = 0; NULL != all[i]; i++) {
+ if (0 == strcmp(all[i]->bcf_name, name)) {
+ all[i]->bcf_init();
+ ompi_btl_openib_connect = *(all[i]);
+ break;
+ }
+ }
+
+ BTL_VERBOSE(("%s selected as transport\n", all[i]->bcf_name));
+
+ opal_argv_free(localist_formatted);
+ opal_argv_free(remotelist_formatted);
+
+ return OMPI_SUCCESS;
+}
+
+static inline int cpc_specific_query(char ***cpclist, mca_btl_openib_hca_t *hca, int cpc_counter, bool *valid)
 {
- int i;
+ char *temp;
+ int rc;
 
- /* Go through all the pseudo-components; if the btl_openib_connect
- param is empty, then take the first one that returns
- OMPI_SUCCESS from its init function. If
- btl_openib_connect_param is not empty, find that one and ensure
- that its init function returns OMPI_SUCCESS. */
- if (NULL != param && '\0' == param[0]) {
- param = NULL;
+ if (NULL == all[cpc_counter]->bcf_query) {
+ return OMPI_SUCCESS;
     }
+
+ rc = all[cpc_counter]->bcf_query(hca);
+ if (rc > 0) {
+ *valid = 1;
+ }
+
+ asprintf(&temp, "%s,%d", all[cpc_counter]->bcf_name, rc);
+ opal_argv_append_nosize(cpclist, temp);
+ return OMPI_SUCCESS;
+}
+
+int ompi_btl_openib_connect_base_query(char **cpclist, mca_btl_openib_hca_t *hca)
+{
+ int i, rc;
+ bool valid = 0;
+ char **cpclist_include, **cpclist_exclude, **namepriority_list = NULL;
+
+ cpclist_include = opal_argv_split(cpc_include, ',');
+ cpclist_exclude = opal_argv_split(cpc_exclude, ',');
+
+ /* Go through all the CMs to create a list of usable CPCs */
     for (i = 0; NULL != all[i]; ++i) {
- if ((NULL != param && 0 == strcmp(all[i]->bcf_name, param)) ||
- (NULL == param)) {
- if (NULL != all[i]->bcf_init &&
- OMPI_SUCCESS == all[i]->bcf_init()) {
- ompi_btl_openib_connect = *(all[i]);
- break;
+ if (NULL != cpclist_include) {
+ int j;
+ for (j = 0; NULL != cpclist_include[j]; ++j) {
+ if (0 == strcmp(cpclist_include[j], all[i]->bcf_name)) {
+ rc = cpc_specific_query(&namepriority_list, hca, i, &valid);
+ if (OMPI_ERROR == rc) {
+ return OMPI_ERROR;
+ }
+ }
             }
+ } else if (NULL != cpclist_exclude) {
+ int j;
+ for (j = 0; NULL != cpclist_exclude[j]; ++j) {
+ if (0 != strcmp(cpclist_exclude[j], all[i]->bcf_name)) {
+ rc = cpc_specific_query(&namepriority_list, hca, i, &valid);
+ if (OMPI_ERROR == rc) {
+ return OMPI_ERROR;
+ }
+ }
+ }
+ } else {
+ rc = cpc_specific_query(&namepriority_list, hca, i, &valid);
+ if (OMPI_ERROR == rc) {
+ return OMPI_ERROR;
+ }
         }
     }
- if (NULL == all[i]) {
- /* JMS opal_show_help */
- return OMPI_ERR_NOT_FOUND;
+
+ if (0 == valid) {
+ BTL_ERROR(("Failed to find any valid connections for %s, not "
+ "using it for this run",
+ ibv_get_device_name(hca->ib_dev)));
+ return OMPI_ERROR;
     }
 
+ *cpclist = opal_argv_join(namepriority_list, ',');
+ opal_argv_free(namepriority_list);
+
     return OMPI_SUCCESS;
 }
Index: ompi/mca/btl/openib/connect/btl_openib_connect_ibcm.c
===================================================================
--- ompi/mca/btl/openib/connect/btl_openib_connect_ibcm.c (revision 17102)
+++ ompi/mca/btl/openib/connect/btl_openib_connect_ibcm.c (working copy)
@@ -13,7 +13,7 @@
 #include "btl_openib_endpoint.h"
 #include "connect/connect.h"
 
-static int ibcm_open(void);
+static void ibcm_open(void);
 static int ibcm_init(void);
 static int ibcm_connect(mca_btl_base_endpoint_t *e);
 static int ibcm_finalize(void);
@@ -23,17 +23,16 @@
     ibcm_open,
     ibcm_init,
     ibcm_connect,
+ NULL,
     ibcm_finalize,
 };
 
-static int ibcm_open(void)
+static void ibcm_open(void)
 {
     mca_base_param_reg_int(&mca_btl_openib_component.super.btl_version,
- "btl_openib_connect_ibcm_foo",
+ "connect_ibcm_foo",
                            "A dummy help message", false, false,
                            17, NULL);
-
- return OMPI_SUCCESS;
 }
 
 static int ibcm_init(void)
Index: ompi/mca/btl/openib/connect/btl_openib_connect_oob.c
===================================================================
--- ompi/mca/btl/openib/connect/btl_openib_connect_oob.c (revision 17102)
+++ ompi/mca/btl/openib/connect/btl_openib_connect_oob.c (working copy)
@@ -39,8 +39,12 @@
     ENDPOINT_CONNECT_ACK
 } connect_message_type_t;
 
+static int oob_priority = 50;
+
+static void oob_open(void);
 static int oob_init(void);
 static int oob_start_connect(mca_btl_base_endpoint_t *e);
+static int oob_query(mca_btl_openib_hca_t *hca);
 static int oob_finalize(void);
 
 static int reply_start_connect(mca_btl_openib_endpoint_t *endpoint,
@@ -67,16 +71,33 @@
  */
 ompi_btl_openib_connect_base_funcs_t ompi_btl_openib_connect_oob = {
     "oob",
- /* No need for "open */
- NULL,
+ /* Open */
+ oob_open,
     /* Init */
     oob_init,
     /* Connect */
     oob_start_connect,
+ /* Query */
+ oob_query,
     /* Finalize */
     oob_finalize,
 };
 
+/* Open - this functions sets up any oob specific commandline params */
+static void oob_open(void)
+{
+ mca_base_param_reg_int(&mca_btl_openib_component.super.btl_version,
+ "connect_oob_priority",
+ "The selection method priority for oob",
+ false, false, oob_priority, &oob_priority);
+
+ if (oob_priority > 100) {
+ oob_priority = 100;
+ } else if (oob_priority < -1) {
+ oob_priority = -1;
+ }
+}
+
 /*
  * Init function. Post non-blocking RML receive to accept incoming
  * connection requests.
@@ -118,6 +139,15 @@
     return OMPI_SUCCESS;
 }
 
+static int oob_query(mca_btl_openib_hca_t *hca)
+{
+ if (IBV_TRANSPORT_IB == hca->ib_dev->transport_type) {
+ return oob_priority;
+ }
+
+ return -1;
+}
+
 /*
  * Finalize function. Cleanup RML non-blocking receive.
  */
Index: ompi/mca/btl/openib/connect/btl_openib_connect_rdma_cm.c
===================================================================
--- ompi/mca/btl/openib/connect/btl_openib_connect_rdma_cm.c (revision 17102)
+++ ompi/mca/btl/openib/connect/btl_openib_connect_rdma_cm.c (working copy)
@@ -13,9 +13,10 @@
 #include "btl_openib_endpoint.h"
 #include "connect/connect.h"
 
-static int rdma_cm_open(void);
+static void rdma_cm_open(void);
 static int rdma_cm_init(void);
 static int rdma_cm_connect(mca_btl_base_endpoint_t *e);
+static int rdma_cm_query(mca_btl_openib_hca_t *hca);
 static int rdma_cm_finalize(void);
 
 ompi_btl_openib_connect_base_funcs_t ompi_btl_openib_connect_rdma_cm = {
@@ -23,17 +24,17 @@
     rdma_cm_open,
     rdma_cm_init,
     rdma_cm_connect,
+ rdma_cm_query,
     rdma_cm_finalize,
 };
 
-static int rdma_cm_open(void)
+/* Open - this functions sets up any rdma_cm specific commandline params */
+static void rdma_cm_open(void)
 {
     mca_base_param_reg_int(&mca_btl_openib_component.super.btl_version,
- "btl_openib_connect_rdma_cm_foo",
+ "connect_rdma_cm_foo",
                            "A dummy help message", false, false,
                            17, NULL);
-
- return OMPI_SUCCESS;
 }
 
 static int rdma_cm_init(void)
@@ -48,6 +49,16 @@
     return OMPI_ERR_NOT_IMPLEMENTED;
 }
 
+static int rdma_cm_query(mca_btl_openib_hca_t *hca)
+{
+ if (IBV_TRANSPORT_IWARP == hca->ib_dev->transport_type) {
+ /* Not currently supported */
+ return -1;
+ }
+
+ return -1;
+}
+
 static int rdma_cm_finalize(void)
 {
     printf("rdma cm finalize\n");
Index: ompi/mca/btl/openib/connect/btl_openib_connect_xoob.c
===================================================================
--- ompi/mca/btl/openib/connect/btl_openib_connect_xoob.c (revision 17102)
+++ ompi/mca/btl/openib/connect/btl_openib_connect_xoob.c (working copy)
@@ -22,8 +22,10 @@
 #include "btl_openib_xrc.h"
 #include "connect/connect.h"
 
+static void xoob_open(void);
 static int xoob_init(void);
 static int xoob_start_connect(mca_btl_base_endpoint_t *e);
+static int xoob_query(mca_btl_openib_hca_t *hca);
 static int xoob_finalize(void);
 
 /*
@@ -32,18 +34,18 @@
  */
 ompi_btl_openib_connect_base_funcs_t ompi_btl_openib_connect_xoob = {
     "xoob",
- /* No need for "open */
- NULL,
+ /* Open */
+ xoob_open,
     /* Init */
     xoob_init,
     /* Connect */
     xoob_start_connect,
+ /* Query */
+ xoob_query,
     /* Finalize */
     xoob_finalize,
 };
 
-#if HAVE_XRC
-
 typedef enum {
     SEND,
     RECV
@@ -99,6 +101,24 @@
 
 static int init_rem_info(mca_btl_openib_rem_info_t *rem_info);
 static void free_rem_info(mca_btl_openib_rem_info_t *rem_info);
+
+static int xoob_priority = 60;
+
+/* Open - this functions sets up any xoob specific commandline params */
+static void xoob_open(void)
+{
+ mca_base_param_reg_int(&mca_btl_openib_component.super.btl_version,
+ "connect_xoob_priority",
+ "The selection method priority for xoob",
+ false, false, xoob_priority, &xoob_priority);
+
+ if (xoob_priority > 100) {
+ xoob_priority = 100;
+ } else if (xoob_priority < -1) {
+ xoob_priority = -1;
+ }
+}
+
 /*
  * Init function. Post non-blocking RML receive to accept incoming
  * connection requests.
@@ -173,6 +193,15 @@
     return rc;
 }
 
+static int xoob_query(btl_openib_hca_t *hca)
+{
+ if (mca_btl_openib_component.num_xrc_qps > 0) {
+ return xoob_priority;
+ }
+
+ return -1;
+}
+
 /*
  * Finalize function. Cleanup RML non-blocking receive.
  */
@@ -836,24 +865,3 @@
         free(rem_info->rem_srqs);
     }
 }
-
-#else
-/* In case if the XRC was disabled during compilation we will print message and return error */
-static int xoob_init(void)
-{
- printf("xoob init\n");
- return OMPI_ERR_NOT_IMPLEMENTED;
-}
-
-static int xoob_start_connect(mca_btl_base_endpoint_t *e)
-{
- printf("xoob start connect\n");
- return OMPI_ERR_NOT_IMPLEMENTED;
-}
-
-static int xoob_finalize(void)
-{
- printf("xoob finalize\n");
- return OMPI_ERR_NOT_IMPLEMENTED;
-}
-#endif
Index: ompi/mca/btl/openib/btl_openib.c
===================================================================
--- ompi/mca/btl/openib/btl_openib.c (revision 17101)
+++ ompi/mca/btl/openib/btl_openib.c (working copy)
@@ -322,6 +322,15 @@
         /* check if the remote proc has a reachable subnet first */
         BTL_VERBOSE(("got %d port_infos \n", ib_proc->proc_port_count));
         for(j = 0; j < (int) ib_proc->proc_port_count; j++){
+ int rc;
+
+ /* Setup connect module */
+ rc = ompi_btl_openib_connect_base_select(ib_proc->proc_ports[j].cpclist,
+ openib_btl->port_info.cpclist);
+ if (rc != OMPI_SUCCESS) {
+ continue;
+ }
+
             BTL_VERBOSE(("got a subnet %016x\n",
                          ib_proc->proc_ports[j].subnet_id));
             if(ib_proc->proc_ports[j].subnet_id ==
Index: ompi/mca/btl/openib/configure.m4
===================================================================
--- ompi/mca/btl/openib/configure.m4 (revision 17101)
+++ ompi/mca/btl/openib/configure.m4 (working copy)
@@ -18,7 +18,15 @@
 # $HEADER$
 #
 
+# MCA_btl_openib_POST_CONFIG([should_build])
+# ------------------------------------------
+AC_DEFUN([MCA_btl_openib_POST_CONFIG], [
+ AS_IF([test $1 -eq 0 -a "$enable_dist" = "yes"],
+ [AC_MSG_ERROR([BTL openib is disabled but --enable-dist specifed. This will result in a bad tarball. Aborting configure.])])
+ AM_CONDITIONAL([MCA_btl_openib_have_xrc], [test $1 -eq 1 -a "x$btl_openib_have_xrc" = "x1" -a "x$ompi_want_connectx_xrc" = "x1"])
+])
 
+
 # MCA_btl_openib_CONFIG([action-if-can-compile],
 # [action-if-cant-compile])
 # ------------------------------------------------
Index: ompi/mca/btl/openib/btl_openib.h
===================================================================
--- ompi/mca/btl/openib/btl_openib.h (revision 17101)
+++ ompi/mca/btl/openib/btl_openib.h (working copy)
@@ -46,6 +46,8 @@
 #include "ompi/mca/btl/btl.h"
 #include "ompi/mca/btl/base/base.h"
 
+#include "connect/connect.h"
+
 BEGIN_C_DECLS
 
 #define HAVE_XRC (defined(HAVE_IBV_OPEN_XRC_DOMAIN) && (1 == OMPI_ENABLE_CONNECTX_XRC_SUPPORT))
@@ -229,6 +231,7 @@
 #if HAVE_XRC
     uint16_t lid; /* used only in xrc */
 #endif
+ char *cpclist;
 };
 typedef struct mca_btl_openib_port_info mca_btl_openib_port_info_t;
 
Index: ompi/mca/btl/openib/Makefile.am
===================================================================
--- ompi/mca/btl/openib/Makefile.am (revision 17101)
+++ ompi/mca/btl/openib/Makefile.am (working copy)
@@ -53,14 +53,18 @@
     connect/btl_openib_connect_base.c \
     connect/btl_openib_connect_oob.c \
     connect/btl_openib_connect_oob.h \
- connect/btl_openib_connect_xoob.c \
- connect/btl_openib_connect_xoob.h \
     connect/btl_openib_connect_rdma_cm.c \
     connect/btl_openib_connect_rdma_cm.h \
     connect/btl_openib_connect_ibcm.c \
     connect/btl_openib_connect_ibcm.h \
     connect/connect.h
 
+if MCA_btl_openib_have_xrc
+sources += \
+ connect/btl_openib_connect_xoob.c \
+ connect/btl_openib_connect_xoob.h
+endif
+
 # Make the output library in this directory, and name it either
 # mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
 # (for static builds).