Open MPI logo

Open MPI Development Mailing List Archives

  |   Home   |   Support   |   FAQ   |   all Development mailing list

Subject: [OMPI devel] [PATCH] openib: clean-up connect to allow for new cm's
From: Jon Mason (jon_at_[hidden])
Date: 2007-12-11 18:59:03


Currently, alternate connection managers (CMs) cannot be called because
ompi_btl_openib_connect_base_open forces a choice of either oob or xoob
(and goes into an erroneous error path if you pick something else).
This patch reorganizes ompi_btl_openib_connect_base_open so that new
CMs can easily be added. New open functions were added to oob and xoob
to take over the error handling.

I tested calling oob, xoob, and rdma_cm. oob happily allows connections
to be established and throws no errors. xoob fails because Open MPI does
not have it compiled in (and I have no ConnectX cards). rdma_cm calls
the empty hooks and exits without connecting (thus throwing
non-connection errors). All of this is expected behavior.

Since this patch fixes the existing behavior and is not necessarily
tied to my implementation of rdma_cm, I think it is acceptable for it
to go in now.

Thanks,
Jon

Index: ompi/mca/btl/openib/connect/btl_openib_connect_base.c
===================================================================
--- ompi/mca/btl/openib/connect/btl_openib_connect_base.c (revision 16937)
+++ ompi/mca/btl/openib/connect/btl_openib_connect_base.c (working copy)
@@ -50,8 +50,8 @@
  */
 int ompi_btl_openib_connect_base_open(void)
 {
- int i;
- char **temp, *a, *b;
+ char **temp, *a, *b, *defval;
+ int i, ret = OMPI_ERROR;
 
     /* Make an MCA parameter to select which connect module to use */
     temp = NULL;
@@ -66,40 +66,23 @@
 
     /* For XRC qps we must to use XOOB connection manager */
     if (mca_btl_openib_component.num_xrc_qps > 0) {
- mca_base_param_reg_string(&mca_btl_openib_component.super.btl_version,
- "connect",
- b, false, false,
- "xoob", &param);
- if (0 != strcmp("xoob", param)) {
- opal_show_help("help-mpi-btl-openib.txt",
- "XRC with wrong OOB", true,
- orte_system_info.nodename,
- mca_btl_openib_component.num_xrc_qps);
- return OMPI_ERROR;
- }
+ defval = "xoob";
     } else { /* For all others we should use OOB */
- mca_base_param_reg_string(&mca_btl_openib_component.super.btl_version,
- "connect",
- b, false, false,
- "oob", &param);
- if (0 != strcmp("oob", param)) {
- opal_show_help("help-mpi-btl-openib.txt",
- "SRQ or PP with wrong OOB", true,
- orte_system_info.nodename,
- mca_btl_openib_component.num_srq_qps,
- mca_btl_openib_component.num_pp_qps);
- return OMPI_ERROR;
- }
+ defval = "oob";
     }
 
+ mca_base_param_reg_string(&mca_btl_openib_component.super.btl_version,
+ "connect", b, false, false, defval, &param);
+
     /* Call the open function on all the connect modules */
     for (i = 0; NULL != all[i]; ++i) {
- if (NULL != all[i]->bcf_open) {
- all[i]->bcf_open();
+ if (0 == strcmp(all[i]->bcf_name, param)) {
+ ret = all[i]->bcf_open();
+ break;
         }
     }
 
- return OMPI_SUCCESS;
+ return ret;
 }
 
 
Index: ompi/mca/btl/openib/connect/btl_openib_connect_ibcm.c
===================================================================
--- ompi/mca/btl/openib/connect/btl_openib_connect_ibcm.c (revision 16937)
+++ ompi/mca/btl/openib/connect/btl_openib_connect_ibcm.c (working copy)
@@ -28,11 +28,7 @@
 
 static int ibcm_open(void)
 {
- mca_base_param_reg_int(&mca_btl_openib_component.super.btl_version,
- "btl_openib_connect_ibcm_foo",
- "A dummy help message", false, false,
- 17, NULL);
-
+ printf("ibcm open\n");
     return OMPI_SUCCESS;
 }
 
Index: ompi/mca/btl/openib/connect/btl_openib_connect_oob.c
===================================================================
--- ompi/mca/btl/openib/connect/btl_openib_connect_oob.c (revision 16937)
+++ ompi/mca/btl/openib/connect/btl_openib_connect_oob.c (working copy)
@@ -22,6 +22,8 @@
 
 #include "ompi_config.h"
 
+#include "opal/util/show_help.h"
+
 #include "orte/mca/ns/base/base.h"
 #include "orte/mca/oob/base/base.h"
 #include "orte/mca/rml/rml.h"
@@ -39,6 +41,7 @@
     ENDPOINT_CONNECT_ACK
 } connect_message_type_t;
 
+static int oob_open(void);
 static int oob_init(void);
 static int oob_start_connect(mca_btl_base_endpoint_t *e);
 static int oob_finalize(void);
@@ -67,8 +70,8 @@
  */
 ompi_btl_openib_connect_base_funcs_t ompi_btl_openib_connect_oob = {
     "oob",
- /* No need for "open */
- NULL,
+ /* Open */
+ oob_open,
     /* Init */
     oob_init,
     /* Connect */
@@ -78,6 +81,23 @@
 };
 
 /*
+ * Open function.
+ */
+static int oob_open(void)
+{
+ if (mca_btl_openib_component.num_xrc_qps > 0) {
+ opal_show_help("help-mpi-btl-openib.txt",
+ "SRQ or PP with wrong OOB", true,
+ orte_system_info.nodename,
+ mca_btl_openib_component.num_srq_qps,
+ mca_btl_openib_component.num_pp_qps);
+ return OMPI_ERROR;
+ }
+
+ return OMPI_SUCCESS;
+}
+
+/*
  * Init function. Post non-blocking RML receive to accept incoming
  * connection requests.
  */
Index: ompi/mca/btl/openib/connect/btl_openib_connect_rdma_cm.c
===================================================================
--- ompi/mca/btl/openib/connect/btl_openib_connect_rdma_cm.c (revision 16937)
+++ ompi/mca/btl/openib/connect/btl_openib_connect_rdma_cm.c (working copy)
@@ -28,11 +28,7 @@
 
 static int rdma_cm_open(void)
 {
- mca_base_param_reg_int(&mca_btl_openib_component.super.btl_version,
- "btl_openib_connect_rdma_cm_foo",
- "A dummy help message", false, false,
- 17, NULL);
-
+ printf("rdma cm open\n");
     return OMPI_SUCCESS;
 }
 
Index: ompi/mca/btl/openib/connect/btl_openib_connect_xoob.c
===================================================================
--- ompi/mca/btl/openib/connect/btl_openib_connect_xoob.c (revision 16937)
+++ ompi/mca/btl/openib/connect/btl_openib_connect_xoob.c (working copy)
@@ -10,6 +10,8 @@
 
 #include "ompi_config.h"
 
+#include "opal/util/show_help.h"
+
 #include "orte/mca/ns/base/base.h"
 #include "orte/mca/oob/base/base.h"
 #include "orte/mca/rml/rml.h"
@@ -22,6 +24,7 @@
 #include "btl_openib_xrc.h"
 #include "connect/connect.h"
 
+static int xoob_open(void);
 static int xoob_init(void);
 static int xoob_start_connect(mca_btl_base_endpoint_t *e);
 static int xoob_finalize(void);
@@ -32,8 +35,8 @@
  */
 ompi_btl_openib_connect_base_funcs_t ompi_btl_openib_connect_xoob = {
     "xoob",
- /* No need for "open */
- NULL,
+ /* Open */
+ xoob_open,
     /* Init */
     xoob_init,
     /* Connect */
@@ -99,7 +102,24 @@
 
 static int init_rem_info(mca_btl_openib_rem_info_t *rem_info);
 static void free_rem_info(mca_btl_openib_rem_info_t *rem_info);
+
 /*
+ * Open function.
+ */
+static int xoob_open(void)
+{
+ if (mca_btl_openib_component.num_xrc_qps <= 0) {
+ opal_show_help("help-mpi-btl-openib.txt",
+ "XRC with wrong OOB", true,
+ orte_system_info.nodename,
+ mca_btl_openib_component.num_xrc_qps);
+ return OMPI_ERROR;
+ }
+
+ return OMPI_SUCCESS;
+}
+
+/*
  * Init function. Post non-blocking RML receive to accept incoming
  * connection requests.
  */
@@ -834,6 +854,12 @@
 
 #else
 /* In case if the XRC was disabled during compilation we will print message and return error */
+static int xoob_open(void)
+{
+ printf("xoob open\n");
+ return OMPI_ERR_NOT_IMPLEMENTED;
+}
+
 static int xoob_init(void)
 {
     printf("xoob init\n");