Open MPI logo

Open MPI Development Mailing List Archives

  |   Home   |   Support   |   FAQ   |  

This web mail archive is frozen.

This page is part of a frozen web archive of this mailing list.

You can still navigate around this archive, but know that no new mails have been added to it since July of 2016.

Click here to be taken to the new web archives of this list; it includes all the mails that are in this frozen archive plus all new mails that have been sent to the list since it was migrated to the new archives.

Subject: [OMPI devel] [PATCH] openib: clean-up connect to allow for new cm's
From: Jon Mason (jon_at_[hidden])
Date: 2007-12-11 18:59:03


Currently, alternate CMs cannot be called because
ompi_btl_openib_connect_base_open forces a choice of either oob or xoob
(and goes into an erroneous error path if you pick something else).
This patch reorganizes ompi_btl_openib_connect_base_open so that new
functions can easily be added. New open functions (oob_open and xoob_open)
were added to oob and xoob for the error handling.

I tested calling oob, xoob, and rdma_cm. oob happily allows connections
to be established and throws no errors. xoob fails because ompi does
not have it compiled in (and I have no connectx cards). rdma_cm calls
the empty hooks and exits without connecting (thus throwing
non-connection errors). All expected behavior.

Since this patch fixes the existing behavior, and is not necessarily
tied to my implementation of rdma_cm, I think it is acceptable to go in
now.

Thanks,
Jon

Index: ompi/mca/btl/openib/connect/btl_openib_connect_base.c
===================================================================
--- ompi/mca/btl/openib/connect/btl_openib_connect_base.c (revision 16937)
+++ ompi/mca/btl/openib/connect/btl_openib_connect_base.c (working copy)
@@ -50,8 +50,8 @@
  */
 int ompi_btl_openib_connect_base_open(void)
 {
- int i;
- char **temp, *a, *b;
+ char **temp, *a, *b, *defval;
+ int i, ret = OMPI_ERROR;
 
     /* Make an MCA parameter to select which connect module to use */
     temp = NULL;
@@ -66,40 +66,23 @@
 
     /* For XRC qps we must to use XOOB connection manager */
     if (mca_btl_openib_component.num_xrc_qps > 0) {
- mca_base_param_reg_string(&mca_btl_openib_component.super.btl_version,
- "connect",
- b, false, false,
- "xoob", &param);
- if (0 != strcmp("xoob", param)) {
- opal_show_help("help-mpi-btl-openib.txt",
- "XRC with wrong OOB", true,
- orte_system_info.nodename,
- mca_btl_openib_component.num_xrc_qps);
- return OMPI_ERROR;
- }
+ defval = "xoob";
     } else { /* For all others we should use OOB */
- mca_base_param_reg_string(&mca_btl_openib_component.super.btl_version,
- "connect",
- b, false, false,
- "oob", &param);
- if (0 != strcmp("oob", param)) {
- opal_show_help("help-mpi-btl-openib.txt",
- "SRQ or PP with wrong OOB", true,
- orte_system_info.nodename,
- mca_btl_openib_component.num_srq_qps,
- mca_btl_openib_component.num_pp_qps);
- return OMPI_ERROR;
- }
+ defval = "oob";
     }
 
+ mca_base_param_reg_string(&mca_btl_openib_component.super.btl_version,
+ "connect", b, false, false, defval, &param);
+
     /* Call the open function on all the connect modules */
     for (i = 0; NULL != all[i]; ++i) {
- if (NULL != all[i]->bcf_open) {
- all[i]->bcf_open();
+ if (0 == strcmp(all[i]->bcf_name, param)) {
+ ret = all[i]->bcf_open();
+ break;
         }
     }
 
- return OMPI_SUCCESS;
+ return ret;
 }
 
 
Index: ompi/mca/btl/openib/connect/btl_openib_connect_ibcm.c
===================================================================
--- ompi/mca/btl/openib/connect/btl_openib_connect_ibcm.c (revision 16937)
+++ ompi/mca/btl/openib/connect/btl_openib_connect_ibcm.c (working copy)
@@ -28,11 +28,7 @@
 
 static int ibcm_open(void)
 {
- mca_base_param_reg_int(&mca_btl_openib_component.super.btl_version,
- "btl_openib_connect_ibcm_foo",
- "A dummy help message", false, false,
- 17, NULL);
-
+ printf("ibcm open\n");
     return OMPI_SUCCESS;
 }
 
Index: ompi/mca/btl/openib/connect/btl_openib_connect_oob.c
===================================================================
--- ompi/mca/btl/openib/connect/btl_openib_connect_oob.c (revision 16937)
+++ ompi/mca/btl/openib/connect/btl_openib_connect_oob.c (working copy)
@@ -22,6 +22,8 @@
 
 #include "ompi_config.h"
 
+#include "opal/util/show_help.h"
+
 #include "orte/mca/ns/base/base.h"
 #include "orte/mca/oob/base/base.h"
 #include "orte/mca/rml/rml.h"
@@ -39,6 +41,7 @@
     ENDPOINT_CONNECT_ACK
 } connect_message_type_t;
 
+static int oob_open(void);
 static int oob_init(void);
 static int oob_start_connect(mca_btl_base_endpoint_t *e);
 static int oob_finalize(void);
@@ -67,8 +70,8 @@
  */
 ompi_btl_openib_connect_base_funcs_t ompi_btl_openib_connect_oob = {
     "oob",
- /* No need for "open */
- NULL,
+ /* Open */
+ oob_open,
     /* Init */
     oob_init,
     /* Connect */
@@ -78,6 +81,23 @@
 };
 
 /*
+ * Open function.
+ */
+static int oob_open(void)
+{
+ if (mca_btl_openib_component.num_xrc_qps > 0) {
+ opal_show_help("help-mpi-btl-openib.txt",
+ "SRQ or PP with wrong OOB", true,
+ orte_system_info.nodename,
+ mca_btl_openib_component.num_srq_qps,
+ mca_btl_openib_component.num_pp_qps);
+ return OMPI_ERROR;
+ }
+
+ return OMPI_SUCCESS;
+}
+
+/*
  * Init function. Post non-blocking RML receive to accept incoming
  * connection requests.
  */
Index: ompi/mca/btl/openib/connect/btl_openib_connect_rdma_cm.c
===================================================================
--- ompi/mca/btl/openib/connect/btl_openib_connect_rdma_cm.c (revision 16937)
+++ ompi/mca/btl/openib/connect/btl_openib_connect_rdma_cm.c (working copy)
@@ -28,11 +28,7 @@
 
 static int rdma_cm_open(void)
 {
- mca_base_param_reg_int(&mca_btl_openib_component.super.btl_version,
- "btl_openib_connect_rdma_cm_foo",
- "A dummy help message", false, false,
- 17, NULL);
-
+ printf("rdma cm open\n");
     return OMPI_SUCCESS;
 }
 
Index: ompi/mca/btl/openib/connect/btl_openib_connect_xoob.c
===================================================================
--- ompi/mca/btl/openib/connect/btl_openib_connect_xoob.c (revision 16937)
+++ ompi/mca/btl/openib/connect/btl_openib_connect_xoob.c (working copy)
@@ -10,6 +10,8 @@
 
 #include "ompi_config.h"
 
+#include "opal/util/show_help.h"
+
 #include "orte/mca/ns/base/base.h"
 #include "orte/mca/oob/base/base.h"
 #include "orte/mca/rml/rml.h"
@@ -22,6 +24,7 @@
 #include "btl_openib_xrc.h"
 #include "connect/connect.h"
 
+static int xoob_open(void);
 static int xoob_init(void);
 static int xoob_start_connect(mca_btl_base_endpoint_t *e);
 static int xoob_finalize(void);
@@ -32,8 +35,8 @@
  */
 ompi_btl_openib_connect_base_funcs_t ompi_btl_openib_connect_xoob = {
     "xoob",
- /* No need for "open */
- NULL,
+ /* Open */
+ xoob_open,
     /* Init */
     xoob_init,
     /* Connect */
@@ -99,7 +102,24 @@
 
 static int init_rem_info(mca_btl_openib_rem_info_t *rem_info);
 static void free_rem_info(mca_btl_openib_rem_info_t *rem_info);
+
 /*
+ * Open function.
+ */
+static int xoob_open(void)
+{
+ if (mca_btl_openib_component.num_xrc_qps <= 0) {
+ opal_show_help("help-mpi-btl-openib.txt",
+ "XRC with wrong OOB", true,
+ orte_system_info.nodename,
+ mca_btl_openib_component.num_xrc_qps);
+ return OMPI_ERROR;
+ }
+
+ return OMPI_SUCCESS;
+}
+
+/*
  * Init function. Post non-blocking RML receive to accept incoming
  * connection requests.
  */
@@ -834,6 +854,12 @@
 
 #else
 /* In case if the XRC was disabled during compilation we will print message and return error */
+static int xoob_open(void)
+{
+ printf("xoob open\n");
+ return OMPI_ERR_NOT_IMPLEMENTED;
+}
+
 static int xoob_init(void)
 {
     printf("xoob init\n");