Open MPI logo

Open MPI Development Mailing List Archives

  |   Home   |   Support   |   FAQ   |   all Development mailing list

Subject: Re: [OMPI devel] [OMPI svn-full] svn:open-mpi r29703 - in trunk: contrib/platform/iu/odin ompi/mca/btl/openib ompi/mca/btl/openib/connect
From: Jeff Squyres (jsquyres) (jsquyres_at_[hidden])
Date: 2013-11-14 06:44:15


Does the openib BTL *only* work with RDMACM now?

That's surprising (and bad!).

Did someone ask Mellanox about fixing the OOB and XOOB CPCs?

On Nov 13, 2013, at 11:16 PM, svn-commit-mailer_at_[hidden] wrote:

> Author: rhc (Ralph Castain)
> Date: 2013-11-13 23:16:53 EST (Wed, 13 Nov 2013)
> New Revision: 29703
> URL: https://svn.open-mpi.org/trac/ompi/changeset/29703
>
> Log:
> Given that the oob and xoob cpc's are no longer operable and haven't been since the OOB update, remove them to avoid confusion
>
> cmr:v1.7.4:reviewer=hjelmn:subject=Remove stale cpcs from openib
>
> Deleted:
> trunk/ompi/mca/btl/openib/connect/btl_openib_connect_oob.c
> trunk/ompi/mca/btl/openib/connect/btl_openib_connect_oob.h
> trunk/ompi/mca/btl/openib/connect/btl_openib_connect_xoob.c
> trunk/ompi/mca/btl/openib/connect/btl_openib_connect_xoob.h
> Text files modified:
> trunk/contrib/platform/iu/odin/optimized.conf | 1
> trunk/contrib/platform/iu/odin/static.conf | 1
> trunk/ompi/mca/btl/openib/Makefile.am | 10
> trunk/ompi/mca/btl/openib/connect/btl_openib_connect_base.c | 14
> /dev/null | 975 ---------------------------------
> /dev/null | 18
> /dev/null | 1150 ----------------------------------------
> /dev/null | 19
> 8 files changed, 5 insertions(+), 2183 deletions(-)
>
> Modified: trunk/contrib/platform/iu/odin/optimized.conf
> ==============================================================================
> --- trunk/contrib/platform/iu/odin/optimized.conf Wed Nov 13 19:34:15 2013 (r29702)
> +++ trunk/contrib/platform/iu/odin/optimized.conf 2013-11-13 23:16:53 EST (Wed, 13 Nov 2013) (r29703)
> @@ -80,7 +80,6 @@
>
> ## Setup OpenIB
> btl_openib_want_fork_support = 0
> -btl_openib_cpc_include = oob
> #btl_openib_receive_queues = P,128,256,64,32,32:S,2048,1024,128,32:S,12288,1024,128,32:S,65536,1024,128,32
>
> ## Setup TCP
>
> Modified: trunk/contrib/platform/iu/odin/static.conf
> ==============================================================================
> --- trunk/contrib/platform/iu/odin/static.conf Wed Nov 13 19:34:15 2013 (r29702)
> +++ trunk/contrib/platform/iu/odin/static.conf 2013-11-13 23:16:53 EST (Wed, 13 Nov 2013) (r29703)
> @@ -80,7 +80,6 @@
>
> ## Setup OpenIB
> btl_openib_want_fork_support = 0
> -btl_openib_cpc_include = oob
> #btl_openib_receive_queues = P,128,256,64,32,32:S,2048,1024,128,32:S,12288,1024,128,32:S,65536,1024,128,32
>
> ## Setup TCP
>
> Modified: trunk/ompi/mca/btl/openib/Makefile.am
> ==============================================================================
> --- trunk/ompi/mca/btl/openib/Makefile.am Wed Nov 13 19:34:15 2013 (r29702)
> +++ trunk/ompi/mca/btl/openib/Makefile.am 2013-11-13 23:16:53 EST (Wed, 13 Nov 2013) (r29703)
> @@ -14,6 +14,7 @@
> # Copyright (c) 2011 NVIDIA Corporation. All rights reserved.
> # Copyright (c) 2011 Mellanox Technologies. All rights reserved.
> # Copyright (c) 2012 Oak Ridge National Laboratory. All rights reserved
> +# Copyright (c) 2013 Intel, Inc. All rights reserved.
> # $COPYRIGHT$
> #
> # Additional copyrights may follow
> @@ -60,8 +61,6 @@
> btl_openib_ip.c \
> connect/base.h \
> connect/btl_openib_connect_base.c \
> - connect/btl_openib_connect_oob.c \
> - connect/btl_openib_connect_oob.h \
> connect/btl_openib_connect_empty.c \
> connect/btl_openib_connect_empty.h \
> connect/connect.h
> @@ -73,13 +72,6 @@
> btl_openib_failover.h
> endif
>
> -# If we have XRC support, build that CPC
> -if MCA_btl_openib_have_xrc
> -sources += \
> - connect/btl_openib_connect_xoob.c \
> - connect/btl_openib_connect_xoob.h
> -endif
> -
> # If we have rdmacm support, build that CPC
> if MCA_btl_openib_have_rdmacm
> sources += \
>
> Modified: trunk/ompi/mca/btl/openib/connect/btl_openib_connect_base.c
> ==============================================================================
> --- trunk/ompi/mca/btl/openib/connect/btl_openib_connect_base.c Wed Nov 13 19:34:15 2013 (r29702)
> +++ trunk/ompi/mca/btl/openib/connect/btl_openib_connect_base.c 2013-11-13 23:16:53 EST (Wed, 13 Nov 2013) (r29703)
> @@ -17,11 +17,7 @@
> #include "btl_openib.h"
> #include "btl_openib_proc.h"
> #include "connect/base.h"
> -#include "connect/btl_openib_connect_oob.h"
> #include "connect/btl_openib_connect_empty.h"
> -#if HAVE_XRC
> -#include "connect/btl_openib_connect_xoob.h"
> -#endif
> #if OMPI_HAVE_RDMACM && OPAL_HAVE_THREADS
> #include "connect/btl_openib_connect_rdmacm.h"
> #endif
> @@ -37,15 +33,13 @@
> * Array of all possible connection functions
> */
> static ompi_btl_openib_connect_base_component_t *all[] = {
> - &ompi_btl_openib_connect_oob,
> + /* Always have an entry here so that the CP indexes will always be
> + the same: OOB has been removed, so use the "empty" CPC */
> + &ompi_btl_openib_connect_empty,
>
> /* Always have an entry here so that the CP indexes will always be
> - the same: if XRC is not available, use the "empty" CPC */
> -#if HAVE_XRC
> - &ompi_btl_openib_connect_xoob,
> -#else
> + the same: XOOB has been removed, so use the "empty" CPC */
> &ompi_btl_openib_connect_empty,
> -#endif
>
> /* Always have an entry here so that the CP indexes will always be
> the same: if RDMA CM is not available, use the "empty" CPC */
>
> Deleted: trunk/ompi/mca/btl/openib/connect/btl_openib_connect_oob.c
> ==============================================================================
> --- trunk/ompi/mca/btl/openib/connect/btl_openib_connect_oob.c 2013-11-13 23:16:53 EST (Wed, 13 Nov 2013) (r29702)
> +++ /dev/null 00:00:00 1970 (deleted)
> @@ -1,975 +0,0 @@
> -/*
> - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
> - * University Research and Technology
> - * Corporation. All rights reserved.
> - * Copyright (c) 2004-2011 The University of Tennessee and The University
> - * of Tennessee Research Foundation. All rights
> - * reserved.
> - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
> - * University of Stuttgart. All rights reserved.
> - * Copyright (c) 2004-2005 The Regents of the University of California.
> - * All rights reserved.
> - * Copyright (c) 2006-2013 Cisco Systems, Inc. All rights reserved.
> - * Copyright (c) 2006-2012 Los Alamos National Security, LLC. All rights
> - * reserved.
> - * Copyright (c) 2008-2013 Mellanox Technologies. All rights reserved.
> - * Copyright (c) 2009-2011 IBM Corporation. All rights reserved.
> - * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved
> - *
> - * $COPYRIGHT$
> - *
> - * Additional copyrights may follow
> - *
> - * $HEADER$
> - */
> -
> -#include "ompi_config.h"
> -
> -#include "opal/dss/dss.h"
> -#include "opal_stdint.h"
> -#include "opal/util/error.h"
> -#include "opal/util/output.h"
> -#include "opal/util/show_help.h"
> -
> -#include "ompi/mca/rte/rte.h"
> -#include "btl_openib.h"
> -#include "btl_openib_endpoint.h"
> -#include "btl_openib_proc.h"
> -#include "connect/connect.h"
> -
> -#if (ENABLE_DYNAMIC_SL)
> -#include "connect/btl_openib_connect_sl.h"
> -#endif
> -
> -#ifdef HAVE_UNISTD_H
> -#include <unistd.h>
> -#endif
> -
> -typedef enum {
> - ENDPOINT_CONNECT_REQUEST,
> - ENDPOINT_CONNECT_RESPONSE,
> - ENDPOINT_CONNECT_ACK
> -} connect_message_type_t;
> -
> -static int oob_priority = 0;
> -static bool rml_recv_posted = false;
> -
> -static void oob_component_register(void);
> -static int oob_component_query(mca_btl_openib_module_t *openib_btl,
> - ompi_btl_openib_connect_base_module_t **cpc);
> -static int oob_component_finalize(void);
> -
> -static int oob_module_start_connect(ompi_btl_openib_connect_base_module_t *cpc,
> - mca_btl_base_endpoint_t *endpoint);
> -static int reply_start_connect(mca_btl_openib_endpoint_t *endpoint,
> - mca_btl_openib_rem_info_t *rem_info);
> -static int set_remote_info(mca_btl_base_endpoint_t* endpoint,
> - mca_btl_openib_rem_info_t* rem_info);
> -static int qp_connect_all(mca_btl_base_endpoint_t* endpoint);
> -static int qp_create_all(mca_btl_base_endpoint_t* endpoint);
> -static int qp_create_one(mca_btl_base_endpoint_t* endpoint, int qp,
> - struct ibv_srq *srq, uint32_t max_recv_wr, uint32_t max_send_wr);
> -static int send_connect_data(mca_btl_base_endpoint_t* endpoint,
> - uint8_t message_type);
> -
> -static void rml_send_cb(int status, ompi_process_name_t* endpoint,
> - opal_buffer_t* buffer, ompi_rml_tag_t tag,
> - void* cbdata);
> -static void rml_recv_cb(int status, ompi_process_name_t* process_name,
> - opal_buffer_t* buffer, ompi_rml_tag_t tag,
> - void* cbdata);
> -
> -/*
> - * The "component" struct -- the top-level function pointers for the
> - * oob connection scheme.
> - */
> -ompi_btl_openib_connect_base_component_t ompi_btl_openib_connect_oob = {
> - "oob",
> - /* Register */
> - oob_component_register,
> - /* Init */
> - NULL,
> - /* Query */
> - oob_component_query,
> - /* Finalize */
> - oob_component_finalize,
> -};
> -
> -/* Open - this functions sets up any oob specific commandline params */
> -static void oob_component_register(void)
> -{
> - /* the priority is initialized in the declaration above */
> - (void) mca_base_component_var_register(&mca_btl_openib_component.super.btl_version,
> - "connect_oob_priority",
> - "The selection method priority for oob",
> - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
> - OPAL_INFO_LVL_9,
> - MCA_BASE_VAR_SCOPE_READONLY,
> - &oob_priority);
> -
> - if (oob_priority > 100) {
> - oob_priority = 100;
> - } else if (oob_priority < -1) {
> - oob_priority = -1;
> - }
> -}
> -
> -/*
> - * Init function. Post non-blocking RML receive to accept incoming
> - * connection requests.
> - */
> -static int oob_component_query(mca_btl_openib_module_t *btl,
> - ompi_btl_openib_connect_base_module_t **cpc)
> -{
> - /* If we have the transport_type member, check to ensure we're on
> - IB (this CPC will not work with iWarp). If we do not have the
> - transport_type member, then we must be < OFED v1.2, and
> - therefore we must be IB. */
> -#if defined(HAVE_STRUCT_IBV_DEVICE_TRANSPORT_TYPE) && HAVE_DECL_IBV_LINK_LAYER_ETHERNET
> - if (BTL_OPENIB_CONNECT_BASE_CHECK_IF_NOT_IB(btl)) {
> - opal_output_verbose(5, ompi_btl_base_framework.framework_output,
> - "openib BTL: oob CPC only supported on InfiniBand; skipped on %s:%d",
> - ibv_get_device_name(btl->device->ib_dev),
> - btl->port_num);
> - return OMPI_ERR_NOT_SUPPORTED;
> - }
> -#endif
> -
> - if (mca_btl_openib_component.num_xrc_qps > 0) {
> - opal_output_verbose(5, ompi_btl_base_framework.framework_output,
> - "openib BTL: oob CPC not supported with XRC receive queues, please try xoob CPC; skipped on %s:%d",
> - ibv_get_device_name(btl->device->ib_dev),
> - btl->port_num);
> - return OMPI_ERR_NOT_SUPPORTED;
> - }
> - /* If this btl supports OOB, then post the RML message. But
> - ensure to only post it *once*, because another btl may have
> - come in before this and already posted it. */
> - if (!rml_recv_posted) {
> - ompi_rte_recv_buffer_nb(OMPI_NAME_WILDCARD,
> - OMPI_RML_TAG_OPENIB,
> - OMPI_RML_PERSISTENT,
> - rml_recv_cb,
> - NULL);
> - rml_recv_posted = true;
> - }
> -
> - *cpc = (ompi_btl_openib_connect_base_module_t *) malloc(sizeof(ompi_btl_openib_connect_base_module_t));
> - if (NULL == *cpc) {
> - ompi_rte_recv_cancel(OMPI_NAME_WILDCARD, OMPI_RML_TAG_OPENIB);
> - rml_recv_posted = false;
> - opal_output_verbose(5, ompi_btl_base_framework.framework_output,
> - "openib BTL: oob CPC system error (malloc failed)");
> - return OMPI_ERR_OUT_OF_RESOURCE;
> - }
> -
> - if (oob_priority > 100) {
> - oob_priority = 100;
> - } else if (oob_priority < -1) {
> - oob_priority = -1;
> - }
> -
> - (*cpc)->data.cbm_component = &ompi_btl_openib_connect_oob;
> - (*cpc)->data.cbm_priority = oob_priority;
> - (*cpc)->data.cbm_modex_message = NULL;
> - (*cpc)->data.cbm_modex_message_len = 0;
> -
> - (*cpc)->cbm_endpoint_init = NULL;
> - (*cpc)->cbm_start_connect = oob_module_start_connect;
> - (*cpc)->cbm_endpoint_finalize = NULL;
> - (*cpc)->cbm_finalize = NULL;
> - (*cpc)->cbm_uses_cts = false;
> -
> - opal_output_verbose(5, ompi_btl_base_framework.framework_output,
> - "openib BTL: oob CPC available for use on %s:%d",
> - ibv_get_device_name(btl->device->ib_dev),
> - btl->port_num);
> - return OMPI_SUCCESS;
> -}
> -
> -/*
> - * Connect function. Start initiation of connections to a remote
> - * peer. We send our Queue Pair information over the RML/OOB
> - * communication mechanism. On completion of our send, a send
> - * completion handler is called.
> - */
> -static int oob_module_start_connect(ompi_btl_openib_connect_base_module_t *cpc,
> - mca_btl_base_endpoint_t *endpoint)
> -{
> - int rc;
> -
> - if (OMPI_SUCCESS != (rc = qp_create_all(endpoint))) {
> - return rc;
> - }
> -
> - /* Send connection info over to remote endpoint */
> - endpoint->endpoint_state = MCA_BTL_IB_CONNECTING;
> - if (OMPI_SUCCESS !=
> - (rc = send_connect_data(endpoint, ENDPOINT_CONNECT_REQUEST))) {
> - BTL_ERROR(("error sending connect request, error code %d", rc));
> - return rc;
> - }
> -
> - return OMPI_SUCCESS;
> -}
> -
> -/*
> - * Component finalize function. Cleanup RML non-blocking receive.
> - */
> -static int oob_component_finalize(void)
> -{
> - if (rml_recv_posted) {
> - ompi_rte_recv_cancel(OMPI_NAME_WILDCARD, OMPI_RML_TAG_OPENIB);
> - rml_recv_posted = false;
> - }
> -#if (ENABLE_DYNAMIC_SL)
> - btl_openib_connect_sl_finalize();
> -#endif
> - return OMPI_SUCCESS;
> -}
> -
> -/**************************************************************************/
> -
> -/*
> - * Reply to a `start - connect' message
> - */
> -static int reply_start_connect(mca_btl_openib_endpoint_t *endpoint,
> - mca_btl_openib_rem_info_t *rem_info)
> -{
> - int rc;
> -
> - BTL_VERBOSE(("Initialized QPs, LID = %d",
> - ((mca_btl_openib_module_t*)endpoint->endpoint_btl)->lid));
> -
> - /* Create local QP's and post receive resources */
> - if (OMPI_SUCCESS != (rc = qp_create_all(endpoint))) {
> - return rc;
> - }
> -
> - /* Set the remote side info */
> - set_remote_info(endpoint, rem_info);
> -
> - /* Connect to remote endpoint qp's */
> - if (OMPI_SUCCESS != (rc = qp_connect_all(endpoint))) {
> - return rc;
> - }
> -
> - /* Send connection info over to remote endpoint */
> - endpoint->endpoint_state = MCA_BTL_IB_CONNECT_ACK;
> - if (OMPI_SUCCESS !=
> - (rc = send_connect_data(endpoint, ENDPOINT_CONNECT_RESPONSE))) {
> - BTL_ERROR(("error in endpoint send connect request error code is %d",
> - rc));
> - return rc;
> - }
> - return OMPI_SUCCESS;
> -}
> -
> -
> -static int set_remote_info(mca_btl_base_endpoint_t* endpoint,
> - mca_btl_openib_rem_info_t* rem_info)
> -{
> - /* Free up the memory pointed to by rem_qps before overwriting the pointer
> - in the following memcpy */
> - free(endpoint->rem_info.rem_qps);
> -
> - /* copy the rem_info stuff */
> - memcpy(&((mca_btl_openib_endpoint_t*) endpoint)->rem_info,
> - rem_info, sizeof(mca_btl_openib_rem_info_t));
> -
> - BTL_VERBOSE(("Setting QP info, LID = %d", endpoint->rem_info.rem_lid));
> - return OMPI_SUCCESS;
> -}
> -
> -
> -/*
> - * Connect the local ends of all qp's to the remote side
> - */
> -static int qp_connect_all(mca_btl_openib_endpoint_t *endpoint)
> -{
> - int i;
> - mca_btl_openib_module_t* openib_btl =
> - (mca_btl_openib_module_t*)endpoint->endpoint_btl;
> -
> - for (i = 0; i < mca_btl_openib_component.num_qps; i++) {
> - struct ibv_qp_attr attr;
> - struct ibv_qp* qp = endpoint->qps[i].qp->lcl_qp;
> - enum ibv_mtu mtu = (enum ibv_mtu) ((openib_btl->device->mtu < endpoint->rem_info.rem_mtu) ?
> - openib_btl->device->mtu : endpoint->rem_info.rem_mtu) ;
> -
> - memset(&attr, 0, sizeof(attr));
> - attr.qp_state = IBV_QPS_RTR;
> - attr.path_mtu = mtu;
> - attr.dest_qp_num = endpoint->rem_info.rem_qps[i].rem_qp_num;
> - attr.rq_psn = endpoint->rem_info.rem_qps[i].rem_psn;
> - attr.max_dest_rd_atomic = mca_btl_openib_component.ib_max_rdma_dst_ops;
> - attr.min_rnr_timer = mca_btl_openib_component.ib_min_rnr_timer;
> - attr.ah_attr.is_global = 0;
> - attr.ah_attr.dlid = endpoint->rem_info.rem_lid;
> - attr.ah_attr.src_path_bits = openib_btl->src_path_bits;
> - attr.ah_attr.port_num = openib_btl->port_num;
> - attr.ah_attr.sl = mca_btl_openib_component.ib_service_level;
> -
> -#if (ENABLE_DYNAMIC_SL)
> - /* if user enabled dynamic SL, get it from PathRecord */
> - if (0 != mca_btl_openib_component.ib_path_record_service_level) {
> - int rc = btl_openib_connect_get_pathrecord_sl(qp->context,
> - attr.ah_attr.port_num,
> - openib_btl->lid,
> - attr.ah_attr.dlid);
> - if (OMPI_ERROR == rc) {
> - return OMPI_ERROR;
> - }
> - attr.ah_attr.sl = rc;
> - }
> -#endif
> -
> - /* JMS to be filled in later dynamically */
> - attr.ah_attr.static_rate = 0;
> -
> - if (mca_btl_openib_component.verbose) {
> - BTL_OUTPUT(("Set MTU to IBV value %d (%s bytes)", mtu,
> - (mtu == IBV_MTU_256) ? "256" :
> - (mtu == IBV_MTU_512) ? "512" :
> - (mtu == IBV_MTU_1024) ? "1024" :
> - (mtu == IBV_MTU_2048) ? "2048" :
> - (mtu == IBV_MTU_4096) ? "4096" :
> - "unknown (!)"));
> - }
> -
> - if (ibv_modify_qp(qp, &attr,
> - IBV_QP_STATE |
> - IBV_QP_AV |
> - IBV_QP_PATH_MTU |
> - IBV_QP_DEST_QPN |
> - IBV_QP_RQ_PSN |
> - IBV_QP_MAX_DEST_RD_ATOMIC |
> - IBV_QP_MIN_RNR_TIMER)) {
> - BTL_ERROR(("error modifing QP to RTR errno says %s",
> - strerror(errno)));
> - return OMPI_ERROR;
> - }
> - attr.qp_state = IBV_QPS_RTS;
> - attr.timeout = mca_btl_openib_component.ib_timeout;
> - attr.retry_cnt = mca_btl_openib_component.ib_retry_count;
> - /* On PP QPs we have SW flow control, no need for rnr retries. Setting
> - * it to zero helps to catch bugs */
> - attr.rnr_retry = BTL_OPENIB_QP_TYPE_PP(i) ? 0 :
> - mca_btl_openib_component.ib_rnr_retry;
> - attr.sq_psn = endpoint->qps[i].qp->lcl_psn;
> - attr.max_rd_atomic = mca_btl_openib_component.ib_max_rdma_dst_ops;
> - if (ibv_modify_qp(qp, &attr,
> - IBV_QP_STATE |
> - IBV_QP_TIMEOUT |
> - IBV_QP_RETRY_CNT |
> - IBV_QP_RNR_RETRY |
> - IBV_QP_SQ_PSN |
> - IBV_QP_MAX_QP_RD_ATOMIC)) {
> - BTL_ERROR(("error modifying QP to RTS errno says %s",
> - strerror(errno)));
> - return OMPI_ERROR;
> - }
> - }
> -
> - return OMPI_SUCCESS;
> -}
> -
> -
> -static void permute_array(int *permuted_qps, int nqps)
> -{
> - int i;
> - int idx;
> - int tmp;
> - int control[nqps];
> -
> - for (i = 0; i < nqps; i++) {
> - permuted_qps[i] = i;
> - control[i] = 0;
> - }
> -
> - for (i = 0; i < nqps - 1; i++) {
> - idx = i + random() % (nqps - i);
> - tmp = permuted_qps[i];
> - permuted_qps[i] = permuted_qps[idx];
> - permuted_qps[idx] = tmp;
> - }
> -
> - /* verify that permutation is ok: */
> - for (i = 0; i < nqps; i++) {
> - control[permuted_qps[i]] ++;
> - }
> - for (i = 0; i < nqps; i++) {
> - if (control[i] != 1) {
> - BTL_VERBOSE(("bad permutation detected: "));
> - for (i = 0; i < nqps; i++) BTL_VERBOSE(("%d ", permuted_qps[i]));
> - BTL_VERBOSE(("\n"));
> - abort();
> - }
> - }
> -}
> -
> -
> -/*
> - * Create the local side of all the qp's. The remote sides will be
> - * connected later.
> - */
> -static int qp_create_all(mca_btl_base_endpoint_t* endpoint)
> -{
> - int qp, rc, pp_qp_num = 0;
> - int32_t rd_rsv_total = 0;
> -
> - int rand_qpns[mca_btl_openib_component.num_qps];
> - int i;
> -
> - permute_array(rand_qpns, mca_btl_openib_component.num_qps);
> -
> -
> - for (qp = 0; qp < mca_btl_openib_component.num_qps; ++qp)
> - if(BTL_OPENIB_QP_TYPE_PP(qp)) {
> - rd_rsv_total +=
> - mca_btl_openib_component.qp_infos[qp].u.pp_qp.rd_rsv;
> - pp_qp_num++;
> - }
> -
> - /* if there is no pp QPs we still need reserved WQE for eager rdma flow
> - * control */
> - if(0 == pp_qp_num && true == endpoint->use_eager_rdma)
> - pp_qp_num = 1;
> -
> - for (i = 0; i < mca_btl_openib_component.num_qps; ++i) {
> - struct ibv_srq *srq = NULL;
> - uint32_t max_recv_wr, max_send_wr;
> - int32_t rd_rsv, rd_num_credits;
> -
> - qp = rand_qpns[i];
> - /* QP used for SW flow control need some additional recourses */
> - if(qp == mca_btl_openib_component.credits_qp) {
> - rd_rsv = rd_rsv_total;
> - rd_num_credits = pp_qp_num;
> - } else {
> - rd_rsv = rd_num_credits = 0;
> - }
> -
> - if(BTL_OPENIB_QP_TYPE_PP(qp)) {
> - max_recv_wr = mca_btl_openib_component.qp_infos[qp].rd_num + rd_rsv;
> - max_send_wr = mca_btl_openib_component.qp_infos[qp].rd_num +
> - rd_num_credits;
> - } else {
> - srq = endpoint->endpoint_btl->qps[qp].u.srq_qp.srq;
> - /* no receives are posted to SRQ qp */
> - max_recv_wr = 0;
> - max_send_wr = mca_btl_openib_component.qp_infos[qp].u.srq_qp.sd_max
> - + rd_num_credits;
> - }
> -
> - rc = qp_create_one(endpoint, qp, srq, max_recv_wr, max_send_wr);
> - if (OMPI_SUCCESS != rc) {
> - return rc;
> - }
> - }
> -
> - /* Now that all the qp's are created locally, post some receive
> - buffers, setup credits, etc. */
> - return mca_btl_openib_endpoint_post_recvs(endpoint);
> -}
> -
> -
> -/* Returns max inlne size for qp #N */
> -static uint32_t max_inline_size(int qp, mca_btl_openib_device_t *device)
> -{
> - if (mca_btl_openib_component.qp_infos[qp].size <= device->max_inline_data) {
> - /* If qp message size is smaller than max_inline_data,
> - * we should enable inline messages */
> - return mca_btl_openib_component.qp_infos[qp].size;
> - } else if (mca_btl_openib_component.rdma_qp == qp || 0 == qp) {
> - /* If qp message size is bigger that max_inline_data, we
> - * should enable inline messages only for RDMA QP (for PUT/GET
> - * fin messages) and for the first qp */
> - return device->max_inline_data;
> - }
> - /* Otherway it is no reason for inline */
> - return 0;
> -}
> -
> -/*
> - * Create the local side of one qp. The remote side will be connected
> - * later.
> - */
> -static int qp_create_one(mca_btl_base_endpoint_t* endpoint, int qp,
> - struct ibv_srq *srq, uint32_t max_recv_wr, uint32_t max_send_wr)
> -{
> - mca_btl_openib_module_t *openib_btl = endpoint->endpoint_btl;
> - struct ibv_qp *my_qp;
> - struct ibv_qp_init_attr init_attr;
> - struct ibv_qp_attr attr;
> - size_t req_inline;
> -
> - memset(&init_attr, 0, sizeof(init_attr));
> - memset(&attr, 0, sizeof(attr));
> -
> - init_attr.qp_type = IBV_QPT_RC;
> - init_attr.send_cq = openib_btl->device->ib_cq[BTL_OPENIB_RDMA_QP(qp) ? BTL_OPENIB_HP_CQ: BTL_OPENIB_LP_CQ];
> - init_attr.recv_cq = openib_btl->device->ib_cq[qp_cq_prio(qp)];
> - init_attr.srq = srq;
> - init_attr.cap.max_inline_data = req_inline =
> - max_inline_size(qp, openib_btl->device);
> - init_attr.cap.max_send_sge = 1;
> - init_attr.cap.max_recv_sge = 1; /* we do not use SG list */
> - if(BTL_OPENIB_QP_TYPE_PP(qp)) {
> - init_attr.cap.max_recv_wr = max_recv_wr;
> - } else {
> - init_attr.cap.max_recv_wr = 0;
> - }
> - init_attr.cap.max_send_wr = max_send_wr;
> -
> - my_qp = ibv_create_qp(openib_btl->device->ib_pd, &init_attr);
> -
> - if (NULL == my_qp) {
> - opal_show_help("help-mpi-btl-openib-cpc-base.txt",
> - "ibv_create_qp failed", true,
> - ompi_process_info.nodename,
> - ibv_get_device_name(openib_btl->device->ib_dev),
> - "Reliable connected (RC)");
> - return OMPI_ERROR;
> - }
> - endpoint->qps[qp].qp->lcl_qp = my_qp;
> -
> - if (init_attr.cap.max_inline_data < req_inline) {
> - endpoint->qps[qp].ib_inline_max = init_attr.cap.max_inline_data;
> - opal_show_help("help-mpi-btl-openib-cpc-base.txt",
> - "inline truncated", true, ompi_process_info.nodename,
> - ibv_get_device_name(openib_btl->device->ib_dev),
> - openib_btl->port_num,
> - req_inline, init_attr.cap.max_inline_data);
> - } else {
> - endpoint->qps[qp].ib_inline_max = req_inline;
> - }
> -
> - attr.qp_state = IBV_QPS_INIT;
> - attr.pkey_index = openib_btl->pkey_index;
> - attr.port_num = openib_btl->port_num;
> - attr.qp_access_flags = IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_READ;
> -
> - if (ibv_modify_qp(endpoint->qps[qp].qp->lcl_qp,
> - &attr,
> - IBV_QP_STATE |
> - IBV_QP_PKEY_INDEX |
> - IBV_QP_PORT |
> - IBV_QP_ACCESS_FLAGS )) {
> - BTL_ERROR(("error modifying qp to INIT errno says %s", strerror(errno)));
> - return OMPI_ERROR;
> - }
> -
> - /* Setup meta data on the endpoint */
> - endpoint->qps[qp].qp->lcl_psn = lrand48() & 0xffffff;
> - endpoint->qps[qp].credit_frag = NULL;
> -
> - return OMPI_SUCCESS;
> -}
> -
> -
> -/*
> - * RML send connect information to remote endpoint
> - */
> -static int send_connect_data(mca_btl_base_endpoint_t* endpoint,
> - uint8_t message_type)
> -{
> - opal_buffer_t* buffer = OBJ_NEW(opal_buffer_t);
> - int rc;
> -
> - if (NULL == buffer) {
> - OMPI_ERROR_LOG(OMPI_ERR_OUT_OF_RESOURCE);
> - return OMPI_ERR_OUT_OF_RESOURCE;
> - }
> -
> - /* pack the info in the send buffer */
> - BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT8));
> - rc = opal_dss.pack(buffer, &message_type, 1, OPAL_UINT8);
> - if (OPAL_SUCCESS != rc) {
> - OMPI_ERROR_LOG(rc);
> - return rc;
> - }
> -
> - BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT64));
> - rc = opal_dss.pack(buffer, &endpoint->subnet_id, 1, OPAL_UINT64);
> - if (OPAL_SUCCESS != rc) {
> - OMPI_ERROR_LOG(rc);
> - return rc;
> - }
> -
> - if (message_type != ENDPOINT_CONNECT_REQUEST) {
> - /* send the QP connect request info we respond to */
> - BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT32));
> - rc = opal_dss.pack(buffer,
> - &endpoint->rem_info.rem_qps[0].rem_qp_num, 1,
> - OPAL_UINT32);
> - if (OPAL_SUCCESS != rc) {
> - OMPI_ERROR_LOG(rc);
> - return rc;
> - }
> - BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT16));
> - rc = opal_dss.pack(buffer, &endpoint->rem_info.rem_lid, 1, OPAL_UINT16);
> - if (OPAL_SUCCESS != rc) {
> - OMPI_ERROR_LOG(rc);
> - return rc;
> - }
> - }
> -
> - if (message_type != ENDPOINT_CONNECT_ACK) {
> - int qp;
> - /* stuff all the QP info into the buffer */
> - for (qp = 0; qp < mca_btl_openib_component.num_qps; qp++) {
> - BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT32));
> - rc = opal_dss.pack(buffer, &endpoint->qps[qp].qp->lcl_qp->qp_num,
> - 1, OPAL_UINT32);
> - if (OPAL_SUCCESS != rc) {
> - OMPI_ERROR_LOG(rc);
> - return rc;
> - }
> - BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT32));
> - rc = opal_dss.pack(buffer, &endpoint->qps[qp].qp->lcl_psn, 1,
> - OPAL_UINT32);
> - if (OPAL_SUCCESS != rc) {
> - OMPI_ERROR_LOG(rc);
> - return rc;
> - }
> - }
> -
> - BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT16));
> - rc = opal_dss.pack(buffer, &endpoint->endpoint_btl->lid, 1, OPAL_UINT16);
> - if (OPAL_SUCCESS != rc) {
> - OMPI_ERROR_LOG(rc);
> - return rc;
> - }
> - BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT32));
> - rc = opal_dss.pack(buffer, &endpoint->endpoint_btl->device->mtu, 1,
> - OPAL_UINT32);
> - if (OPAL_SUCCESS != rc) {
> - OMPI_ERROR_LOG(rc);
> - return rc;
> - }
> - BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT32));
> - rc = opal_dss.pack(buffer, &endpoint->index, 1, OPAL_UINT32);
> - if (OPAL_SUCCESS != rc) {
> - OMPI_ERROR_LOG(rc);
> - return rc;
> - }
> - }
> -
> - /* send to remote endpoint */
> - rc = ompi_rte_send_buffer_nb(&endpoint->endpoint_proc->proc_ompi->proc_name,
> - buffer, OMPI_RML_TAG_OPENIB,
> - rml_send_cb, NULL);
> - if (OMPI_SUCCESS != rc) {
> - OMPI_ERROR_LOG(rc);
> - return rc;
> - }
> - BTL_VERBOSE(("Sent QP Info, LID = %d, SUBNET = %" PRIx64 "\n",
> - endpoint->endpoint_btl->lid,
> - endpoint->subnet_id));
> -
> - return OMPI_SUCCESS;
> -}
> -
> -
> -/*
> - * Callback when we have finished RML sending the connect data to a
> - * remote peer
> - */
> -static void rml_send_cb(int status, ompi_process_name_t* endpoint,
> - opal_buffer_t* buffer, ompi_rml_tag_t tag,
> - void* cbdata)
> -{
> - OBJ_RELEASE(buffer);
> -}
> -
> -
> -/*
> - * Non blocking RML recv callback. Read incoming QP and other info,
> - * and if this endpoint is trying to connect, reply with our QP info,
> - * otherwise try to modify QP's and establish reliable connection
> - */
> -static void rml_recv_cb(int status, ompi_process_name_t* process_name,
> - opal_buffer_t* buffer, ompi_rml_tag_t tag,
> - void* cbdata)
> -{
> - mca_btl_openib_proc_t *ib_proc;
> - mca_btl_openib_endpoint_t *ib_endpoint = NULL;
> - int endpoint_state;
> - int rc;
> - uint32_t i, lcl_qp = 0;
> - uint16_t lcl_lid = 0;
> - int32_t cnt = 1;
> - mca_btl_openib_rem_info_t rem_info;
> - uint8_t message_type;
> - bool master;
> -
> - /* We later memcpy this whole structure. Make sure
> - that all the parameters are initialized, especially
> - the pointers */
> - memset(&rem_info,0, sizeof(rem_info));
> -
> - /* start by unpacking data first so we know who is knocking at
> - our door */
> - BTL_VERBOSE(("unpacking %d of %d\n", cnt, OPAL_UINT8));
> - rc = opal_dss.unpack(buffer, &message_type, &cnt, OPAL_UINT8);
> - if (OMPI_SUCCESS != rc) {
> - OMPI_ERROR_LOG(rc);
> - mca_btl_openib_endpoint_invoke_error(NULL);
> - return;
> - }
> -
> - BTL_VERBOSE(("unpacking %d of %d\n", cnt, OPAL_UINT64));
> - rc = opal_dss.unpack(buffer, &rem_info.rem_subnet_id, &cnt, OPAL_UINT64);
> - if (OMPI_SUCCESS != rc) {
> - OMPI_ERROR_LOG(rc);
> - mca_btl_openib_endpoint_invoke_error(NULL);
> - return;
> - }
> -
> - if (ENDPOINT_CONNECT_REQUEST != message_type) {
> - BTL_VERBOSE(("unpacking %d of %d\n", cnt, OPAL_UINT32));
> - rc = opal_dss.unpack(buffer, &lcl_qp, &cnt, OPAL_UINT32);
> - if (OMPI_SUCCESS != rc) {
> - OMPI_ERROR_LOG(rc);
> - mca_btl_openib_endpoint_invoke_error(NULL);
> - return;
> - }
> - BTL_VERBOSE(("unpacking %d of %d\n", cnt, OPAL_UINT16));
> - rc = opal_dss.unpack(buffer, &lcl_lid, &cnt, OPAL_UINT16);
> - if (OMPI_SUCCESS != rc) {
> - OMPI_ERROR_LOG(rc);
> - mca_btl_openib_endpoint_invoke_error(NULL);
> - return;
> - }
> - }
> - if (ENDPOINT_CONNECT_ACK != message_type) {
> - int qp;
> - /* get ready for the data */
> - rem_info.rem_qps =
> - (mca_btl_openib_rem_qp_info_t*) malloc(sizeof(mca_btl_openib_rem_qp_info_t) *
> - mca_btl_openib_component.num_qps);
> -
> - /* unpack all the qp info */
> - for (qp = 0; qp < mca_btl_openib_component.num_qps; ++qp) {
> - BTL_VERBOSE(("unpacking %d of %d\n", cnt, OPAL_UINT32));
> - rc = opal_dss.unpack(buffer, &rem_info.rem_qps[qp].rem_qp_num, &cnt,
> - OPAL_UINT32);
> - if (OMPI_SUCCESS != rc) {
> - OMPI_ERROR_LOG(rc);
> - mca_btl_openib_endpoint_invoke_error(NULL);
> - return;
> - }
> - BTL_VERBOSE(("unpacking %d of %d\n", cnt, OPAL_UINT32));
> - rc = opal_dss.unpack(buffer, &rem_info.rem_qps[qp].rem_psn, &cnt,
> - OPAL_UINT32);
> - if (OMPI_SUCCESS != rc) {
> - OMPI_ERROR_LOG(rc);
> - mca_btl_openib_endpoint_invoke_error(NULL);
> - return;
> - }
> - }
> -
> - BTL_VERBOSE(("unpacking %d of %d\n", cnt, OPAL_UINT16));
> - rc = opal_dss.unpack(buffer, &rem_info.rem_lid, &cnt, OPAL_UINT16);
> - if (OMPI_SUCCESS != rc) {
> - OMPI_ERROR_LOG(rc);
> - mca_btl_openib_endpoint_invoke_error(NULL);
> - return;
> - }
> - BTL_VERBOSE(("unpacking %d of %d\n", cnt, OPAL_UINT32));
> - rc = opal_dss.unpack(buffer, &rem_info.rem_mtu, &cnt, OPAL_UINT32);
> - if (OMPI_SUCCESS != rc) {
> - OMPI_ERROR_LOG(rc);
> - mca_btl_openib_endpoint_invoke_error(NULL);
> - return;
> - }
> - BTL_VERBOSE(("unpacking %d of %d\n", cnt, OPAL_UINT32));
> - rc = opal_dss.unpack(buffer, &rem_info.rem_index, &cnt, OPAL_UINT32);
> - if (OMPI_SUCCESS != rc) {
> - OMPI_ERROR_LOG(rc);
> - mca_btl_openib_endpoint_invoke_error(NULL);
> - return;
> - }
> - }
> -
> - BTL_VERBOSE(("Received QP Info, LID = %d, SUBNET = %" PRIx64 "\n",
> - rem_info.rem_lid,
> - rem_info.rem_subnet_id));
> -
> - master = ompi_rte_compare_name_fields(OMPI_RTE_CMP_ALL, OMPI_PROC_MY_NAME,
> - process_name) > 0 ? true : false;
> -
> - /* Need to protect the ib_procs list */
> - OPAL_THREAD_LOCK(&mca_btl_openib_component.ib_lock);
> -
> - for (ib_proc = (mca_btl_openib_proc_t*)
> - opal_list_get_first(&mca_btl_openib_component.ib_procs);
> - ib_proc != (mca_btl_openib_proc_t*)
> - opal_list_get_end(&mca_btl_openib_component.ib_procs);
> - ib_proc = (mca_btl_openib_proc_t*)opal_list_get_next(ib_proc)) {
> - bool found = false;
> -
> - if (OPAL_EQUAL != ompi_rte_compare_name_fields(OMPI_RTE_CMP_ALL,
> - &ib_proc->proc_ompi->proc_name, process_name)) {
> - continue;
> - }
> -
> - if (ENDPOINT_CONNECT_REQUEST != message_type) {
> - /* This is a reply message. Try to get the endpoint
> - instance the reply belongs to */
> - for (i = 0; i < ib_proc->proc_endpoint_count; i++) {
> - ib_endpoint = ib_proc->proc_endpoints[i];
> - if (ib_endpoint->qps[0].qp->lcl_qp != NULL &&
> - lcl_lid == ib_endpoint->endpoint_btl->lid &&
> - lcl_qp == ib_endpoint->qps[0].qp->lcl_qp->qp_num &&
> - rem_info.rem_subnet_id == ib_endpoint->subnet_id) {
> - found = true;
> - break;
> - }
> - }
> - } else {
> - /* This is new connection request. If this is master try
> - to find endpoint in a connecting state. If this is
> - slave try to find endpoint in closed state and
> - initiate connection back */
> - mca_btl_openib_endpoint_t *ib_endpoint_found = NULL;
> - int master_first_closed = -1;
> -
> - for (i = 0; i < ib_proc->proc_endpoint_count; i++) {
> - ib_endpoint = ib_proc->proc_endpoints[i];
> - if (ib_endpoint->subnet_id != rem_info.rem_subnet_id ||
> - (ib_endpoint->endpoint_state != MCA_BTL_IB_CONNECTING
> - && ib_endpoint->endpoint_state != MCA_BTL_IB_CLOSED))
> - continue;
> - found = true;
> - ib_endpoint_found = ib_endpoint;
> -
> - if (master && -1 == master_first_closed &&
> - MCA_BTL_IB_CLOSED == ib_endpoint->endpoint_state ) {
> - /* capture in case no endpoint in connecting state */
> - master_first_closed = i;
> - }
> -
> - if ((master &&
> - MCA_BTL_IB_CONNECTING == ib_endpoint->endpoint_state) ||
> - (!master &&
> - MCA_BTL_IB_CLOSED == ib_endpoint->endpoint_state))
> - break; /* Found one. No point to continue */
> - }
> - ib_endpoint = ib_endpoint_found;
> -
> - if (found && master &&
> - MCA_BTL_IB_CLOSED == ib_endpoint->endpoint_state ) {
> - /* since this is master and no endpoints found in
> - * connecting state use the first endpoint found
> - * in closed state */
> - ib_endpoint = ib_proc->proc_endpoints[master_first_closed];
> - }
> -
> - /* if this is slave and there is no endpoints in closed
> - state then all connection are already in progress so
> - just ignore this connection request */
> - if (found && !master &&
> - MCA_BTL_IB_CLOSED != ib_endpoint->endpoint_state) {
> - OPAL_THREAD_UNLOCK(&mca_btl_openib_component.ib_lock);
> - return;
> - }
> - }
> -
> - if (!found) {
> - BTL_ERROR(("can't find suitable endpoint for this peer\n"));
> - mca_btl_openib_endpoint_invoke_error(NULL);
> - OPAL_THREAD_UNLOCK(&mca_btl_openib_component.ib_lock);
> - return;
> - }
> -
> - OPAL_THREAD_LOCK(&ib_endpoint->endpoint_lock);
> - endpoint_state = ib_endpoint->endpoint_state;
> -
> - /* Update status */
> - switch (endpoint_state) {
> - case MCA_BTL_IB_CLOSED :
> - /* We had this connection closed before. The endpoint is
> - trying to connect. Move the status of this connection
> - to CONNECTING, and then reply with our QP
> - information */
> - if (master) {
> - assert(rem_info.rem_qps != NULL);
> - rc = reply_start_connect(ib_endpoint, &rem_info);
> - } else {
> - rc = oob_module_start_connect(ib_endpoint->endpoint_local_cpc,
> - ib_endpoint);
> - }
> -
> - if (OMPI_SUCCESS != rc) {
> - BTL_ERROR(("error in endpoint reply start connect"));
> - mca_btl_openib_endpoint_invoke_error(ib_endpoint);
> - OPAL_THREAD_UNLOCK(&ib_endpoint->endpoint_lock);
> - break;
> - }
> -
> - /* As long as we expect a message from the peer (in order
> - to setup the connection) let the event engine pool the
> - RML events. Note: we increment it once peer active
> - connection. */
> - opal_progress_event_users_increment();
> - OPAL_THREAD_UNLOCK(&ib_endpoint->endpoint_lock);
> - break;
> -
> - case MCA_BTL_IB_CONNECTING :
> - assert(rem_info.rem_qps != NULL);
> - set_remote_info(ib_endpoint, &rem_info);
> - if (OMPI_SUCCESS != (rc = qp_connect_all(ib_endpoint))) {
> - BTL_ERROR(("endpoint connect error: %d", rc));
> - mca_btl_openib_endpoint_invoke_error(ib_endpoint);
> - OPAL_THREAD_UNLOCK(&ib_endpoint->endpoint_lock);
> - break;
> - }
> -
> - if (master) {
> - ib_endpoint->endpoint_state = MCA_BTL_IB_WAITING_ACK;
> -
> - /* Send him an ACK */
> - send_connect_data(ib_endpoint, ENDPOINT_CONNECT_RESPONSE);
> - OPAL_THREAD_UNLOCK(&ib_endpoint->endpoint_lock);
> - } else {
> - send_connect_data(ib_endpoint, ENDPOINT_CONNECT_ACK);
> - /* Tell main BTL that we're done */
> - mca_btl_openib_endpoint_cpc_complete(ib_endpoint);
> - /* cpc complete unlock the endpoint */
> - }
> - break;
> -
> - case MCA_BTL_IB_WAITING_ACK:
> - /* Tell main BTL that we're done */
> - mca_btl_openib_endpoint_cpc_complete(ib_endpoint);
> - /* cpc complete unlock the endpoint */
> - break;
> -
> - case MCA_BTL_IB_CONNECT_ACK:
> - send_connect_data(ib_endpoint, ENDPOINT_CONNECT_ACK);
> - /* Tell main BTL that we're done */
> - mca_btl_openib_endpoint_cpc_complete(ib_endpoint);
> - /* cpc complete unlock the endpoint */
> - break;
> -
> - case MCA_BTL_IB_CONNECTED:
> - OPAL_THREAD_UNLOCK(&ib_endpoint->endpoint_lock);
> - break;
> -
> - case MCA_BTL_IB_FAILED:
> - /* This connection has been put in the failed state
> - * so just ignore the connection message. */
> - OPAL_THREAD_UNLOCK(&ib_endpoint->endpoint_lock);
> - break;
> -
> - default :
> - BTL_ERROR(("Invalid endpoint state %d", endpoint_state));
> - mca_btl_openib_endpoint_invoke_error(ib_endpoint);
> - OPAL_THREAD_UNLOCK(&ib_endpoint->endpoint_lock);
> - }
> - break;
> - }
> - OPAL_THREAD_UNLOCK(&mca_btl_openib_component.ib_lock);
> -}
>
> Deleted: trunk/ompi/mca/btl/openib/connect/btl_openib_connect_oob.h
> ==============================================================================
> --- trunk/ompi/mca/btl/openib/connect/btl_openib_connect_oob.h 2013-11-13 23:16:53 EST (Wed, 13 Nov 2013) (r29702)
> +++ /dev/null 00:00:00 1970 (deleted)
> @@ -1,18 +0,0 @@
> -/*
> - * Copyright (c) 2007-2008 Cisco Systems, Inc. All rights reserved.
> - *
> - * $COPYRIGHT$
> - *
> - * Additional copyrights may follow
> - *
> - * $HEADER$
> - */
> -
> -#ifndef BTL_OPENIB_CONNECT_OOB_H
> -#define BTL_OPENIB_CONNECT_OOB_H
> -
> -#include "connect/connect.h"
> -
> -extern ompi_btl_openib_connect_base_component_t ompi_btl_openib_connect_oob;
> -
> -#endif
>
> Deleted: trunk/ompi/mca/btl/openib/connect/btl_openib_connect_xoob.c
> ==============================================================================
> --- trunk/ompi/mca/btl/openib/connect/btl_openib_connect_xoob.c 2013-11-13 23:16:53 EST (Wed, 13 Nov 2013) (r29702)
> +++ /dev/null 00:00:00 1970 (deleted)
> @@ -1,1150 +0,0 @@
> -/*
> - * Copyright (c) 2007-2011 Mellanox Technologies. All rights reserved.
> - * Copyright (c) 2009-2013 Cisco Systems, Inc. All rights reserved.
> - * Copyright (c) 2009 IBM Corporation. All rights reserved.
> - * Copyright (c) 2010-2011 The University of Tennessee and The University
> - * of Tennessee Research Foundation. All rights
> - * reserved.
> - * Copyright (c) 2012 Los Alamos National Security, LLC. All rights
> - * reserved.
> - * Copyright (c) 2013 NVIDIA Corporation. All rights reserved.
> - *
> - * $COPYRIGHT$
> - *
> - * Additional copyrights may follow
> - *
> - * $HEADER$
> - */
> -
> -#include "ompi_config.h"
> -
> -#include "opal_stdint.h"
> -#include "opal/dss/dss.h"
> -#include "opal/util/error.h"
> -#include "opal/util/output.h"
> -#include "opal/util/show_help.h"
> -
> -#include "ompi/mca/rte/rte.h"
> -
> -#include "btl_openib.h"
> -#include "btl_openib_endpoint.h"
> -#include "btl_openib_proc.h"
> -#include "btl_openib_xrc.h"
> -#include "btl_openib_async.h"
> -#include "connect/connect.h"
> -#if (ENABLE_DYNAMIC_SL)
> -#include "connect/btl_openib_connect_sl.h"
> -#endif
> -
> -static void xoob_component_register(void);
> -static int xoob_component_query(mca_btl_openib_module_t *openib_btl,
> - ompi_btl_openib_connect_base_module_t **cpc);
> -static int xoob_component_finalize(void);
> -
> -static int xoob_module_start_connect(ompi_btl_openib_connect_base_module_t *cpc,
> - mca_btl_base_endpoint_t *endpoint);
> -
> -/*
> - * The "component" struct -- the top-level function pointers for the
> - * xoob connection scheme.
> - */
> -ompi_btl_openib_connect_base_component_t ompi_btl_openib_connect_xoob = {
> - "xoob",
> - /* Register */
> - xoob_component_register,
> - /* Init */
> - NULL,
> - /* Query */
> - xoob_component_query,
> - /* Finalize */
> - xoob_component_finalize,
> -};
> -
> -typedef enum {
> - ENDPOINT_XOOB_CONNECT_REQUEST,
> - ENDPOINT_XOOB_CONNECT_RESPONSE,
> - ENDPOINT_XOOB_CONNECT_XRC_REQUEST,
> - ENDPOINT_XOOB_CONNECT_XRC_RESPONSE,
> - ENDPOINT_XOOB_CONNECT_XRC_NR_RESPONSE /* The xrc recv qp already was destroyed */
> -} connect_message_type_t;
> -
> -static bool rml_recv_posted = false;
> -
> -#define XOOB_SET_REMOTE_INFO(EP, INFO) \
> -do { \
> - /* copy the rem_info stuff */ \
> - EP.rem_lid = INFO.rem_lid; \
> - EP.rem_subnet_id = INFO.rem_subnet_id; \
> - EP.rem_mtu = INFO.rem_mtu; \
> - EP.rem_index = INFO.rem_index; \
> - memcpy((void*)EP.rem_qps, (void*)INFO.rem_qps, \
> - sizeof(mca_btl_openib_rem_qp_info_t)); \
> - /* copy the rem_info stuff */ \
> - memcpy((void*)EP.rem_srqs, (void*)INFO.rem_srqs, \
> - sizeof(mca_btl_openib_rem_srq_info_t) * \
> - mca_btl_openib_component.num_xrc_qps); \
> -} while (0)
> -
> -static int xoob_priority = 60;
> -
> -/*
> - * Callback when we have finished RML sending the connect data to a
> - * remote peer
> - */
> -static void xoob_rml_send_cb(int status, ompi_process_name_t* endpoint,
> - opal_buffer_t* buffer, ompi_rml_tag_t tag,
> - void* cbdata)
> -{
> - OBJ_RELEASE(buffer);
> -}
> -
> -/* Receive connect information to remote endpoint */
> -static int xoob_receive_connect_data(mca_btl_openib_rem_info_t *info, uint16_t *lid,
> - uint8_t *message_type, opal_buffer_t* buffer)
> -{
> - int cnt = 1, rc, srq;
> -
> - /* Recv standart header */
> - BTL_VERBOSE(("unpacking %d of %d\n", cnt, OPAL_UINT8));
> - rc = opal_dss.unpack(buffer, message_type, &cnt, OPAL_UINT8);
> - if (OPAL_SUCCESS != rc) {
> - OMPI_ERROR_LOG(rc);
> - return OMPI_ERROR;
> - }
> - BTL_VERBOSE(("Recv unpack Message type = %d\n", *message_type));
> -
> - BTL_VERBOSE(("unpacking %d of %d\n", cnt, OPAL_UINT64));
> - rc = opal_dss.unpack(buffer, &info->rem_subnet_id, &cnt, OPAL_UINT64);
> - if (OPAL_SUCCESS != rc) {
> - OMPI_ERROR_LOG(rc);
> - return OMPI_ERROR;
> - }
> - BTL_VERBOSE(("Recv unpack sid = %" PRIx64 "\n", info->rem_subnet_id));
> -
> - BTL_VERBOSE(("unpacking %d of %d\n", cnt, OPAL_UINT16));
> - rc = opal_dss.unpack(buffer, &info->rem_lid, &cnt, OPAL_UINT16);
> - if (OPAL_SUCCESS != rc) {
> - OMPI_ERROR_LOG(rc);
> - return OMPI_ERROR;
> - }
> - BTL_VERBOSE(("Recv unpack lid = %d", info->rem_lid));
> -
> - /* Till now we got the standart header, now we continue to recieve data for
> - * different packet types
> - */
> - if (ENDPOINT_XOOB_CONNECT_REQUEST == *message_type ||
> - ENDPOINT_XOOB_CONNECT_RESPONSE == *message_type) {
> - BTL_VERBOSE(("unpacking %d of %d\n", cnt, OPAL_UINT32));
> - rc = opal_dss.unpack(buffer, &info->rem_qps->rem_qp_num, &cnt,
> - OPAL_UINT32);
> - if (OPAL_SUCCESS != rc) {
> - OMPI_ERROR_LOG(rc);
> - return OMPI_ERROR;
> - }
> - BTL_VERBOSE(("Recv unpack remote qp = %x", info->rem_qps->rem_qp_num));
> -
> - BTL_VERBOSE(("unpacking %d of %d\n", cnt, OPAL_UINT32));
> - rc = opal_dss.unpack(buffer, &info->rem_qps->rem_psn, &cnt,
> - OPAL_UINT32);
> - if (OPAL_SUCCESS != rc) {
> - OMPI_ERROR_LOG(rc);
> - return OMPI_ERROR;
> - }
> - BTL_VERBOSE(("Recv unpack remote psn = %d", info->rem_qps->rem_psn));
> -
> - BTL_VERBOSE(("unpacking %d of %d\n", cnt, OPAL_UINT32));
> - rc = opal_dss.unpack(buffer, &info->rem_mtu, &cnt, OPAL_UINT32);
> - if (OPAL_SUCCESS != rc) {
> - OMPI_ERROR_LOG(rc);
> - return OMPI_ERROR;
> - }
> - BTL_VERBOSE(("Recv unpack remote mtu = %d", info->rem_mtu));
> - }
> -
> - if (ENDPOINT_XOOB_CONNECT_REQUEST == *message_type ||
> - ENDPOINT_XOOB_CONNECT_XRC_REQUEST == *message_type) {
> - /* unpack requested lid info */
> - BTL_VERBOSE(("unpacking %d of %d\n", cnt, OPAL_UINT16));
> - rc = opal_dss.unpack(buffer, lid, &cnt, OPAL_UINT16);
> - if (OPAL_SUCCESS != rc) {
> - OMPI_ERROR_LOG(rc);
> - return OMPI_ERROR;
> - }
> - BTL_VERBOSE(("Recv unpack requested lid = %d", *lid));
> - }
> -
> - /* Unpack requested recv qp number */
> - if (ENDPOINT_XOOB_CONNECT_XRC_REQUEST == *message_type) {
> - BTL_VERBOSE(("unpacking %d of %d\n", cnt, OPAL_UINT32));
> - /* In XRC request case we will use rem_qp_num as container for requested qp number */
> - rc = opal_dss.unpack(buffer, &info->rem_qps->rem_qp_num, &cnt,
> - OPAL_UINT32);
> - if (OPAL_SUCCESS != rc) {
> - OMPI_ERROR_LOG(rc);
> - return rc;
> - }
> - BTL_VERBOSE(("Recv unpack requested qp = %x", info->rem_qps->rem_qp_num));
> - }
> -
> - if (ENDPOINT_XOOB_CONNECT_RESPONSE == *message_type ||
> - ENDPOINT_XOOB_CONNECT_XRC_RESPONSE == *message_type) {
> - BTL_VERBOSE(("unpacking %d of %d\n", cnt, OPAL_UINT32));
> - rc = opal_dss.unpack(buffer, &info->rem_index, &cnt, OPAL_UINT32);
> - if (OPAL_SUCCESS != rc) {
> - OMPI_ERROR_LOG(rc);
> - return OMPI_ERROR;
> - }
> - BTL_VERBOSE(("Recv unpack remote index = %d", info->rem_index));
> -
> - for (srq = 0; srq < mca_btl_openib_component.num_xrc_qps; srq++) {
> - BTL_VERBOSE(("unpacking %d of %d\n", cnt, OPAL_UINT32));
> - rc = opal_dss.unpack(buffer, &info->rem_srqs[srq].rem_srq_num, &cnt, OPAL_UINT32);
> - if (OPAL_SUCCESS != rc) {
> - OMPI_ERROR_LOG(rc);
> - return OMPI_ERROR;
> - }
> - BTL_VERBOSE(("Recv unpack remote index srq num[%d]= %d", srq, info->rem_srqs[srq].rem_srq_num));
> - }
> - }
> - return OMPI_SUCCESS;
> -}
> -
> -/*
> - * send connect information to remote endpoint
> - */
> -static int xoob_send_connect_data(mca_btl_base_endpoint_t* endpoint,
> - uint8_t message_type)
> -{
> - opal_buffer_t* buffer = OBJ_NEW(opal_buffer_t);
> - int rc, srq;
> -
> - if (NULL == buffer) {
> - OMPI_ERROR_LOG(OMPI_ERR_OUT_OF_RESOURCE);
> - return OMPI_ERR_OUT_OF_RESOURCE;
> - }
> -
> - /* Bulding standart header that we use in all messages:
> - * - Message type,
> - * - Our subnet id
> - * - Our LID
> - */
> - /* pack the info in the send buffer */
> - BTL_VERBOSE(("Send pack Message type = %d", message_type));
> - BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT8));
> - rc = opal_dss.pack(buffer, &message_type, 1, OPAL_UINT8);
> - if (OPAL_SUCCESS != rc) {
> - OMPI_ERROR_LOG(rc);
> - return rc;
> - }
> -
> - BTL_VERBOSE(("Send pack sid = %" PRIx64 "\n", endpoint->subnet_id));
> - BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT64));
> - rc = opal_dss.pack(buffer, &endpoint->subnet_id, 1, OPAL_UINT64);
> - if (OPAL_SUCCESS != rc) {
> - OMPI_ERROR_LOG(rc);
> - return rc;
> - }
> -
> - BTL_VERBOSE(("Send pack lid = %d", endpoint->endpoint_btl->lid));
> - BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT16));
> - rc = opal_dss.pack(buffer, &endpoint->endpoint_btl->lid, 1, OPAL_UINT16);
> - if (OPAL_SUCCESS != rc) {
> - OMPI_ERROR_LOG(rc);
> - return rc;
> - }
> -
> - /* Now we append to standart header additional information
> - * that is required for full (open qp,etc..) connect request and response:
> - * - qp_num of first qp
> - * - psn of first qp
> - * - MTU
> - */
> - if (ENDPOINT_XOOB_CONNECT_REQUEST == message_type ||
> - ENDPOINT_XOOB_CONNECT_RESPONSE == message_type) {
> - uint32_t psn, qp_num;
> -
> - if (ENDPOINT_XOOB_CONNECT_REQUEST == message_type) {
> - qp_num = endpoint->qps[0].qp->lcl_qp->qp_num;
> - psn = endpoint->qps[0].qp->lcl_psn;
> - } else {
> - qp_num = endpoint->xrc_recv_qp_num;
> - psn = endpoint->xrc_recv_psn;
> - }
> - /* stuff all the QP info into the buffer */
> - /* we need to send only one QP */
> - BTL_VERBOSE(("Send pack qp num = %x", qp_num));
> - BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT32));
> - rc = opal_dss.pack(buffer, &qp_num, 1, OPAL_UINT32);
> - if (OPAL_SUCCESS != rc) {
> - OMPI_ERROR_LOG(rc);
> - return rc;
> - }
> - BTL_VERBOSE(("Send pack lpsn = %d", psn));
> - BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT32));
> - rc = opal_dss.pack(buffer, &psn, 1, OPAL_UINT32);
> - if (OPAL_SUCCESS != rc) {
> - OMPI_ERROR_LOG(rc);
> - return rc;
> - }
> -
> - BTL_VERBOSE(("Send pack mtu = %d", endpoint->endpoint_btl->device->mtu));
> - BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT32));
> - rc = opal_dss.pack(buffer, &endpoint->endpoint_btl->device->mtu, 1,
> - OPAL_UINT32);
> - if (OPAL_SUCCESS != rc) {
> - OMPI_ERROR_LOG(rc);
> - return rc;
> - }
> - }
> -
> - /* We append to header above additional information
> - * that is required for full & XRC connect request:
> - * - The lid ob btl on remote site that we want to connect
> - */
> - if (ENDPOINT_XOOB_CONNECT_REQUEST == message_type ||
> - ENDPOINT_XOOB_CONNECT_XRC_REQUEST == message_type) {
> - /* when we are sending request we add remote lid that we want to connect */
> -
> - BTL_VERBOSE(("Send pack remote lid = %d", endpoint->ib_addr->lid));
> - BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT16));
> - rc = opal_dss.pack(buffer, &endpoint->ib_addr->lid, 1, OPAL_UINT16);
> - if (OPAL_SUCCESS != rc) {
> - OMPI_ERROR_LOG(rc);
> - return rc;
> - }
> - }
> -
> - /* when we are sending xrc request we add remote
> - * recv qp number that we want to connect. */
> - if (ENDPOINT_XOOB_CONNECT_XRC_REQUEST == message_type) {
> - BTL_VERBOSE(("Send pack remote qp = %x", endpoint->ib_addr->remote_xrc_rcv_qp_num));
> - BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT32));
> - rc = opal_dss.pack(buffer, &endpoint->ib_addr->remote_xrc_rcv_qp_num,
> - 1, OPAL_UINT32);
> - if (OPAL_SUCCESS != rc) {
> - OMPI_ERROR_LOG(rc);
> - return rc;
> - }
> - }
> - /* We append to header above additional information
> - * that is required for full & XRC connect response:
> - * - index of our endpoint
> - * - array of xrc-srq numbers
> - */
> - if (ENDPOINT_XOOB_CONNECT_RESPONSE == message_type ||
> - ENDPOINT_XOOB_CONNECT_XRC_RESPONSE == message_type) {
> - /* we need to send the endpoint index for immidate send */
> - BTL_VERBOSE(("Send pack index = %d", endpoint->index));
> - BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT32));
> - rc = opal_dss.pack(buffer, &endpoint->index, 1, OPAL_UINT32);
> - if (OPAL_SUCCESS != rc) {
> - OMPI_ERROR_LOG(rc);
> - return rc;
> - }
> - /* on response we add all SRQ numbers */
> - for (srq = 0; srq < mca_btl_openib_component.num_xrc_qps; srq++) {
> - BTL_VERBOSE(("Send pack srq[%d] num = %d", srq, endpoint->endpoint_btl->qps[srq].u.srq_qp.srq->xrc_srq_num));
> - BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT32));
> - rc = opal_dss.pack(buffer, &endpoint->endpoint_btl->qps[srq].u.srq_qp.srq->xrc_srq_num,
> - 1, OPAL_UINT32);
> - if (OPAL_SUCCESS != rc) {
> - OMPI_ERROR_LOG(rc);
> - return rc;
> - }
> - }
> - }
> -
> - /* send to remote endpoint */
> - rc = ompi_rte_send_buffer_nb(&endpoint->endpoint_proc->proc_ompi->proc_name,
> - buffer, OMPI_RML_TAG_XOPENIB,
> - xoob_rml_send_cb, NULL);
> - if (OMPI_SUCCESS != rc) {
> - OMPI_ERROR_LOG(rc);
> - return rc;
> - }
> -
> - BTL_VERBOSE(("Send QP Info, LID = %d, SUBNET = %" PRIx64 ", Message type = %d",
> - endpoint->endpoint_btl->lid,
> - endpoint->subnet_id,
> - message_type));
> -
> - return OMPI_SUCCESS;
> -}
> -
> -/* Create XRC send qp */
> -static int xoob_send_qp_create (mca_btl_base_endpoint_t* endpoint)
> -{
> - int prio = BTL_OPENIB_LP_CQ; /* all send completions go to low prio CQ */
> - uint32_t send_wr;
> - struct ibv_qp **qp;
> - uint32_t *psn;
> - struct ibv_qp_init_attr qp_init_attr;
> - struct ibv_qp_attr attr;
> - int ret;
> - size_t req_inline;
> -
> - mca_btl_openib_module_t *openib_btl =
> - (mca_btl_openib_module_t*)endpoint->endpoint_btl;
> -
> - /* Prepare QP structs */
> - BTL_VERBOSE(("Creating Send QP\n"));
> - qp = &endpoint->qps[0].qp->lcl_qp;
> - psn = &endpoint->qps[0].qp->lcl_psn;
> - /* reserve additional wr for eager rdma credit management */
> - send_wr = endpoint->ib_addr->qp->sd_wqe +
> - (mca_btl_openib_component.use_eager_rdma ?
> - mca_btl_openib_component.max_eager_rdma : 0);
> - memset(&qp_init_attr, 0, sizeof(struct ibv_qp_init_attr));
> - memset(&attr, 0, sizeof(struct ibv_qp_attr));
> -
> - qp_init_attr.send_cq = qp_init_attr.recv_cq = openib_btl->device->ib_cq[prio];
> -
> - /* no need recv queue; receives are posted to srq */
> - qp_init_attr.cap.max_recv_wr = 0;
> - qp_init_attr.cap.max_send_wr = send_wr;
> - qp_init_attr.cap.max_inline_data = req_inline =
> - openib_btl->device->max_inline_data;
> - qp_init_attr.cap.max_send_sge = 1;
> - /* this one is ignored by driver */
> - qp_init_attr.cap.max_recv_sge = 1; /* we do not use SG list */
> - qp_init_attr.qp_type = IBV_QPT_XRC;
> - qp_init_attr.xrc_domain = openib_btl->device->xrc_domain;
> - *qp = ibv_create_qp(openib_btl->device->ib_pd, &qp_init_attr);
> - if (NULL == *qp) {
> - opal_show_help("help-mpi-btl-openib-cpc-base.txt",
> - "ibv_create_qp failed", true,
> - ompi_process_info.nodename,
> - ibv_get_device_name(openib_btl->device->ib_dev),
> - "Reliable connected (XRC)");
> - return OMPI_ERROR;
> - }
> -
> - if (qp_init_attr.cap.max_inline_data < req_inline) {
> - endpoint->qps[0].ib_inline_max = qp_init_attr.cap.max_inline_data;
> - opal_show_help("help-mpi-btl-openib-cpc-base.txt",
> - "inline truncated", ompi_process_info.nodename,
> - ibv_get_device_name(openib_btl->device->ib_dev),
> - openib_btl->port_num,
> - req_inline, qp_init_attr.cap.max_inline_data);
> - } else {
> - endpoint->qps[0].ib_inline_max = req_inline;
> - }
> -
> - attr.qp_state = IBV_QPS_INIT;
> - attr.pkey_index = openib_btl->pkey_index;
> - attr.port_num = openib_btl->port_num;
> - attr.qp_access_flags = IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_READ;
> - ret = ibv_modify_qp(*qp, &attr,
> - IBV_QP_STATE |
> - IBV_QP_PKEY_INDEX |
> - IBV_QP_PORT |
> - IBV_QP_ACCESS_FLAGS );
> - if (ret) {
> - BTL_ERROR(("Error modifying QP[%x] to IBV_QPS_INIT errno says: %s [%d]",
> - (*qp)->qp_num, strerror(ret), ret));
> - return OMPI_ERROR;
> - }
> -
> - /* Setup meta data on the endpoint */
> - *psn = lrand48() & 0xffffff;
> -
> - /* Now that all the qp's are created locally, post some receive
> - buffers, setup credits, etc. */
> - return mca_btl_openib_endpoint_post_recvs(endpoint);
> -}
> -
> -/* Send qp connect */
> -static int xoob_send_qp_connect(mca_btl_openib_endpoint_t *endpoint, mca_btl_openib_rem_info_t *rem_info)
> -{
> - struct ibv_qp* qp;
> - struct ibv_qp_attr attr;
> - uint32_t psn;
> - int ret;
> -
> - mca_btl_openib_module_t* openib_btl =
> - (mca_btl_openib_module_t*)endpoint->endpoint_btl;
> -
> - BTL_VERBOSE(("Connecting Send QP\n"));
> - assert(NULL != endpoint->qps);
> - qp = endpoint->qps[0].qp->lcl_qp;
> - psn = endpoint->qps[0].qp->lcl_psn;
> -
> - memset(&attr, 0, sizeof(attr));
> - attr.qp_state = IBV_QPS_RTR;
> - attr.path_mtu = (openib_btl->device->mtu < endpoint->rem_info.rem_mtu) ?
> - openib_btl->device->mtu : rem_info->rem_mtu;
> - attr.dest_qp_num = rem_info->rem_qps->rem_qp_num;
> - attr.rq_psn = rem_info->rem_qps->rem_psn;
> - attr.max_dest_rd_atomic = mca_btl_openib_component.ib_max_rdma_dst_ops;
> - attr.min_rnr_timer = mca_btl_openib_component.ib_min_rnr_timer;
> - attr.ah_attr.is_global = 0;
> - attr.ah_attr.dlid = rem_info->rem_lid;
> - attr.ah_attr.src_path_bits = openib_btl->src_path_bits;
> - attr.ah_attr.port_num = openib_btl->port_num;
> - attr.ah_attr.static_rate = 0;
> - attr.ah_attr.sl = mca_btl_openib_component.ib_service_level;
> -
> -#if (ENABLE_DYNAMIC_SL)
> - /* if user enabled dynamic SL, get it from PathRecord */
> - if (0 != mca_btl_openib_component.ib_path_record_service_level) {
> - int rc = btl_openib_connect_get_pathrecord_sl(qp->context,
> - attr.ah_attr.port_num,
> - openib_btl->lid,
> - attr.ah_attr.dlid);
> - if (OMPI_ERROR == rc) {
> - return OMPI_ERROR;
> - }
> - attr.ah_attr.sl = rc;
> - }
> -#endif
> -
> - if (mca_btl_openib_component.verbose) {
> - BTL_VERBOSE(("Set MTU to IBV value %d (%s bytes)", attr.path_mtu,
> - (attr.path_mtu == IBV_MTU_256) ? "256" :
> - (attr.path_mtu == IBV_MTU_512) ? "512" :
> - (attr.path_mtu == IBV_MTU_1024) ? "1024" :
> - (attr.path_mtu == IBV_MTU_2048) ? "2048" :
> - (attr.path_mtu == IBV_MTU_4096) ? "4096" :
> - "unknown (!)"));
> - }
> - ret = ibv_modify_qp(qp, &attr,
> - IBV_QP_STATE |
> - IBV_QP_AV |
> - IBV_QP_PATH_MTU |
> - IBV_QP_DEST_QPN |
> - IBV_QP_RQ_PSN |
> - IBV_QP_MAX_DEST_RD_ATOMIC |
> - IBV_QP_MIN_RNR_TIMER);
> - if (ret) {
> - BTL_ERROR(("Error modifying QP[%x] to IBV_QPS_RTR errno says: %s [%d]",
> - qp->qp_num, strerror(ret), ret));
> - return OMPI_ERROR;
> - }
> -
> - attr.qp_state = IBV_QPS_RTS;
> - attr.timeout = mca_btl_openib_component.ib_timeout;
> - attr.retry_cnt = mca_btl_openib_component.ib_retry_count;
> - attr.rnr_retry = mca_btl_openib_component.ib_rnr_retry;
> - attr.sq_psn = psn;
> - attr.max_rd_atomic = mca_btl_openib_component.ib_max_rdma_dst_ops;
> - ret = ibv_modify_qp(qp, &attr,
> - IBV_QP_STATE |
> - IBV_QP_TIMEOUT |
> - IBV_QP_RETRY_CNT |
> - IBV_QP_RNR_RETRY |
> - IBV_QP_SQ_PSN |
> - IBV_QP_MAX_QP_RD_ATOMIC);
> - if (ret) {
> - BTL_ERROR(("Error modifying QP[%x] to IBV_QPS_RTS errno says: %s [%d]",
> - qp->qp_num, strerror(ret), ret));
> - return OMPI_ERROR;
> - }
> -
> - return OMPI_SUCCESS;
> -}
> -
> -/* Recv qp create */
> -static int xoob_recv_qp_create(mca_btl_openib_endpoint_t *endpoint, mca_btl_openib_rem_info_t *rem_info)
> -{
> - struct ibv_qp_init_attr qp_init_attr;
> - struct ibv_qp_attr attr;
> - int ret;
> -
> - mca_btl_openib_module_t* openib_btl =
> - (mca_btl_openib_module_t*)endpoint->endpoint_btl;
> -
> - BTL_VERBOSE(("Connecting Recv QP\n"));
> -
> - memset(&qp_init_attr, 0, sizeof(struct ibv_qp_init_attr));
> - /* Only xrc_domain is required, all other are ignored */
> - qp_init_attr.xrc_domain = openib_btl->device->xrc_domain;
> - ret = ibv_create_xrc_rcv_qp(&qp_init_attr, &endpoint->xrc_recv_qp_num);
> - if (ret) {
> - BTL_ERROR(("Error creating XRC recv QP[%x], errno says: %s [%d]",
> - endpoint->xrc_recv_qp_num, strerror(ret), ret));
> - return OMPI_ERROR;
> - }
> -
> - memset(&attr, 0, sizeof(struct ibv_qp_attr));
> - attr.qp_state = IBV_QPS_INIT;
> - attr.pkey_index = openib_btl->pkey_index;
> - attr.port_num = openib_btl->port_num;
> - attr.qp_access_flags = IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_READ;
> - ret = ibv_modify_xrc_rcv_qp(openib_btl->device->xrc_domain,
> - endpoint->xrc_recv_qp_num,
> - &attr,
> - IBV_QP_STATE|
> - IBV_QP_PKEY_INDEX|
> - IBV_QP_PORT|
> - IBV_QP_ACCESS_FLAGS);
> - if (ret) {
> - BTL_ERROR(("Error modifying XRC recv QP[%x] to IBV_QPS_INIT, errno says: %s [%d]",
> - endpoint->xrc_recv_qp_num, strerror(ret), ret));
> - while(1);
> - return OMPI_ERROR;
> - }
> -
> - memset(&attr, 0, sizeof(struct ibv_qp_attr));
> - attr.qp_state = IBV_QPS_RTR;
> - attr.path_mtu = (openib_btl->device->mtu < endpoint->rem_info.rem_mtu) ?
> - openib_btl->device->mtu : rem_info->rem_mtu;
> - attr.dest_qp_num = rem_info->rem_qps->rem_qp_num;
> - attr.rq_psn = rem_info->rem_qps->rem_psn;
> - attr.max_dest_rd_atomic = mca_btl_openib_component.ib_max_rdma_dst_ops;
> - attr.min_rnr_timer = mca_btl_openib_component.ib_min_rnr_timer;
> - attr.ah_attr.is_global = 0;
> - attr.ah_attr.dlid = rem_info->rem_lid;
> - attr.ah_attr.src_path_bits = openib_btl->src_path_bits;
> - attr.ah_attr.port_num = openib_btl->port_num;
> - attr.ah_attr.static_rate = 0;
> - attr.ah_attr.sl = mca_btl_openib_component.ib_service_level;
> -
> -#if (ENABLE_DYNAMIC_SL)
> - /* if user enabled dynamic SL, get it from PathRecord */
> - if (0 != mca_btl_openib_component.ib_path_record_service_level) {
> - int rc = btl_openib_connect_get_pathrecord_sl(
> - openib_btl->device->xrc_domain->context,
> - attr.ah_attr.port_num,
> - openib_btl->lid,
> - attr.ah_attr.dlid);
> - if (OMPI_ERROR == rc) {
> - return OMPI_ERROR;
> - }
> - attr.ah_attr.sl = rc;
> - }
> -#endif
> -
> - ret = ibv_modify_xrc_rcv_qp(openib_btl->device->xrc_domain,
> - endpoint->xrc_recv_qp_num,
> - &attr,
> - IBV_QP_STATE|
> - IBV_QP_AV|
> - IBV_QP_PATH_MTU|
> - IBV_QP_DEST_QPN|
> - IBV_QP_RQ_PSN|
> - IBV_QP_MAX_DEST_RD_ATOMIC|
> - IBV_QP_MIN_RNR_TIMER);
> - if (ret) {
> - BTL_ERROR(("Error modifying XRC recv QP[%x] to IBV_QPS_RTR, errno says: %s [%d]",
> - endpoint->xrc_recv_qp_num, strerror(ret), ret));
> - return OMPI_ERROR;
> - }
> -#if OPAL_HAVE_THREADS
> - if (APM_ENABLED) {
> - mca_btl_openib_load_apm_xrc_rcv(endpoint->xrc_recv_qp_num, endpoint);
> - }
> -#endif
> -
> - return OMPI_SUCCESS;
> -}
> -
> -/* Recv qp connect */
> -static int xoob_recv_qp_connect(mca_btl_openib_endpoint_t *endpoint, mca_btl_openib_rem_info_t *rem_info)
> -{
> - int ret;
> -
> - mca_btl_openib_module_t* openib_btl =
> - (mca_btl_openib_module_t*)endpoint->endpoint_btl;
> -
> - BTL_VERBOSE(("Connecting Recv QP\n"));
> - ret = ibv_reg_xrc_rcv_qp(openib_btl->device->xrc_domain, rem_info->rem_qps->rem_qp_num);
> - if (ret) { /* failed to regester the qp, so it is already die and we should create new one */
> - /* Return NOT READY !!!*/
> - BTL_ERROR(("Failed to register qp_num: %d , get error: %s (%d)\n. Replying with RNR",
> - rem_info->rem_qps->rem_qp_num, strerror(ret), ret));
> - return OMPI_ERROR;
> - } else {
> - /* save the qp number for unregister */
> - endpoint->xrc_recv_qp_num = rem_info->rem_qps->rem_qp_num;
> - return OMPI_SUCCESS;
> - }
> -}
> -
> -/*
> - * Reply to a `start - connect' message
> - */
> -static int xoob_reply_first_connect(mca_btl_openib_endpoint_t *endpoint,
> - mca_btl_openib_rem_info_t *rem_info)
> -{
> - int rc;
> -
> - BTL_VERBOSE(("Initialized QPs, LID = %d",
> - ((mca_btl_openib_module_t*)endpoint->endpoint_btl)->lid));
> -
> - /* Create local QP's and post receive resources */
> - if (OMPI_SUCCESS != (rc = xoob_recv_qp_create(endpoint, rem_info))) {
> - return rc;
> - }
> -
> - if (OMPI_SUCCESS !=
> - (rc = xoob_send_connect_data(endpoint, ENDPOINT_XOOB_CONNECT_RESPONSE))) {
> - BTL_ERROR(("error in endpoint send connect request error code is %d",
> - rc));
> - return rc;
> - }
> - return OMPI_SUCCESS;
> -}
> -
> -/* Find endpoint for specific subnet/lid/message */
> -static mca_btl_openib_endpoint_t* xoob_find_endpoint(ompi_process_name_t* process_name,
> - uint64_t subnet_id, uint16_t lid, uint8_t message_type)
> -{
> - size_t i;
> - mca_btl_openib_proc_t *ib_proc;
> - mca_btl_openib_endpoint_t *ib_endpoint = NULL;
> - bool found = false;
> -
> - BTL_VERBOSE(("Searching for ep and proc with follow parameters:"
> - "jobid %d, vpid %d, "
> - "sid %" PRIx64 ", lid %d",
> - process_name->jobid, process_name->vpid,
> - subnet_id, lid));
> -
> -
> - /* find ibproc */
> - OPAL_THREAD_LOCK(&mca_btl_openib_component.ib_lock);
> - for (ib_proc = (mca_btl_openib_proc_t*)
> - opal_list_get_first(&mca_btl_openib_component.ib_procs);
> - ib_proc != (mca_btl_openib_proc_t*)
> - opal_list_get_end(&mca_btl_openib_component.ib_procs);
> - ib_proc = (mca_btl_openib_proc_t*)opal_list_get_next(ib_proc)) {
> - if (OPAL_EQUAL == ompi_rte_compare_name_fields(OMPI_RTE_CMP_ALL,
> - &ib_proc->proc_ompi->proc_name, process_name)) {
> - found = true;
> - break;
> - }
> - }
> - /* we found our ib_proc, lets find endpoint now */
> - if (found) {
> - for (i = 0; i < ib_proc->proc_endpoint_count; i++) {
> - ib_endpoint = ib_proc->proc_endpoints[i];
> - /* we need to check different
> - * lid for different message type */
> - if (ENDPOINT_XOOB_CONNECT_RESPONSE == message_type ||
> - ENDPOINT_XOOB_CONNECT_XRC_RESPONSE == message_type) {
> - /* response message */
> - if (ib_endpoint->subnet_id == subnet_id &&
> - ib_endpoint->ib_addr->lid == lid) {
> - break; /* Found one */
> - }
> - } else {
> - /* request message */
> - if (ib_endpoint->subnet_id == subnet_id &&
> - ib_endpoint->endpoint_btl->lid == lid) {
> - break; /* Found one */
> - }
> - }
> - }
> - if (NULL == ib_endpoint) {
> - BTL_ERROR(("can't find suitable endpoint for this peer\n"));
> - }
> - } else {
> - BTL_ERROR(("can't find suitable endpoint for this peer\n"));
> - }
> - OPAL_THREAD_UNLOCK(&mca_btl_openib_component.ib_lock);
> - return ib_endpoint;
> -}
> -
> -/* In case if XRC recv qp was closed and sender still don't know about it
> - * we need close the qp, reset the ib_adrr status to CLOSED and start everything
> - * from scratch.
> - */
> -static void xoob_restart_connect(mca_btl_base_endpoint_t *endpoint)
> -{
> - BTL_VERBOSE(("Restarting the connection for the endpoint"));
> - OPAL_THREAD_LOCK(&endpoint->ib_addr->addr_lock);
> - switch (endpoint->ib_addr->status) {
> - case MCA_BTL_IB_ADDR_CONNECTED:
> - /* so we have the send qp, we just need the recive site.
> - * Send request for SRQ numbers */
> - BTL_VERBOSE(("Restart The IB addr: sid %" PRIx64 " lid %d"
> - "in MCA_BTL_IB_ADDR_CONNECTED status,"
> - " Changing to MCA_BTL_IB_ADDR_CLOSED and starting from scratch\n",
> - endpoint->ib_addr->subnet_id,endpoint->ib_addr->lid));
> - /* Switching back to closed and starting from scratch */
> - endpoint->ib_addr->status = MCA_BTL_IB_ADDR_CLOSED;
> - /* destroy the qp */
> - /* the reciver site was alredy closed so all pending list must be clean ! */
> - assert (opal_list_is_empty(&endpoint->qps->no_wqe_pending_frags[0]));
> - assert (opal_list_is_empty(&endpoint->qps->no_wqe_pending_frags[1]));
> - if(ibv_destroy_qp(endpoint->qps[0].qp->lcl_qp))
> - BTL_ERROR(("Failed to destroy QP"));
> - case MCA_BTL_IB_ADDR_CLOSED:
> - case MCA_BTL_IB_ADDR_CONNECTING:
> - BTL_VERBOSE(("Restart The IB addr: sid %" PRIx64 " lid %d"
> - "in MCA_BTL_IB_ADDR_CONNECTING or MCA_BTL_IB_ADDR_CLOSED status,"
> - " starting from scratch\n",
> - endpoint->ib_addr->subnet_id,endpoint->ib_addr->lid));
> - OPAL_THREAD_UNLOCK(&endpoint->ib_addr->addr_lock);
> - /* xoob_module_start_connect() should automaticly handle all other cases */
> - if (OMPI_SUCCESS != xoob_module_start_connect(NULL, endpoint))
> - BTL_ERROR(("Failed to restart connection from MCA_BTL_IB_ADDR_CONNECTING/CLOSED"));
> - break;
> - default :
> - BTL_ERROR(("Invalid endpoint status %d", endpoint->ib_addr->status));
> - OPAL_THREAD_UNLOCK(&endpoint->ib_addr->addr_lock);
> - }
> -}
> -
> -/* Init remote information structs */
> -static int init_rem_info(mca_btl_openib_rem_info_t *rem_info)
> -{
> - rem_info->rem_qps = (mca_btl_openib_rem_qp_info_t*)malloc(sizeof(mca_btl_openib_rem_qp_info_t));
> - if (NULL == rem_info->rem_qps) {
> - BTL_ERROR(("Failed to allocate memory for remote QP data\n"));
> - return OMPI_ERROR;
> - }
> - rem_info->rem_srqs = (mca_btl_openib_rem_srq_info_t*)malloc(sizeof(mca_btl_openib_rem_srq_info_t) *
> - mca_btl_openib_component.num_xrc_qps);
> - if (NULL == rem_info->rem_srqs) {
> - BTL_ERROR(("Failed to allocate memory for remote SRQ data\n"));
> - return OMPI_ERROR;
> - }
> - return OMPI_SUCCESS;
> -}
> -
> -/* Free remote information structs */
> -static void free_rem_info(mca_btl_openib_rem_info_t *rem_info)
> -{
> - if (NULL != rem_info->rem_qps) {
> - free(rem_info->rem_qps);
> - }
> - if (NULL != rem_info->rem_srqs) {
> - free(rem_info->rem_srqs);
> - }
> -}
> -
> -/*
> - * Non blocking RML recv callback. Read incoming QP and other info,
> - * and if this endpoint is trying to connect, reply with our QP info,
> - * otherwise try to modify QP's and establish reliable connection
> - */
> -static void xoob_rml_recv_cb(int status, ompi_process_name_t* process_name,
> - opal_buffer_t* buffer, ompi_rml_tag_t tag,
> - void* cbdata)
> -{
> - int rc;
> - uint8_t message_type;
> - uint16_t requested_lid = 0;
> - mca_btl_openib_rem_info_t rem_info;
> - mca_btl_openib_endpoint_t *ib_endpoint = NULL;
> -
> - if ( OMPI_SUCCESS != init_rem_info(&rem_info)) {
> - return;
> - }
> -
> - /* Get data. */
> - if ( OMPI_SUCCESS != xoob_receive_connect_data(&rem_info, &requested_lid, &message_type, buffer)) {
> - BTL_ERROR(("Failed to read data\n"));
> - mca_btl_openib_endpoint_invoke_error(NULL);
> - return;
> - }
> -
> - /* Processing message */
> - switch (message_type) {
> - case ENDPOINT_XOOB_CONNECT_REQUEST:
> - BTL_VERBOSE(("Received ENDPOINT_XOOB_CONNECT_REQUEST: lid %d, sid %" PRIx64 ", rlid %d\n",
> - rem_info.rem_lid,
> - rem_info.rem_subnet_id,
> - requested_lid));
> - ib_endpoint = xoob_find_endpoint(process_name,rem_info.rem_subnet_id,
> - requested_lid, message_type);
> - if ( NULL == ib_endpoint) {
> - BTL_ERROR(("Got ENDPOINT_XOOB_CONNECT_REQUEST."
> - " Failed to find endpoint with subnet %" PRIx64
> - " and LID %d",
> - rem_info.rem_subnet_id,requested_lid));
> - mca_btl_openib_endpoint_invoke_error(NULL);
> - return;
> - }
> - OPAL_THREAD_LOCK(&ib_endpoint->endpoint_lock);
> - /* prepost data on receiver site */
> - if (OMPI_SUCCESS != mca_btl_openib_endpoint_post_recvs(ib_endpoint)) {
> - BTL_ERROR(("Failed to post on XRC SRQs"));
> - mca_btl_openib_endpoint_invoke_error(NULL);
> - OPAL_THREAD_UNLOCK(&ib_endpoint->endpoint_lock);
> - return;
> - }
> - /* we should create qp and send the info + srq to requestor */
> - rc = xoob_reply_first_connect(ib_endpoint, &rem_info);
> - if (OMPI_SUCCESS != rc) {
> - BTL_ERROR(("error in endpoint reply start connect"));
> - mca_btl_openib_endpoint_invoke_error(NULL);
> - OPAL_THREAD_UNLOCK(&ib_endpoint->endpoint_lock);
> - return;
> - }
> - /* enable pooling for this btl */
> - OPAL_THREAD_UNLOCK(&ib_endpoint->endpoint_lock);
> - break;
> - case ENDPOINT_XOOB_CONNECT_XRC_REQUEST:
> - /* pasha we don't need the remote lid here ??*/
> - BTL_VERBOSE(("Received ENDPOINT_XOOB_CONNECT_XRC_REQUEST: lid %d, sid %" PRIx64 "\n",
> - rem_info.rem_lid,
> - rem_info.rem_subnet_id));
> - ib_endpoint = xoob_find_endpoint(process_name,rem_info.rem_subnet_id,
> - requested_lid, message_type);
> - if ( NULL == ib_endpoint) {
> - BTL_ERROR(("Got ENDPOINT_XOOB_CONNECT_XRC_REQUEST."
> - " Failed to find endpoint with subnet %" PRIx64 " and LID %d",
> - rem_info.rem_subnet_id,requested_lid));
> - mca_btl_openib_endpoint_invoke_error(NULL);
> - return;
> - }
> - if (OMPI_SUCCESS == xoob_recv_qp_connect(ib_endpoint, &rem_info)) {
> - if (OMPI_SUCCESS != mca_btl_openib_endpoint_post_recvs(ib_endpoint)) {
> - BTL_ERROR(("Failed to post on XRC SRQs"));
> - mca_btl_openib_endpoint_invoke_error(ib_endpoint);
> - return;
> - }
> - OPAL_THREAD_LOCK(&ib_endpoint->endpoint_lock);
> - rc = xoob_send_connect_data(ib_endpoint, ENDPOINT_XOOB_CONNECT_XRC_RESPONSE);
> - if (OMPI_SUCCESS != rc) {
> - BTL_ERROR(("error in endpoint reply start connect"));
> - mca_btl_openib_endpoint_invoke_error(ib_endpoint);
> - OPAL_THREAD_UNLOCK(&ib_endpoint->endpoint_lock);
> - return;
> - }
> - OPAL_THREAD_UNLOCK(&ib_endpoint->endpoint_lock);
> - } else {
> - /* The XRC recv qp was destroyed */
> - OPAL_THREAD_LOCK(&ib_endpoint->endpoint_lock);
> - rc = xoob_send_connect_data(ib_endpoint, ENDPOINT_XOOB_CONNECT_XRC_NR_RESPONSE);
> - if (OMPI_SUCCESS != rc) {
> - BTL_ERROR(("error in endpoint reply start connect"));
> - mca_btl_openib_endpoint_invoke_error(ib_endpoint);
> - OPAL_THREAD_UNLOCK(&ib_endpoint->endpoint_lock);
> - return;
> - }
> - OPAL_THREAD_UNLOCK(&ib_endpoint->endpoint_lock);
> - }
> - /* enable pooling for this btl */
> - break;
> - case ENDPOINT_XOOB_CONNECT_RESPONSE:
> - BTL_VERBOSE(("Received ENDPOINT_XOOB_CONNECT_RESPONSE: lid %d, sid %" PRIx64 "\n",
> - rem_info.rem_lid,
> - rem_info.rem_subnet_id));
> - ib_endpoint = xoob_find_endpoint(process_name, rem_info.rem_subnet_id,
> - rem_info.rem_lid, message_type);
> - if ( NULL == ib_endpoint) {
> - BTL_ERROR(("Got ENDPOINT_XOOB_CONNECT_RESPONSE."
> - " Failed to find endpoint with subnet %" PRIx64 " and LID %d",
> - rem_info.rem_subnet_id,rem_info.rem_lid));
> - mca_btl_openib_endpoint_invoke_error(NULL);
> - return;
> - }
> - OPAL_THREAD_LOCK(&ib_endpoint->endpoint_lock);
> - /* we got all the data srq. switch the endpoint to connect mode */
> - XOOB_SET_REMOTE_INFO(ib_endpoint->rem_info, rem_info);
> - /* update ib_addr with remote qp number */
> - ib_endpoint->ib_addr->remote_xrc_rcv_qp_num =
> - ib_endpoint->rem_info.rem_qps->rem_qp_num;
> - BTL_VERBOSE(("rem_info: lid %d, sid %" PRIx64
> - " ep %d %" PRIx64 "\n",
> - rem_info.rem_lid,
> - rem_info.rem_subnet_id,
> - ib_endpoint->rem_info.rem_lid,
> - ib_endpoint->rem_info.rem_subnet_id));
> - if (OMPI_SUCCESS != xoob_send_qp_connect(ib_endpoint, &rem_info)) {
> - BTL_ERROR(("Failed to connect endpoint\n"));
> - mca_btl_openib_endpoint_invoke_error(NULL);
> - OPAL_THREAD_UNLOCK(&ib_endpoint->endpoint_lock);
> - return;
> - }
> - mca_btl_openib_endpoint_cpc_complete(ib_endpoint);
> - /* cpc complete unlock the endpoint */
> - break;
> - case ENDPOINT_XOOB_CONNECT_XRC_RESPONSE:
> - BTL_VERBOSE(("Received ENDPOINT_XOOB_CONNECT_XRC_RESPONSE: lid %d, sid %" PRIx64 "\n",
> - rem_info.rem_lid,
> - rem_info.rem_subnet_id));
> - ib_endpoint = xoob_find_endpoint(process_name, rem_info.rem_subnet_id,
> - rem_info.rem_lid, message_type);
> - if ( NULL == ib_endpoint) {
> - BTL_ERROR(("Got ENDPOINT_XOOB_CONNECT_XRC_RESPONSE."
> - " Failed to find endpoint with subnet %" PRIx64 " and LID %d",
> - rem_info.rem_subnet_id,rem_info.rem_lid));
> - mca_btl_openib_endpoint_invoke_error(NULL);
> - return;
> - }
> - OPAL_THREAD_LOCK(&ib_endpoint->endpoint_lock);
> - /* we got srq numbers on our request */
> - XOOB_SET_REMOTE_INFO(ib_endpoint->rem_info, rem_info);
> - mca_btl_openib_endpoint_cpc_complete(ib_endpoint);
> - /* cpc complete unlock the endpoint */
> - break;
> - case ENDPOINT_XOOB_CONNECT_XRC_NR_RESPONSE:
> - /* The XRC recv site already was destroyed so we need
> - * start to bringup the connection from scratch */
> - BTL_VERBOSE(("Received ENDPOINT_XOOB_CONNECT_XRC_NR_RESPONSE: lid %d, sid %" PRIx64 "\n",
> - rem_info.rem_lid,
> - rem_info.rem_subnet_id));
> - ib_endpoint = xoob_find_endpoint(process_name, rem_info.rem_subnet_id,
> - rem_info.rem_lid, message_type);
> - if ( NULL == ib_endpoint) {
> - BTL_ERROR(("Got ENDPOINT_XOOB_CONNECT_XRC_NR_RESPONSE."
> - " Failed to find endpoint with subnet %" PRIx64 " and LID %d",
> - rem_info.rem_subnet_id,rem_info.rem_lid));
> - mca_btl_openib_endpoint_invoke_error(NULL);
> - return;
> - }
> - xoob_restart_connect(ib_endpoint);
> - break;
> - default :
> - BTL_ERROR(("Invalid message type %d", message_type));
> - }
> -
> - free_rem_info(&rem_info);
> -}
> -
> -/*
> - * XOOB interface functions
> - */
> -
> -/* Quere for the XOOB priority - will be highest in XRC case */
> -static int xoob_component_query(mca_btl_openib_module_t *openib_btl,
> - ompi_btl_openib_connect_base_module_t **cpc)
> -{
> - if (mca_btl_openib_component.num_xrc_qps <= 0) {
> - opal_output_verbose(5, ompi_btl_base_framework.framework_output,
> - "openib BTL: xoob CPC only supported with XRC receive queues; skipped on %s:%d",
> - ibv_get_device_name(openib_btl->device->ib_dev),
> - openib_btl->port_num);
> - return OMPI_ERR_NOT_SUPPORTED;
> - }
> -
> - *cpc = malloc(sizeof(ompi_btl_openib_connect_base_module_t));
> - if (NULL == *cpc) {
> - opal_output_verbose(5, ompi_btl_base_framework.framework_output,
> - "openib BTL: xoob CPC system error (malloc failed)");
> - return OMPI_ERR_OUT_OF_RESOURCE;
> - }
> -
> - /* If this btl supports XOOB, then post the RML message. But
> - ensure to only post it *once*, because another btl may have
> - come in before this and already posted it. */
> - if (!rml_recv_posted) {
> - ompi_rte_recv_buffer_nb(OMPI_NAME_WILDCARD,
> - OMPI_RML_TAG_XOPENIB,
> - OMPI_RML_PERSISTENT,
> - xoob_rml_recv_cb,
> - NULL);
> - rml_recv_posted = true;
> - }
> -
> - if (xoob_priority > 100) {
> - xoob_priority = 100;
> - } else if (xoob_priority < -1) {
> - xoob_priority = -1;
> - }
> -
> - (*cpc)->data.cbm_component = &ompi_btl_openib_connect_xoob;
> - (*cpc)->data.cbm_priority = xoob_priority;
> - (*cpc)->data.cbm_modex_message = NULL;
> - (*cpc)->data.cbm_modex_message_len = 0;
> -
> - (*cpc)->cbm_endpoint_init = NULL;
> - (*cpc)->cbm_start_connect = xoob_module_start_connect;
> - (*cpc)->cbm_endpoint_finalize = NULL;
> - (*cpc)->cbm_finalize = NULL;
> - (*cpc)->cbm_uses_cts = false;
> -
> - opal_output_verbose(5, ompi_btl_base_framework.framework_output,
> - "openib BTL: xoob CPC available for use on %s:%d",
> - ibv_get_device_name(openib_btl->device->ib_dev),
> - openib_btl->port_num);
> - return OMPI_SUCCESS;
> -}
> -
> -/* Open - this functions sets up any xoob specific commandline params */
> -static void xoob_component_register(void)
> -{
> - /* the priority is initialized in the declaration above */
> - (void) mca_base_component_var_register(&mca_btl_openib_component.super.btl_version,
> - "connect_xoob_priority",
> - "The selection method priority for xoob",
> - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
> - OPAL_INFO_LVL_9,
> - MCA_BASE_VAR_SCOPE_READONLY,
> - &xoob_priority);
> -
> - if (xoob_priority > 100) {
> - xoob_priority = 100;
> - } else if (xoob_priority < -1) {
> - xoob_priority = -1;
> - }
> -}
> -
> -/*
> - * Connect function. Start initiation of connections to a remote
> - * peer. We send our Queue Pair information over the RML/OOB
> - * communication mechanism. On completion of our send, a send
> - * completion handler is called.
> - */
> -static int xoob_module_start_connect(ompi_btl_openib_connect_base_module_t *cpc,
> - mca_btl_base_endpoint_t *endpoint)
> -{
> - int rc = OMPI_SUCCESS;
> -
> - OPAL_THREAD_LOCK(&endpoint->ib_addr->addr_lock);
> - switch (endpoint->ib_addr->status) {
> - case MCA_BTL_IB_ADDR_CLOSED:
> - BTL_VERBOSE(("The IB addr: sid %" PRIx64 " lid %d"
> - "in MCA_BTL_IB_ADDR_CLOSED status,"
> - " sending ENDPOINT_XOOB_CONNECT_REQUEST\n",
> - endpoint->ib_addr->subnet_id,endpoint->ib_addr->lid));
> - if (OMPI_SUCCESS != (rc = xoob_send_qp_create(endpoint))) {
> - break;
> - }
> -
> - /* Send connection info over to remote endpoint */
> - endpoint->endpoint_state = MCA_BTL_IB_CONNECTING;
> - endpoint->ib_addr->status = MCA_BTL_IB_ADDR_CONNECTING;
> - if (OMPI_SUCCESS !=
> - (rc = xoob_send_connect_data(endpoint, ENDPOINT_XOOB_CONNECT_REQUEST))) {
> - BTL_ERROR(("Error sending connect request, error code %d", rc));
> - }
> - break;
> - case MCA_BTL_IB_ADDR_CONNECTING:
> - BTL_VERBOSE(("The IB addr: sid %" PRIx64 " lid %d"
> - "in MCA_BTL_IB_ADDR_CONNECTING status,"
> - " Subscribing to this address\n",
> - endpoint->ib_addr->subnet_id,endpoint->ib_addr->lid));
> - /* some body already connectng to this machine, lets wait */
> - opal_list_append(&endpoint->ib_addr->pending_ep, &(endpoint->super));
> - endpoint->endpoint_state = MCA_BTL_IB_CONNECTING;
> - break;
> - case MCA_BTL_IB_ADDR_CONNECTED:
> - /* so we have the send qp, we just need the recive site.
> - * Send request for SRQ numbers */
> - BTL_VERBOSE(("The IB addr: sid %" PRIx64 " lid %d"
> - "in MCA_BTL_IB_ADDR_CONNECTED status,"
> - " sending ENDPOINT_XOOB_CONNECT_XRC_REQUEST\n",
> - endpoint->ib_addr->subnet_id,endpoint->ib_addr->lid));
> - endpoint->endpoint_state = MCA_BTL_IB_CONNECTING;
> - if (OMPI_SUCCESS !=
> - (rc = xoob_send_connect_data(endpoint, ENDPOINT_XOOB_CONNECT_XRC_REQUEST))) {
> - BTL_ERROR(("error sending xrc connect request, error code %d", rc));
> - }
> - break;
> - default :
> - BTL_ERROR(("Invalid endpoint status %d", endpoint->ib_addr->status));
> - }
> - OPAL_THREAD_UNLOCK(&endpoint->ib_addr->addr_lock);
> - return rc;
> -}
> -
> -
> -/*
> - * Finalize function. Cleanup RML non-blocking receive.
> - */
> -static int xoob_component_finalize(void)
> -{
> - if (rml_recv_posted) {
> - ompi_rte_recv_cancel(OMPI_NAME_WILDCARD, OMPI_RML_TAG_XOPENIB);
> - rml_recv_posted = false;
> - }
> -#if (ENABLE_DYNAMIC_SL)
> - btl_openib_connect_sl_finalize();
> -#endif
> - return OMPI_SUCCESS;
> -}
>
> Deleted: trunk/ompi/mca/btl/openib/connect/btl_openib_connect_xoob.h
> ==============================================================================
> --- trunk/ompi/mca/btl/openib/connect/btl_openib_connect_xoob.h 2013-11-13 23:16:53 EST (Wed, 13 Nov 2013) (r29702)
> +++ /dev/null 00:00:00 1970 (deleted)
> @@ -1,19 +0,0 @@
> -/*
> - * Copyright (c) 2007 Mellanox Technologies. All rights reserved.
> - * Copyright (c) 2008 Cisco Systems, Inc. All rights reserved.
> - *
> - * $COPYRIGHT$
> - *
> - * Additional copyrights may follow
> - *
> - * $HEADER$
> - */
> -
> -#ifndef BTL_OPENIB_CONNECT_XOOB_H
> -#define BTL_OPENIB_CONNECT_XOOB_H
> -
> -#include "connect/connect.h"
> -
> -extern ompi_btl_openib_connect_base_component_t ompi_btl_openib_connect_xoob;
> -
> -#endif
> _______________________________________________
> svn-full mailing list
> svn-full_at_[hidden]
> http://www.open-mpi.org/mailman/listinfo.cgi/svn-full

-- 
Jeff Squyres
jsquyres_at_[hidden]
For corporate legal information go to: http://www.cisco.com/web/about/doing_business/legal/cri/