Open MPI logo

Open MPI Development Mailing List Archives

  |   Home   |   Support   |   FAQ   |   all Development mailing list

Subject: Re: [OMPI devel] [OMPI svn-full] svn:open-mpi r24830
From: Jeff Squyres (jsquyres) (jsquyres_at_[hidden])
Date: 2011-07-02 16:52:46


Were all the issues with this code fixed? There were m4 issues and Solaris issues, IIRC.

Sent from my phone. No type good.

On Jun 28, 2011, at 9:28 AM, "kliteyn_at_[hidden]" <kliteyn_at_[hidden]> wrote:

> Author: kliteyn
> Date: 2011-06-28 10:28:29 EDT (Tue, 28 Jun 2011)
> New Revision: 24830
> URL: https://svn.open-mpi.org/trac/ompi/changeset/24830
>
> Log:
> Supporting dynamic SL (#2674)
>
> - Added enable/disable configuration parameter for dynamic SL
> - All the dynamic SL code is conditionalized
> - Removed libibmad dependency
> - Using only one include - ib_types.h (part of opensm-devel package)
> - Removed all the macro and data types definitions, using the
> existing definitions from ib_types.h instead
> - general cleaning here and there
>
> The async mode is not implemented yet - stay tuned...
>
>
> Text files modified:
> trunk/ompi/config/ompi_check_openib.m4 | 38 ++++
> trunk/ompi/mca/btl/openib/btl_openib.h | 5
> trunk/ompi/mca/btl/openib/btl_openib_mca.c | 10
> trunk/ompi/mca/btl/openib/connect/btl_openib_connect_oob.c | 309 +++++++++++++++++----------------------
> 4 files changed, 182 insertions(+), 180 deletions(-)
>
> Modified: trunk/ompi/config/ompi_check_openib.m4
> ==============================================================================
> --- trunk/ompi/config/ompi_check_openib.m4 (original)
> +++ trunk/ompi/config/ompi_check_openib.m4 2011-06-28 10:28:29 EDT (Tue, 28 Jun 2011)
> @@ -155,11 +155,21 @@
> [$ompi_cv_func_ibv_create_cq_args],
> [Number of arguments to ibv_create_cq])])])
>
> + #
> + # OpenIB dynamic SL
> + #
> + AC_ARG_ENABLE([openib-dynamic-sl],
> + [AC_HELP_STRING([--enable-openib-dynamic-sl],
> + [Enable openib BTL to query Subnet Manager for IB SL (default: enabled)])],
> + [enable_openib_dynamic_sl="$enableval"],
> + [enable_openib_dynamic_sl="yes"])
> +
> # Set these up so that we can do an AC_DEFINE below
> # (unconditionally)
> $1_have_xrc=0
> $1_have_rdmacm=0
> $1_have_ibcm=0
> + $1_have_dynamic_sl=0
>
> # If we have the openib stuff available, find out what we've got
> AS_IF([test "$ompi_check_openib_happy" = "yes"],
> @@ -176,6 +186,19 @@
> AC_CHECK_FUNCS([ibv_create_xrc_rcv_qp], [$1_have_xrc=1])
> fi
>
> + if test "$enable_openib_dynamic_sl" = "yes"; then
> + # We need ib_types.h file, which is installed with opensm-devel
> + # package. However, ib_types.h has a bad include directive,
> + # which will cause AC_CHECK_HEADER to fail.
> + # So instead, we will look for another file that is also
> + # installed as part of opensm-devel package and included in
> + # ib_types.h, but it doesn't include any other IB-related files.
> + AC_CHECK_HEADER([infiniband/complib/cl_types_osd.h],
> + [$1_have_dynamic_sl=1],
> + [AC_MSG_ERROR([opensm-devel package not found - please install it or disable dynamic SL support with \"--disable-openib-dynamic-sl\"])],
> + [])
> + fi
> +
> # Do we have a recent enough RDMA CM? Need to have the
> # rdma_get_peer_addr (inline) function (originally appeared
> # in OFED v1.3).
> @@ -244,6 +267,15 @@
> else
> AC_MSG_RESULT([no])
> fi
> +
> + AC_MSG_CHECKING([if dynamic SL is enabled])
> + AC_DEFINE_UNQUOTED([OMPI_ENABLE_DYNAMIC_SL], [$$1_have_dynamic_sl],
> + [Enable features required for dynamic SL support])
> + if test "1" = "$$1_have_dynamic_sl"; then
> + AC_MSG_RESULT([yes])
> + else
> + AC_MSG_RESULT([no])
> + fi
>
> AC_MSG_CHECKING([if OpenFabrics RDMACM support is enabled])
> AC_DEFINE_UNQUOTED([OMPI_HAVE_RDMACM], [$$1_have_rdmacm],
> @@ -267,7 +299,11 @@
> AC_MSG_RESULT([no])
> fi
>
> - CPPFLAGS="$ompi_check_openib_$1_save_CPPFLAGS"
> + AS_IF([test -z "$ompi_check_openib_dir"],
> + [openib_include_dir="/usr/include"],
> + [openib_include_dir="$ompi_check_openib_dir/include"])
> +
> + CPPFLAGS="$ompi_check_openib_$1_save_CPPFLAGS -I$openib_include_dir/infiniband"
> LDFLAGS="$ompi_check_openib_$1_save_LDFLAGS"
> LIBS="$ompi_check_openib_$1_save_LIBS"
>
>
> Modified: trunk/ompi/mca/btl/openib/btl_openib.h
> ==============================================================================
> --- trunk/ompi/mca/btl/openib/btl_openib.h (original)
> +++ trunk/ompi/mca/btl/openib/btl_openib.h 2011-06-28 10:28:29 EDT (Tue, 28 Jun 2011)
> @@ -52,6 +52,7 @@
> BEGIN_C_DECLS
>
> #define HAVE_XRC (1 == OMPI_HAVE_CONNECTX_XRC)
> +#define ENABLE_DYNAMIC_SL (1 == OMPI_ENABLE_DYNAMIC_SL)
>
> #define MCA_BTL_IB_LEAVE_PINNED 1
> #define IB_DEFAULT_GID_PREFIX 0xfe80000000000000ll
> @@ -215,7 +216,9 @@
> uint32_t ib_rnr_retry;
> uint32_t ib_max_rdma_dst_ops;
> uint32_t ib_service_level;
> - uint32_t ib_path_rec_service_level;
> +#if (ENABLE_DYNAMIC_SL)
> + uint32_t ib_path_record_service_level;
> +#endif
> int32_t use_eager_rdma;
> int32_t eager_rdma_threshold; /**< After this number of msg, use RDMA for short messages, always */
> int32_t eager_rdma_num;
>
> Modified: trunk/ompi/mca/btl/openib/btl_openib_mca.c
> ==============================================================================
> --- trunk/ompi/mca/btl/openib/btl_openib_mca.c (original)
> +++ trunk/ompi/mca/btl/openib/btl_openib_mca.c 2011-06-28 10:28:29 EDT (Tue, 28 Jun 2011)
> @@ -398,10 +398,14 @@
> }
> mca_btl_openib_component.ib_service_level = (uint32_t) ival;
>
> - CHECK(reg_int("ib_path_rec_service_level", NULL, "Enable getting InfiniBand service level from PathRecord "
> - "(must be >= 0, 0 = disabled, positive = try to get the service level from PathRecord)",
> +#if (ENABLE_DYNAMIC_SL)
> + CHECK(reg_int("ib_path_record_service_level", NULL,
> + "Enable getting InfiniBand service level from PathRecord "
> + "(must be >= 0, 0 = disabled, positive = try to get the "
> + "service level from PathRecord)",
> 0, &ival, REGINT_GE_ZERO));
> - mca_btl_openib_component.ib_path_rec_service_level = (uint32_t) ival;
> + mca_btl_openib_component.ib_path_record_service_level = (uint32_t) ival;
> +#endif
>
> CHECK(reg_int("use_eager_rdma", NULL, "Use RDMA for eager messages "
> "(-1 = use device default, 0 = do not use eager RDMA, "
>
> Modified: trunk/ompi/mca/btl/openib/connect/btl_openib_connect_oob.c
> ==============================================================================
> --- trunk/ompi/mca/btl/openib/connect/btl_openib_connect_oob.c (original)
> +++ trunk/ompi/mca/btl/openib/connect/btl_openib_connect_oob.c 2011-06-28 10:28:29 EDT (Tue, 28 Jun 2011)
> @@ -44,6 +44,10 @@
> #include "connect/connect.h"
> #include "orte/util/show_help.h"
>
> +#if (ENABLE_DYNAMIC_SL)
> +#include <infiniband/iba/ib_types.h>
> +#endif
> +
> #ifdef HAVE_UNISTD_H
> #include <unistd.h>
> #endif
> @@ -54,109 +58,17 @@
> ENDPOINT_CONNECT_ACK
> } connect_message_type_t;
>
> -#ifndef __WINDOWS__
> -#define PACK_SUFFIX __attribute__((packed))
> -#else
> -#define PACK_SUFFIX
> -#endif
> -
> -#define SL_NOT_PRESENT 0x7F
> +#define SL_NOT_PRESENT 0xFF
> #define MAX_GET_SL_REC_RETRIES 20
> #define GET_SL_REC_RETRIES_TIMEOUT_MS 2000000
>
> -#define IB_SA_QPN 1
> -#define IB_GLOBAL_QKEY 0x80010000UL
> -#define IB_MGMT_BASE_VERSION 1
> -#define IB_MGMT_CLASS_SUBN_ADM 0x03
> -#define IB_MGMT_METHOD_GET 0x01
> -#define IB_SA_TID_GET_PATH_REC_0 0xCA000000UL
> -#define IB_SA_TID_GET_PATH_REC_1 0xBEEF0000UL
> -#define IB_PATH_REC_SL_MASK 0x000F
> -#define IB_SA_ATTR_PATH_REC 0x35
> -#define IB_SA_PATH_REC_DLID (1<<4)
> -#define IB_SA_PATH_REC_SLID (1<<5)
> -
> -
> -#ifdef __WINDOWS__
> - #pragma pack(push)
> - #pragma pack(1)
> -#endif
> -
> -struct ib_mad_hdr {
> - uint8_t base_version;
> - uint8_t mgmt_class;
> - uint8_t class_version;
> - uint8_t method;
> - uint16_t status;
> - uint16_t class_spec;
> - uint32_t tid[2];
> - uint16_t attr_id;
> - uint16_t resv;
> - uint32_t attr_mod;
> -} PACK_SUFFIX;
> -
> -struct ib_rmpp_hdr {
> - uint32_t raw[3];
> -} PACK_SUFFIX;
> -
> -struct ib_sa_hdr {
> - uint32_t sm_key[2];
> - uint16_t reserved;
> - uint16_t attrib_offset;
> - uint32_t comp_mask[2];
> -} PACK_SUFFIX;
> -
> -typedef union _ib_gid {
> - uint8_t raw[16];
> - struct _ib_gid_unicast {
> - uint64_t prefix;
> - uint64_t interface_id;
> - } PACK_SUFFIX unicast;
> - struct _ib_gid_multicast {
> - uint8_t header[2];
> - uint8_t raw_group_id[14];
> - } PACK_SUFFIX multicast;
> -} PACK_SUFFIX ib_gid_t;
> -
> -struct ib_path_record {
> - uint64_t service_id;
> - ib_gid_t dgit;
> - ib_gid_t sgit;
> - uint16_t dlid;
> - uint16_t slid;
> - uint32_t hop_flow_raw;
> - uint8_t tclass;
> - uint8_t num_path;
> - uint16_t pkey;
> - uint8_t reserved1;
> - uint8_t qos_class_sl;
> - uint8_t mtu;
> - uint8_t rate;
> - uint32_t preference__packet_lifetime__packet_lifetime_selector;
> - uint32_t reserved2[35];
> -} PACK_SUFFIX;
> -
> -union ib_sa_data {
> - struct ib_path_record path_record;
> -} PACK_SUFFIX;
> -
> -struct ib_mad_sa {
> - struct ib_mad_hdr mad_hdr;
> - struct ib_rmpp_hdr rmpp_hdr;
> - struct ib_sa_hdr sa_hdr;
> - union ib_sa_data sa_data;
> -} PACK_SUFFIX;
> -
> -#ifdef __WINDOWS__
> - #pragma pack(pop)
> -#endif
> -
> +#if (ENABLE_DYNAMIC_SL)
> static struct mca_btl_openib_sa_qp_cache {
> /* There will be a MR with the one send and receive buffer together */
> /* The send buffer is first, the receive buffer is second */
> /* The receive buffer in a UD queue pair needs room for the 40 byte GRH */
> /* The buffers are first in the structure for page alignment */
> - char send_recv_buffer[sizeof(struct ib_mad_sa) * 2 + 40];
> + char send_recv_buffer[MAD_BLOCK_SIZE * 2 + 40];
> struct mca_btl_openib_sa_qp_cache *next;
> struct ibv_context *context;
> char *device_name;
> @@ -168,8 +80,9 @@
> struct ibv_pd *pd;
> struct ibv_recv_wr rwr;
> struct ibv_sge rsge;
> - char sl_values[65536];
> + uint8_t sl_values[65536]; /* 64K */
> } *sa_qp_cache = 0;
> +#endif
>
> static int oob_priority = 50;
> static bool rml_recv_posted = false;
> @@ -198,27 +111,31 @@
> static void rml_recv_cb(int status, orte_process_name_t* process_name,
> opal_buffer_t* buffer, orte_rml_tag_t tag,
> void* cbdata);
> +
> +#if (ENABLE_DYNAMIC_SL)
> static int init_ud_qp(struct ibv_context *context_arg,
> struct mca_btl_openib_sa_qp_cache *cache);
> static void init_sa_mad(struct mca_btl_openib_sa_qp_cache *cache,
> - struct ib_mad_sa *sag,
> - struct ibv_send_wr *swr,
> - struct ibv_sge *ssge,
> - uint16_t lid,
> - uint16_t rem_lid);
> + ib_sa_mad_t *sa_mad,
> + struct ibv_send_wr *swr,
> + struct ibv_sge *ssge,
> + uint16_t lid,
> + uint16_t rem_lid);
> static int get_pathrecord_info(struct mca_btl_openib_sa_qp_cache *cache,
> - struct ib_mad_sa *sag,
> - struct ib_mad_sa *sar,
> - struct ibv_send_wr *swr,
> - uint16_t lid,
> - uint16_t rem_lid);
> -static int init_device(struct ibv_context *context_arg,
> - struct mca_btl_openib_sa_qp_cache *cache,
> - uint32_t port_num);
> -static int get_pathrecord_sl(struct ibv_context *context_arg,
> - uint32_t port_num,
> + ib_sa_mad_t *sa_mad,
> + ib_sa_mad_t *sar,
> + struct ibv_send_wr *swr,
> uint16_t lid,
> uint16_t rem_lid);
> +static int init_device(struct ibv_context *context_arg,
> + struct mca_btl_openib_sa_qp_cache *cache,
> + uint32_t port_num);
> +static int get_pathrecord_sl(struct ibv_context *context_arg,
> + uint32_t port_num,
> + uint16_t lid,
> + uint16_t rem_lid);
> +static void free_sa_qp_cache(void);
> +#endif
>
> /*
> * The "component" struct -- the top-level function pointers for the
> @@ -351,6 +268,33 @@
> return OMPI_SUCCESS;
> }
>
> +#if (ENABLE_DYNAMIC_SL)
> +static void free_sa_qp_cache(void)
> +{
> + struct mca_btl_openib_sa_qp_cache *cache, *tmp;
> +
> + cache = sa_qp_cache;
> + while (NULL != cache) {
> + /* free cache data */
> + if (cache->device_name)
> + free(cache->device_name);
> + if (NULL != cache->qp)
> + ibv_destroy_qp(cache->qp);
> + if (NULL != cache->ah)
> + ibv_destroy_ah(cache->ah);
> + if (NULL != cache->cq)
> + ibv_destroy_cq(cache->cq);
> + if (NULL != cache->mr)
> + ibv_dereg_mr(cache->mr);
> + if (NULL != cache->pd)
> + ibv_dealloc_pd(cache->pd);
> + tmp = cache->next;
> + free(cache);
> + cache = tmp;
> + }
> +}
> +#endif
> +
> /*
> * Component finalize function. Cleanup RML non-blocking receive.
> */
> @@ -360,7 +304,9 @@
> orte_rml.recv_cancel(ORTE_NAME_WILDCARD, OMPI_RML_TAG_OPENIB);
> rml_recv_posted = false;
> }
> -
> + #if (ENABLE_DYNAMIC_SL)
> + free_sa_qp_cache();
> +#endif
> return OMPI_SUCCESS;
> }
>
> @@ -425,7 +371,7 @@
> */
> static int qp_connect_all(mca_btl_openib_endpoint_t *endpoint)
> {
> - int i, rc;
> + int i;
> mca_btl_openib_module_t* openib_btl =
> (mca_btl_openib_module_t*)endpoint->endpoint_btl;
>
> @@ -446,18 +392,24 @@
> attr.ah_attr.dlid = endpoint->rem_info.rem_lid;
> attr.ah_attr.src_path_bits = openib_btl->src_path_bits;
> attr.ah_attr.port_num = openib_btl->port_num;
> - attr.ah_attr.sl = mca_btl_openib_component.ib_service_level;
> - /* if user enable ib_path_rec_service_level - dynamically get the sl from PathRecord */
> - if (mca_btl_openib_component.ib_path_rec_service_level > 0) {
> - rc = get_pathrecord_sl(qp->context,
> +
> +#if (ENABLE_DYNAMIC_SL)
> + /* if user enabled dynamic SL, get it from PathRecord */
> + if (0 != mca_btl_openib_component.ib_path_record_service_level) {
> + int rc = get_pathrecord_sl(qp->context,
> attr.ah_attr.port_num,
> openib_btl->lid,
> attr.ah_attr.dlid);
> if (OMPI_ERROR == rc) {
> + free_sa_qp_cache();
> return OMPI_ERROR;
> }
> attr.ah_attr.sl = rc;
> }
> +#else
> + attr.ah_attr.sl = mca_btl_openib_component.ib_service_level;
> +#endif
> +
> /* JMS to be filled in later dynamically */
> attr.ah_attr.static_rate = 0;
>
> @@ -1056,6 +1008,7 @@
> OPAL_THREAD_UNLOCK(&mca_btl_openib_component.ib_lock);
> }
>
> +#if (ENABLE_DYNAMIC_SL)
> static int init_ud_qp(struct ibv_context *context_arg,
> struct mca_btl_openib_sa_qp_cache *cache)
> {
> @@ -1094,7 +1047,7 @@
> memset(&mattr, 0, sizeof(mattr));
> mattr.qp_state = IBV_QPS_INIT;
> mattr.port_num = cache->port_num;
> - mattr.qkey = IB_GLOBAL_QKEY;
> + mattr.qkey = ntohl(IB_QP1_WELL_KNOWN_Q_KEY);
> rc = ibv_modify_qp(cache->qp, &mattr,
> IBV_QP_STATE |
> IBV_QP_PKEY_INDEX |
> @@ -1128,61 +1081,75 @@
> return OMPI_SUCCESS;
> }
> static void init_sa_mad(struct mca_btl_openib_sa_qp_cache *cache,
> - struct ib_mad_sa *sag,
> - struct ibv_send_wr *swr,
> - struct ibv_sge *ssge,
> - uint16_t lid,
> - uint16_t rem_lid)
> + ib_sa_mad_t *sa_mad,
> + struct ibv_send_wr *swr,
> + struct ibv_sge *ssge,
> + uint16_t lid,
> + uint16_t rem_lid)
> {
> - memset(sag, 0, sizeof(*sag));
> + ib_path_rec_t *path_record = (ib_path_rec_t*)sa_mad->data;
> +
> memset(swr, 0, sizeof(*swr));
> memset(ssge, 0, sizeof(*ssge));
>
> - sag->mad_hdr.base_version = IB_MGMT_BASE_VERSION;
> - sag->mad_hdr.mgmt_class = IB_MGMT_CLASS_SUBN_ADM;
> - sag->mad_hdr.class_version = 2;
> - sag->mad_hdr.method = IB_MGMT_METHOD_GET;
> - sag->mad_hdr.attr_id = htons (IB_SA_ATTR_PATH_REC);
> - sag->mad_hdr.tid[0] = IB_SA_TID_GET_PATH_REC_0 + cache->qp->qp_num;
> - sag->mad_hdr.tid[1] = IB_SA_TID_GET_PATH_REC_1 + rem_lid;
> - sag->sa_hdr.comp_mask[1] =
> - htonl(IB_SA_PATH_REC_DLID | IB_SA_PATH_REC_SLID);
> - sag->sa_data.path_record.dlid = htons(rem_lid);
> - sag->sa_data.path_record.slid = htons(lid);
> + /* Initialize the standard MAD header. */
> + memset(sa_mad, 0, MAD_BLOCK_SIZE);
> + ib_mad_init_new((ib_mad_t *)sa_mad, /* mad header pointer */
> + IB_MCLASS_SUBN_ADM, /* management class */
> + (uint8_t) 2, /* version */
> + IB_MAD_METHOD_GET, /* method */
> + hton64((uint64_t)lid << 48 | /* transaction ID */
> + (uint64_t)rem_lid << 32 |
> + (uint64_t)cache->qp->qp_num << 8),
> + IB_MAD_ATTR_PATH_RECORD, /* attribute ID */
> + 0); /* attribute modifier */
> +
> + sa_mad->comp_mask = IB_PR_COMPMASK_DLID | IB_PR_COMPMASK_SLID;
> + path_record->dlid = htons(rem_lid);
> + path_record->slid = htons(lid);
>
> swr->sg_list = ssge;
> swr->num_sge = 1;
> swr->opcode = IBV_WR_SEND;
> swr->wr.ud.ah = cache->ah;
> - swr->wr.ud.remote_qpn = IB_SA_QPN;
> - swr->wr.ud.remote_qkey = IB_GLOBAL_QKEY;
> + swr->wr.ud.remote_qpn = ntohl(IB_QP1);
> + swr->wr.ud.remote_qkey = ntohl(IB_QP1_WELL_KNOWN_Q_KEY);
> swr->send_flags = IBV_SEND_SIGNALED | IBV_SEND_SOLICITED;
>
> - ssge->addr = (uint64_t)(void *)sag;
> - ssge->length = sizeof(*sag);
> + ssge->addr = (uint64_t)(void *)sa_mad;
> + ssge->length = MAD_BLOCK_SIZE;
> ssge->lkey = cache->mr->lkey;
> }
>
> static int get_pathrecord_info(struct mca_btl_openib_sa_qp_cache *cache,
> - struct ib_mad_sa *sag,
> - struct ib_mad_sa *sar,
> - struct ibv_send_wr *swr,
> - uint16_t lid,
> - uint16_t rem_lid)
> + ib_sa_mad_t *req_mad,
> + ib_sa_mad_t *resp_mad,
> + struct ibv_send_wr *swr,
> + uint16_t lid,
> + uint16_t rem_lid)
> {
> struct ibv_send_wr *bswr;
> struct ibv_wc wc;
> struct timeval get_sl_rec_last_sent, get_sl_rec_last_poll;
> struct ibv_recv_wr *brwr;
> int got_sl_value, get_sl_rec_retries, rc, ne, i;
> + ib_path_rec_t *req_path_record = ib_sa_mad_get_payload_ptr(req_mad);
> + ib_path_rec_t *resp_path_record = ib_sa_mad_get_payload_ptr(resp_mad);
>
> got_sl_value = 0;
> get_sl_rec_retries = 0;
>
> + rc = ibv_post_recv(cache->qp, &(cache->rwr), &brwr);
> + if (0 != rc) {
> + BTL_ERROR(("error posting receive on QP [0x%x] errno says: %s [%d]",
> + cache->qp->qp_num, strerror(errno), errno));
> + return OMPI_ERROR;
> + }
> +
> while (0 == got_sl_value) {
> rc = ibv_post_send(cache->qp, swr, &bswr);
> if (0 != rc) {
> - BTL_ERROR(("error posing send on QP[%x] errno says: %s [%d]",
> + BTL_ERROR(("error posting send on QP [0x%x] errno says: %s [%d]",
> cache->qp->qp_num, strerror(errno), errno));
> return OMPI_ERROR;
> }
> @@ -1190,25 +1157,23 @@
>
> while (0 == got_sl_value) {
> ne = ibv_poll_cq(cache->cq, 1, &wc);
> - if (ne > 0
> - && wc.status == IBV_WC_SUCCESS
> - && wc.opcode == IBV_WC_RECV
> - && wc.byte_len >= sizeof(*sar)
> - && sar->mad_hdr.tid[0] == sag->mad_hdr.tid[0]
> - && sar->mad_hdr.tid[1] == sag->mad_hdr.tid[1]) {
> - if (0 == sar->mad_hdr.status
> - && sar->sa_data.path_record.slid == htons(lid)
> - && sar->sa_data.path_record.dlid == htons(rem_lid)) {
> + if (ne > 0 &&
> + IBV_WC_SUCCESS == wc.status &&
> + IBV_WC_RECV == wc.opcode &&
> + wc.byte_len >= MAD_BLOCK_SIZE &&
> + resp_mad->trans_id == req_mad->trans_id) {
> + if (0 == resp_mad->status &&
> + req_path_record->slid == htons(lid) &&
> + req_path_record->dlid == htons(rem_lid)) {
> /* Everything matches, so we have the desired SL */
> - cache->sl_values[rem_lid] =
> - sar->sa_data.path_record.qos_class_sl & IB_PATH_REC_SL_MASK;
> + cache->sl_values[rem_lid] = ib_path_rec_sl(resp_path_record);
> got_sl_value = 1; /* still must repost recieve buf */
> } else {
> /* Probably bad status, unlikely bad lid match. We will */
> /* ignore response and let it time out so that we do a */
> /* retry, but after a delay. We must make a new TID so */
> /* the SM doesn't see it as the same request. */
> - sag->mad_hdr.tid[1] += 0x10000;
> + req_mad->trans_id += hton64(1);
> }
> rc = ibv_post_recv(cache->qp, &(cache->rwr), &brwr);
> if (0 != rc) {
> @@ -1249,7 +1214,6 @@
> {
> struct ibv_ah_attr aattr;
> struct ibv_port_attr pattr;
> - struct ibv_recv_wr *brwr;
> int rc;
>
> cache->context = ibv_open_device(context_arg->device);
> @@ -1315,16 +1279,10 @@
> cache->rwr.sg_list = &(cache->rsge);
> memset(&(cache->rsge), 0, sizeof(cache->rsge));
> cache->rsge.addr = (uint64_t)(void *)
> - (cache->send_recv_buffer + sizeof(struct ib_mad_sa));
> - cache->rsge.length = sizeof(struct ib_mad_sa) + 40;
> + (cache->send_recv_buffer + MAD_BLOCK_SIZE);
> + cache->rsge.length = MAD_BLOCK_SIZE + 40;
> cache->rsge.lkey = cache->mr->lkey;
>
> - rc = ibv_post_recv(cache->qp, &(cache->rwr), &brwr);
> - if (0 != rc) {
> - BTL_ERROR(("error posing receive on QP[%x] errno says: %s [%d]",
> - cache->qp->qp_num, strerror(errno), errno));
> - return OMPI_ERROR;
> - }
> return 0;
> }
>
> @@ -1334,7 +1292,7 @@
> uint16_t rem_lid)
> {
> struct ibv_send_wr swr;
> - struct ib_mad_sa *sag, *sar;
> + ib_sa_mad_t *req_mad, *resp_mad;
> struct ibv_sge ssge;
> struct mca_btl_openib_sa_qp_cache *cache;
> long page_size = sysconf(_SC_PAGESIZE);
> @@ -1342,8 +1300,8 @@
>
> /* search for a cached item */
> for (cache = sa_qp_cache; cache; cache = cache->next) {
> - if (strcmp(cache->device_name,
> - ibv_get_device_name(context_arg->device)) == 0
> + if (0 == strcmp(cache->device_name,
> + ibv_get_device_name(context_arg->device))
> && cache->port_num == port_num) {
> break;
> }
> @@ -1365,15 +1323,15 @@
>
> /* if the destination lid SL value is not in the cache, go get it */
> if (SL_NOT_PRESENT == cache->sl_values[rem_lid]) {
> - /* sag is first buffer, where we build the SA Get request to send */
> - sag = (struct ib_mad_sa *)(cache->send_recv_buffer);
> + /* sa_mad is first buffer, where we build the SA Get request to send */
> + req_mad = (ib_sa_mad_t *)(cache->send_recv_buffer);
>
> - init_sa_mad(cache, sag, &swr, &ssge, lid, rem_lid);
> + init_sa_mad(cache, req_mad, &swr, &ssge, lid, rem_lid);
>
> - /* sar is the receive buffer (40 byte GRH) */
> - sar = (struct ib_mad_sa *)(cache->send_recv_buffer + sizeof(struct ib_mad_sa) + 40);
> + /* resp_mad is the receive buffer (40 byte offset is for GRH) */
> + resp_mad = (ib_sa_mad_t *)(cache->send_recv_buffer + MAD_BLOCK_SIZE + 40);
>
> - rc = get_pathrecord_info(cache, sag, sar, &swr, lid, rem_lid);
> + rc = get_pathrecord_info(cache, req_mad, resp_mad, &swr, lid, rem_lid);
> if (0 != rc) {
> return rc;
> }
> @@ -1382,3 +1340,4 @@
> /* now all we do is send back the value laying around */
> return cache->sl_values[rem_lid];
> }
> +#endif
> _______________________________________________
> svn-full mailing list
> svn-full_at_[hidden]
> http://www.open-mpi.org/mailman/listinfo.cgi/svn-full