Open MPI logo

Open MPI Development Mailing List Archives

  |   Home   |   Support   |   FAQ   |   all Development mailing list

Subject: [OMPI devel] [OMPI svn] svn:open-mpi r26077 (fwd)
From: Nathan Hjelm (hjelmn_at_[hidden])
Date: 2012-03-01 10:56:43


Found a pretty nasty frag leak (and a minor one) in ob1 (see commit below). If this fix addresses some hangs we are seeing on InfiniBand, LANL might want a 1.4.6 rolled (or a faster rollout for 1.6.0).

-Nathan

---------- Forwarded message ----------
Date: Thu, 1 Mar 2012 08:53:39 -0700
From: hjelmn_at_[hidden]
Reply-To: devel_at_[hidden]
To: svn_at_[hidden]
Subject: [OMPI svn] svn:open-mpi r26077

Author: hjelmn
Date: 2012-03-01 10:53:39 EST (Thu, 01 Mar 2012)
New Revision: 26077
URL: https://svn.open-mpi.org/trac/ompi/changeset/26077

Log:
ob1: fix two fragment leaks
  - MAJOR! get src descriptor leaks if mca_bml_base_send fails
  - minor. descriptor leaked in mca_pml_send_request_start_copy if the btl returns OMPI_ERR_RESOURCE_BUSY.
Text files modified:
    trunk/ompi/mca/pml/ob1/pml_ob1_sendreq.c | 27 ++++++++++++++++-----------
    1 files changed, 16 insertions(+), 11 deletions(-)

Modified: trunk/ompi/mca/pml/ob1/pml_ob1_sendreq.c
==============================================================================
--- trunk/ompi/mca/pml/ob1/pml_ob1_sendreq.c (original)
+++ trunk/ompi/mca/pml/ob1/pml_ob1_sendreq.c 2012-03-01 10:53:39 EST (Thu, 01 Mar 2012)
@@ -1,3 +1,4 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
  /*
   * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
   * University Research and Technology
@@ -12,6 +13,8 @@
   * Copyright (c) 2008 UT-Battelle, LLC. All rights reserved.
   * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved.
   * Copyright (c) 2012 NVIDIA Corporation. All rights reserved.
+ * Copyright (c) 2012 Los Alamos National Security, LLC. All rights
+ * reserved.
   * $COPYRIGHT$
   *
   * Additional copyrights may follow
@@ -546,15 +549,14 @@
          }
          return OMPI_SUCCESS;
      }
- switch(OPAL_SOS_GET_ERROR_CODE(rc)) {
- case OMPI_ERR_RESOURCE_BUSY:
- /* No more resources. Allow the upper level to queue the send */
- rc = OMPI_ERR_OUT_OF_RESOURCE;
- break;
- default:
- mca_bml_base_free(bml_btl, des);
- break;
+
+ if (OMPI_ERR_RESOURCE_BUSY == OPAL_SOS_GET_ERROR_CODE(rc)) {
+ /* No more resources. Allow the upper level to queue the send */
+ rc = OMPI_ERR_OUT_OF_RESOURCE;
      }
+
+ mca_bml_base_free (bml_btl, des);
+
      return rc;
  }

@@ -631,7 +633,7 @@
       * operation is achieved.
       */

- mca_btl_base_descriptor_t* des;
+ mca_btl_base_descriptor_t *des, *src = NULL;
      mca_btl_base_segment_t* segment;
      mca_pml_ob1_hdr_t* hdr;
      bool need_local_cb = false;
@@ -640,7 +642,6 @@
      bml_btl = sendreq->req_rdma[0].bml_btl;
      if((sendreq->req_rdma_cnt == 1) && (bml_btl->btl_flags & (MCA_BTL_FLAGS_GET | MCA_BTL_FLAGS_CUDA_GET))) {
          mca_mpool_base_registration_t* reg = sendreq->req_rdma[0].btl_reg;
- mca_btl_base_descriptor_t* src;
          size_t i;
          size_t old_position = sendreq->req_send.req_base.req_convertor.bConverted;

@@ -781,6 +782,10 @@
          return OMPI_SUCCESS;
      }
      mca_bml_base_free(bml_btl, des);
+ if (NULL != src) {
+ mca_bml_base_free (bml_btl, src);
+ }
+
      return rc;
  }

@@ -1144,7 +1149,7 @@
                                0,
                                &frag->rdma_length,
                                MCA_BTL_DES_FLAGS_BTL_OWNERSHIP |
- MCA_BTL_DES_FLAGS_PUT,
+ MCA_BTL_DES_FLAGS_PUT,
                                &des );

      if( OPAL_UNLIKELY(NULL == des) ) {
_______________________________________________
svn mailing list
svn_at_[hidden]
http://www.open-mpi.org/mailman/listinfo.cgi/svn