Open MPI logo

Open MPI Development Mailing List Archives

  |   Home   |   Support   |   FAQ   |   all Development mailing list

Subject: [OMPI devel] RFC: Change bml_base_btl_array to an array of pointers
From: Rolf vandeVaart (rolf.vandevaart_at_[hidden])
Date: 2010-04-07 10:55:08


WHAT: Change the bml_base_btl_array to be an array of
pointers to structs instead of array of structs themselves.

WHY: Some PMLs (like the failover one I am working on) may map
out a BTL when an error is seen. Currently, the act of mapping out a
BTL for communication means removing an entry from the
bml_base_btl_array and shuffling the remaining entries. The
problem is that pointers to entries in the bml_base_btl_array
are cached in the descriptor. After mapping out a BTL, these
pointers may no longer be valid since the entries they are
pointing to may have been moved. In order for me to have
my changes isolated to a PML, I need this change in the BML.

WHERE:
ompi/mca/bml/bml.h, ompi/mca/bml/r2/bml_r2.c,
ompi/mca/bml/base/bml_base_btl.c

TIMEOUT: Friday, April 16, 2010

-----------------
More details:

I have created a modified OB1 PML that can handle the failure of an IB
rail when running in dual-rail mode. When an error occurs on a rail,
I map out the BTL so it will no longer be used for communication.
As stated earlier, the mapping out means the removal of an entry in
the bml_base_btl_array and shuffling the other entries so it remains as
a contiguous array.

The problem is that I will still be polling the broken BTL and
receiving completion events. Many of these completion events have a
pointer to an entry in the bml_base_btl_array cached in the
descriptor. To prevent these pointers from becoming invalid, we want
to ensure that the structures do not move even though entries in the
array get moved.

Since the changes are not that big, I have included them here.

There may be some concern about performance, but I have trouble
seeing how these changes will have any effect.

 sr1-ubur-19 274 =>svn diff
Index: ompi/mca/bml/bml.h
===================================================================
--- ompi/mca/bml/bml.h (revision 22925)
+++ ompi/mca/bml/bml.h (working copy)
@@ -74,7 +74,7 @@
     size_t arr_size; /**< number available */
     size_t arr_reserve; /**< size of allocated
btl_proc array */
     size_t arr_index; /**< last used index*/
- mca_bml_base_btl_t* bml_btls; /**< array of bml btl's */
+ mca_bml_base_btl_t** bml_btls; /**< array of bml btl pointers */
 };
 typedef struct mca_bml_base_btl_array_t mca_bml_base_btl_array_t;

@@ -119,7 +119,7 @@
         return 0;
     }
 #endif
- return &array->bml_btls[array->arr_size++];
+ return array->bml_btls[array->arr_size++];
 }

 /**
@@ -134,7 +134,7 @@
     size_t i = 0;
     /* find the btl */
     for( i = 0; i < array->arr_size; i++ ) {
- if( array->bml_btls[i].btl == btl ) {
+ if( array->bml_btls[i]->btl == btl ) {
             /* make sure not to go out of bounds */
             for( ; i < array->arr_size-1; i++ ) {
                 /* move all btl's back by 1, so the found
@@ -165,7 +165,7 @@
         return 0;
     }
 #endif
- return &array->bml_btls[item_index];
+ return array->bml_btls[item_index];
 }

 /**
@@ -184,7 +184,7 @@
     }
 #endif
     if( 1 == array->arr_size ) {
- return &array->bml_btls[0]; /* force the return to avoid a jump */
+ return array->bml_btls[0]; /* force the return to avoid a jump */
     } else {
         size_t current_position = array->arr_index; /* force to always
start from zero */
         if( (current_position + 1) == array->arr_size ) {
@@ -192,7 +192,7 @@
         } else {
             array->arr_index = current_position + 1; /* continue */
         }
- return &array->bml_btls[current_position];
+ return array->bml_btls[current_position];
     }
 }

@@ -207,8 +207,8 @@
 {
     size_t i=0;
     for(i=0; i<array->arr_size; i++) {
- if(array->bml_btls[i].btl == btl) {
- return &array->bml_btls[i];
+ if(array->bml_btls[i]->btl == btl) {
+ return array->bml_btls[i];
         }
     }
     return NULL;
Index: ompi/mca/bml/r2/bml_r2.c
===================================================================
--- ompi/mca/bml/r2/bml_r2.c (revision 22925)
+++ ompi/mca/bml/r2/bml_r2.c (working copy)
@@ -127,10 +127,10 @@

 static int btl_bandwidth_compare(const void *v1, const void *v2)
 {
- mca_bml_base_btl_t *b1 = (mca_bml_base_btl_t*)v1,
- *b2 = (mca_bml_base_btl_t*)v2;
+ mca_bml_base_btl_t **b1 = (mca_bml_base_btl_t**)v1,
+ **b2 = (mca_bml_base_btl_t**)v2;

- return b2->btl->btl_bandwidth - b1->btl->btl_bandwidth;
+ return (*b2)->btl->btl_bandwidth - (*b1)->btl->btl_bandwidth;
 }

 /*
@@ -331,7 +331,7 @@

         /* sort BTLs in descending order according to bandwidth value */
         qsort(bml_endpoint->btl_send.bml_btls, n_size,
- sizeof(mca_bml_base_btl_t), btl_bandwidth_compare);
+ sizeof(mca_bml_base_btl_t*), btl_bandwidth_compare);

         bml_endpoint->btl_rdma_index = 0;
         for(n_index = 0; n_index < n_size; n_index++) {
Index: ompi/mca/bml/base/bml_base_btl.c
===================================================================
--- ompi/mca/bml/base/bml_base_btl.c (revision 22925)
+++ ompi/mca/bml/base/bml_base_btl.c (working copy)
@@ -35,7 +35,11 @@

 static void mca_bml_base_btl_array_destruct(mca_bml_base_btl_array_t*
array)
 {
+ int i;
     if(NULL != array->bml_btls) {
+ for (i=0; i < array->arr_size; i++) {
+ free(array->bml_btls[i]);
+ }
         free(array->bml_btls);
         array->bml_btls = NULL;
     }
@@ -53,15 +57,23 @@

 int mca_bml_base_btl_array_reserve(mca_bml_base_btl_array_t* array,
size_t size)
 {
- size_t old_len = sizeof(mca_bml_base_btl_t)*array->arr_reserve;
- size_t new_len = sizeof(mca_bml_base_btl_t)*size;
+ int i;
+ size_t old_len = sizeof(mca_bml_base_btl_t *)*array->arr_reserve;
+ size_t new_len = sizeof(mca_bml_base_btl_t *)*size;
     if(old_len >= new_len)
         return OMPI_SUCCESS;

- array->bml_btls = (mca_bml_base_btl_t*)realloc(array->bml_btls,
new_len);
+ array->bml_btls = (mca_bml_base_btl_t**)realloc(array->bml_btls,
new_len);
     if(NULL == array->bml_btls)
         return OMPI_ERR_OUT_OF_RESOURCE;
     memset((unsigned char*)array->bml_btls + old_len, 0, new_len-old_len);
+ /* Allocate the bml structs and the initalize the pointers to them */
+ for (i = array->arr_reserve; i < size; i++) {
+ array->bml_btls[i] = calloc(1, sizeof(mca_bml_base_btl_t));
+ if(NULL == array->bml_btls[i]) {
+ return OMPI_ERR_OUT_OF_RESOURCE;
+ }
+ }
     array->arr_reserve = size;
     return OMPI_SUCCESS;
 }