Open MPI logo

Open MPI Development Mailing List Archives

  |   Home   |   Support   |   FAQ   |   all Development mailing list

Subject: Re: [OMPI devel] [OMPI svn] svn:open-mpi r25445
From: Jeff Squyres (jsquyres_at_[hidden])
Date: 2011-11-07 09:22:58


This is probably why it would have been good to RFC about this. :-)

8 bytes can/does affect short message latency, no?

On Nov 6, 2011, at 11:29 PM, Nathan T. Hjelm wrote:

> I saw no need for an rfc for r25445/r25448. I did not seen any performance
> degradation with openib, sm, or vader (using ob1). Its only 8 bytes, and we
> (LANL) will absolutely require a 128 bit key for the ugni btl.
>
> Anyone else care to weigh in or do some measurements?
>
> -Nathan
>
> On Sun, 6 Nov 2011 23:05:57 -0500, George Bosilca <bosilca_at_[hidden]>
> wrote:
>> I might have missed some of the phone conferences, but this is a highly
>> critical modification of the one of the performance critical sub-system
> of
>> Open MPI. There was no RFC about and no prior warning. This change
> impacts
>> every other BTL and PML out there. Moreover, at this point there is no
>> assessment of the impact on performance (because the seg_key
>> modification).
>>
>> Please roll-back r25445 and r25448.
>>
>> george.
>>
>> On Nov 6, 2011, at 11:19 , hjelmn_at_[hidden] wrote:
>>
>>> Author: hjelmn
>>> Date: 2011-11-06 11:19:09 EST (Sun, 06 Nov 2011)
>>> New Revision: 25445
>>> URL: https://svn.open-mpi.org/trac/ompi/changeset/25445
>>>
>>> Log:
>>> changes to seg_key needed for a new btl
>>> Text files modified:
>>> trunk/ompi/mca/btl/btl.h | 8 ++++----
>>> trunk/ompi/mca/btl/mx/btl_mx.c | 6 +++---
>>> trunk/ompi/mca/btl/portals/btl_portals.c | 12 ++++++------
>>> trunk/ompi/mca/btl/portals/btl_portals_frag.c | 2 +-
>>> trunk/ompi/mca/btl/portals/btl_portals_rdma.c | 8 ++++----
>>> trunk/ompi/mca/btl/self/btl_self.c | 4 ++--
>>> trunk/ompi/mca/btl/sm/btl_sm.c | 6 +++---
>>> trunk/ompi/mca/btl/vader/btl_vader.c | 4 ++--
>>> trunk/ompi/mca/btl/vader/btl_vader_get.c | 6 +++---
>>> trunk/ompi/mca/btl/vader/btl_vader_put.c | 6 +++---
>>> 10 files changed, 31 insertions(+), 31 deletions(-)
>>>
>>> Modified: trunk/ompi/mca/btl/btl.h
>>>
>>
> ==============================================================================
>>> --- trunk/ompi/mca/btl/btl.h (original)
>>> +++ trunk/ompi/mca/btl/btl.h 2011-11-06 11:19:09 EST (Sun, 06 Nov 2011)
>>> @@ -238,10 +238,10 @@
>>> #endif
>>> /** Memory segment key required by some RDMA networks */
>>> union {
>>> - uint32_t key32[2];
>>> - uint64_t key64;
>>> - uint8_t key8[8];
>>> - uintptr_t ptr;
>>> + uint32_t key32[4];
>>> + uint64_t key64[2];
>>> + uint8_t key8[16];
>>> + uintptr_t ptr[2];
>>> } seg_key;
>>> };
>>> typedef struct mca_btl_base_segment_t mca_btl_base_segment_t;
>>>
>>> Modified: trunk/ompi/mca/btl/mx/btl_mx.c
>>>
>>
> ==============================================================================
>>> --- trunk/ompi/mca/btl/mx/btl_mx.c (original)
>>> +++ trunk/ompi/mca/btl/mx/btl_mx.c 2011-11-06 11:19:09 EST (Sun, 06 Nov
>> 2011)
>>> @@ -323,13 +323,13 @@
>>>
>>> frag->segment[0].seg_len = *size;
>>> opal_convertor_get_current_pointer( convertor,
>> (void**)&(frag->segment[0].seg_addr.pval) );
>>> - frag->segment[0].seg_key.key64 = (uint64_t)(intptr_t)frag;
>>> + frag->segment[0].seg_key.key64[0] = (uint64_t)(intptr_t)frag;
>>>
>>> mx_segment.segment_ptr = frag->segment[0].seg_addr.pval;
>>> mx_segment.segment_length = frag->segment[0].seg_len;
>>>
>>> mx_return = mx_irecv( mx_btl->mx_endpoint, &mx_segment, 1,
>>> - frag->segment[0].seg_key.key64,
>>> + frag->segment[0].seg_key.key64[0],
>>> BTL_MX_PUT_MASK, NULL, &(frag->mx_request) );
>>> if( OPAL_UNLIKELY(MX_SUCCESS != mx_return) ) {
>>> opal_output( 0, "Fail to re-register a fragment with the MX NIC
>> ...\n" );
>>> @@ -396,7 +396,7 @@
>>>
>>> mx_return = mx_isend( mx_btl->mx_endpoint, mx_segment,
>> descriptor->des_src_cnt,
>>> endpoint->mx_peer_addr,
>>> - descriptor->des_dst[0].seg_key.key64, frag,
>>> + descriptor->des_dst[0].seg_key.key64[0],
>> frag,
>>> &frag->mx_request );
>>> if( OPAL_UNLIKELY(MX_SUCCESS != mx_return) ) {
>>> opal_output( 0, "mx_isend fails with error %s\n",
>> mx_strerror(mx_return) );
>>>
>>> Modified: trunk/ompi/mca/btl/portals/btl_portals.c
>>>
>>
> ==============================================================================
>>> --- trunk/ompi/mca/btl/portals/btl_portals.c (original)
>>> +++ trunk/ompi/mca/btl/portals/btl_portals.c 2011-11-06 11:19:09 EST
>> (Sun, 06 Nov 2011)
>>> @@ -357,7 +357,7 @@
>>>
>>> frag->segments[0].seg_len = max_data;
>>> frag->segments[0].seg_addr.pval = iov.iov_base;
>>> - frag->segments[0].seg_key.key64 =
>>> + frag->segments[0].seg_key.key64[0] =
>>>
>> OPAL_THREAD_ADD64(&(mca_btl_portals_module.portals_rdma_key), 1);
>>> frag->base.des_src_cnt = 1;
>>>
>>> @@ -366,13 +366,13 @@
>>> "rdma src posted for frag 0x%lx, callback
>> 0x%lx, bits %"PRIu64", flags say %d" ,
>>> (unsigned long) frag,
>>> (unsigned long) frag->base.des_cbfunc,
>>> - frag->segments[0].seg_key.key64, flags));
>>> + frag->segments[0].seg_key.key64[0],
>> flags));
>>>
>>> /* create a match entry */
>>> ret = PtlMEAttach(mca_btl_portals_module.portals_ni_h,
>>> OMPI_BTL_PORTALS_RDMA_TABLE_ID,
>>> *((mca_btl_base_endpoint_t*) peer),
>>> - frag->segments[0].seg_key.key64, /* match */
>>> + frag->segments[0].seg_key.key64[0], /* match
>> */
>>> 0, /* ignore */
>>> PTL_UNLINK,
>>> PTL_INS_AFTER,
>>> @@ -449,7 +449,7 @@
>>>
>>> frag->segments[0].seg_len = *size;
>>> opal_convertor_get_current_pointer( convertor,
>> (void**)&(frag->segments[0].seg_addr.pval) );
>>> - frag->segments[0].seg_key.key64 =
>>> + frag->segments[0].seg_key.key64[0] =
>>> OPAL_THREAD_ADD64(&(mca_btl_portals_module.portals_rdma_key),
>> 1);
>>> frag->base.des_src = NULL;
>>> frag->base.des_src_cnt = 0;
>>> @@ -461,14 +461,14 @@
>>> "rdma dest posted for frag 0x%lx, callback
>> 0x%lx, bits %" PRIu64 " flags %d",
>>> (unsigned long) frag,
>>> (unsigned long) frag->base.des_cbfunc,
>>> - frag->segments[0].seg_key.key64,
>>> + frag->segments[0].seg_key.key64[0],
>>> flags));
>>>
>>> /* create a match entry */
>>> ret = PtlMEAttach(mca_btl_portals_module.portals_ni_h,
>>> OMPI_BTL_PORTALS_RDMA_TABLE_ID,
>>> *((mca_btl_base_endpoint_t*) peer),
>>> - frag->segments[0].seg_key.key64, /* match */
>>> + frag->segments[0].seg_key.key64[0], /* match */
>>> 0, /* ignore */
>>> PTL_UNLINK,
>>> PTL_INS_AFTER,
>>>
>>> Modified: trunk/ompi/mca/btl/portals/btl_portals_frag.c
>>>
>>
> ==============================================================================
>>> --- trunk/ompi/mca/btl/portals/btl_portals_frag.c (original)
>>> +++ trunk/ompi/mca/btl/portals/btl_portals_frag.c 2011-11-06 11:19:09
>> EST (Sun, 06 Nov 2011)
>>> @@ -34,7 +34,7 @@
>>>
>>> frag->segments[0].seg_addr.pval = frag + 1;
>>> frag->segments[0].seg_len = frag->size;
>>> - frag->segments[0].seg_key.key64 = 0;
>>> + frag->segments[0].seg_key.key64[0] = 0;
>>>
>>> frag->md_h = PTL_INVALID_HANDLE;
>>> }
>>>
>>> Modified: trunk/ompi/mca/btl/portals/btl_portals_rdma.c
>>>
>>
> ==============================================================================
>>> --- trunk/ompi/mca/btl/portals/btl_portals_rdma.c (original)
>>> +++ trunk/ompi/mca/btl/portals/btl_portals_rdma.c 2011-11-06 11:19:09
>> EST (Sun, 06 Nov 2011)
>>> @@ -39,7 +39,7 @@
>>> OPAL_OUTPUT_VERBOSE((90, mca_btl_portals_component.portals_output,
>>> "PtlPut (rdma) fragment %lx, bits %" PRIx64,
>>> (unsigned long) frag,
>>> - frag->base.des_dst[0].seg_key.key64));
>>> + frag->base.des_dst[0].seg_key.key64[0]));
>>>
>>> assert(&mca_btl_portals_module == (mca_btl_portals_module_t*)
>> btl_base);
>>> assert(frag->md_h != PTL_INVALID_HANDLE);
>>> @@ -55,7 +55,7 @@
>>> *((mca_btl_base_endpoint_t*) btl_peer),
>>> OMPI_BTL_PORTALS_RDMA_TABLE_ID,
>>> 0, /* ac_index - not used*/
>>> - frag->base.des_dst[0].seg_key.key64, /* match bits */
>>> + frag->base.des_dst[0].seg_key.key64[0], /* match bits
>> */
>>> 0, /* remote offset - not used */
>>> *((ptl_hdr_data_t*) hdr_data)); /* hdr_data:
>> tag */
>>> if (ret != PTL_OK) {
>>> @@ -79,7 +79,7 @@
>>> OPAL_OUTPUT_VERBOSE((90, mca_btl_portals_component.portals_output,
>>> "PtlGet (rdma) fragment %lx, bits %" PRIx64,
>>> (unsigned long) frag,
>>> - frag->base.des_src[0].seg_key.key64));
>>> + frag->base.des_src[0].seg_key.key64[0]));
>>>
>>> assert(&mca_btl_portals_module == (mca_btl_portals_module_t*)
>> btl_base);
>>> assert(frag->md_h != PTL_INVALID_HANDLE);
>>> @@ -91,7 +91,7 @@
>>> *((mca_btl_base_endpoint_t*) btl_peer),
>>> OMPI_BTL_PORTALS_RDMA_TABLE_ID,
>>> 0, /* ac_index - not used*/
>>> - frag->base.des_src[0].seg_key.key64, /* match bits */
>>> + frag->base.des_src[0].seg_key.key64[0], /* match bits
>> */
>>> 0); /* remote offset - not used */
>>> if (ret != PTL_OK) {
>>> opal_output(mca_btl_portals_component.portals_output,
>>>
>>> Modified: trunk/ompi/mca/btl/self/btl_self.c
>>>
>>
> ==============================================================================
>>> --- trunk/ompi/mca/btl/self/btl_self.c (original)
>>> +++ trunk/ompi/mca/btl/self/btl_self.c 2011-11-06 11:19:09 EST (Sun, 06
>> Nov 2011)
>>> @@ -235,7 +235,7 @@
>>> frag->base.des_flags = flags;
>>> frag->base.des_src = &frag->segment;
>>> frag->base.des_src_cnt = 1;
>>> - frag->segment.seg_key.key64 = (uint64_t)(intptr_t)convertor;
>>> + frag->segment.seg_key.key64[0] = (uint64_t)(intptr_t)convertor;
>>> return &frag->base;
>>> }
>>>
>>> @@ -264,7 +264,7 @@
>>> /* setup descriptor to point directly to user buffer */
>>> opal_convertor_get_current_pointer( convertor,
>> (void**)&(frag->segment.seg_addr.pval) );
>>> frag->segment.seg_len = reserve + max_data;
>>> - frag->segment.seg_key.key64 = (uint64_t)(intptr_t)convertor;
>>> + frag->segment.seg_key.key64[0] = (uint64_t)(intptr_t)convertor;
>>> frag->base.des_dst = &frag->segment;
>>> frag->base.des_dst_cnt = 1;
>>> frag->base.des_flags = flags;
>>>
>>> Modified: trunk/ompi/mca/btl/sm/btl_sm.c
>>>
>>
> ==============================================================================
>>> --- trunk/ompi/mca/btl/sm/btl_sm.c (original)
>>> +++ trunk/ompi/mca/btl/sm/btl_sm.c 2011-11-06 11:19:09 EST (Sun, 06 Nov
>> 2011)
>>> @@ -739,7 +739,7 @@
>>> if (OPAL_UNLIKELY(ioctl(sm_btl->knem_fd, KNEM_CMD_CREATE_REGION,
>> &knem_cr) < 0)) {
>>> return NULL;
>>> }
>>> - frag->segment.seg_key.key64 = knem_cr.cookie;
>>> + frag->segment.seg_key.key64[0] = knem_cr.cookie;
>>> }
>>> #endif
>>> frag->base.des_src = &(frag->segment);
>>> @@ -968,7 +968,7 @@
>>> recv_iovec.len = dst->seg_len;
>>> icopy.local_iovec_array = (uintptr_t)&recv_iovec;
>>> icopy.local_iovec_nr = 1;
>>> - icopy.remote_cookie = src->seg_key.key64;
>>> + icopy.remote_cookie = src->seg_key.key64[0];
>>> icopy.remote_offset = 0;
>>> icopy.write = 0;
>>>
>>> @@ -1044,7 +1044,7 @@
>>> sm_btl->knem_status_first_avail = 0;
>>> }
>>> ++sm_btl->knem_status_num_used;
>>> - icopy.remote_cookie = src->seg_key.key64;
>>> + icopy.remote_cookie = src->seg_key.key64[0];
>>> icopy.remote_offset = 0;
>>>
>>> /* Use the DMA flag if knem supports it *and* the segment length
>>>
>>> Modified: trunk/ompi/mca/btl/vader/btl_vader.c
>>>
>>
> ==============================================================================
>>> --- trunk/ompi/mca/btl/vader/btl_vader.c (original)
>>> +++ trunk/ompi/mca/btl/vader/btl_vader.c 2011-11-06 11:19:09 EST (Sun,
>> 06 Nov 2011)
>>> @@ -643,7 +643,7 @@
>>>
>>> opal_convertor_get_current_pointer (convertor, (void **) &data_ptr);
>>>
>>> - frag->segment.seg_key.ptr = (uintptr_t) data_ptr;
>>> + frag->segment.seg_key.ptr[0] = (uintptr_t) data_ptr;
>>> frag->segment.seg_len = *size;
>>>
>>> frag->base.des_dst = &frag->segment;
>>> @@ -738,7 +738,7 @@
>>> return NULL;
>>> }
>>>
>>> - frag->segment.seg_key.ptr = (uintptr_t) data_ptr;
>>> + frag->segment.seg_key.ptr[0] = (uintptr_t) data_ptr;
>>> frag->segment.seg_len = reserve + *size;
>>> }
>>>
>>>
>>> Modified: trunk/ompi/mca/btl/vader/btl_vader_get.c
>>>
>>
> ==============================================================================
>>> --- trunk/ompi/mca/btl/vader/btl_vader_get.c (original)
>>> +++ trunk/ompi/mca/btl/vader/btl_vader_get.c 2011-11-06 11:19:09 EST
>> (Sun, 06 Nov 2011)
>>> @@ -34,15 +34,15 @@
>>> void *rem_ptr;
>>>
>>> reg = vader_get_registation (endpoint->peer_smp_rank,
>>> - (void *) src->seg_key.ptr,
>>> + (void *) src->seg_key.ptr[0],
>>> src->seg_len, 0);
>>> if (OPAL_UNLIKELY(NULL == reg)) {
>>> return OMPI_ERROR;
>>> }
>>>
>>> - rem_ptr = vader_reg_to_ptr (reg, (void *) src->seg_key.ptr);
>>> + rem_ptr = vader_reg_to_ptr (reg, (void *) src->seg_key.ptr[0]);
>>>
>>> - vader_memmove ((void *) dst->seg_key.ptr, rem_ptr, size);
>>> + vader_memmove ((void *) dst->seg_key.ptr[0], rem_ptr, size);
>>>
>>> vader_return_registration (reg, endpoint->peer_smp_rank);
>>>
>>>
>>> Modified: trunk/ompi/mca/btl/vader/btl_vader_put.c
>>>
>>
> ==============================================================================
>>> --- trunk/ompi/mca/btl/vader/btl_vader_put.c (original)
>>> +++ trunk/ompi/mca/btl/vader/btl_vader_put.c 2011-11-06 11:19:09 EST
>> (Sun, 06 Nov 2011)
>>> @@ -34,15 +34,15 @@
>>> void *rem_ptr;
>>>
>>> reg = vader_get_registation (endpoint->peer_smp_rank,
>>> - (void *) dst->seg_key.ptr,
>>> + (void *) dst->seg_key.ptr[0],
>>> dst->seg_len, 0);
>>> if (OPAL_UNLIKELY(NULL == reg)) {
>>> return OMPI_ERROR;
>>> }
>>>
>>> - rem_ptr = vader_reg_to_ptr (reg, (void *) dst->seg_key.ptr);
>>> + rem_ptr = vader_reg_to_ptr (reg, (void *) dst->seg_key.ptr[0]);
>>>
>>> - vader_memmove (rem_ptr, (void *) src->seg_key.ptr, size);
>>> + vader_memmove (rem_ptr, (void *) src->seg_key.ptr[0], size);
>>>
>>> vader_return_registration (reg, endpoint->peer_smp_rank);
>>>
>>> _______________________________________________
>>> svn mailing list
>>> svn_at_[hidden]
>>> http://www.open-mpi.org/mailman/listinfo.cgi/svn
>>
>>
>> _______________________________________________
>> devel mailing list
>> devel_at_[hidden]
>> http://www.open-mpi.org/mailman/listinfo.cgi/devel
>
> _______________________________________________
> devel mailing list
> devel_at_[hidden]
> http://www.open-mpi.org/mailman/listinfo.cgi/devel

-- 
Jeff Squyres
jsquyres_at_[hidden]
For corporate legal information go to:
http://www.cisco.com/web/about/doing_business/legal/cri/