Open MPI logo

Open MPI Development Mailing List Archives

  |   Home   |   Support   |   FAQ   |   all Development mailing list

Subject: Re: [OMPI devel] [OMPI svn-full] svn:open-mpi r27881 - trunk/ompi/mca/btl/tcp
From: George Bosilca (bosilca_at_[hidden])
Date: 2013-01-23 10:27:48


While we always strive to improve this functionality, it has been available as a separate software package for quite some time.

  George.

On Jan 23, 2013, at 08:05 , Jeff Squyres (jsquyres) <jsquyres_at_[hidden]> wrote:

> Are you going to develop anything further with regards to this functionality, and target that stuff for v1.7? Or should all of this just wait until 1.9?
>
> (I don't really care either way; I'm asking out of curiosity)
>
>
> On Jan 22, 2013, at 7:24 PM, George Bosilca <bosilca_at_[hidden]> wrote:
>
>> Nobody has cared about error cases so far, so I don't personally see any incentive to push this patch into 1.7 right now. But I won't be against it, as it doesn't hurt either.
>>
>> George.
>>
>>
>> On Jan 22, 2013, at 16:28 , "Jeff Squyres (jsquyres)" <jsquyres_at_[hidden]> wrote:
>>
>>> George --
>>>
>>> Similar question on this one: should it be CMR'ed to v1.7? (I kinda doubt it's appropriate for v1.6)
>>>
>>>
>>> On Jan 21, 2013, at 6:41 AM, svn-commit-mailer_at_[hidden] wrote:
>>>
>>>> Author: bosilca (George Bosilca)
>>>> Date: 2013-01-21 06:41:08 EST (Mon, 21 Jan 2013)
>>>> New Revision: 27881
>>>> URL: https://svn.open-mpi.org/trac/ompi/changeset/27881
>>>>
>>>> Log:
>>>> Make the TCP BTL really fail-safe. It now triggers the error callback on
>>>> all pending fragments when the destination goes down. This allows the PML
>>>> to recalibrate its behavior, either find an alternate route or just give up.
>>>>
>>>> Text files modified:
>>>> trunk/ompi/mca/btl/tcp/btl_tcp_endpoint.c | 29 +++++++++++++++++++++++++++--
>>>> trunk/ompi/mca/btl/tcp/btl_tcp_frag.c | 7 ++++++-
>>>> trunk/ompi/mca/btl/tcp/btl_tcp_proc.c | 2 +-
>>>> 3 files changed, 34 insertions(+), 4 deletions(-)
>>>>
>>>> Modified: trunk/ompi/mca/btl/tcp/btl_tcp_endpoint.c
>>>> ==============================================================================
>>>> --- trunk/ompi/mca/btl/tcp/btl_tcp_endpoint.c Mon Jan 21 06:35:42 2013 (r27880)
>>>> +++ trunk/ompi/mca/btl/tcp/btl_tcp_endpoint.c 2013-01-21 06:41:08 EST (Mon, 21 Jan 2013) (r27881)
>>>> @@ -2,7 +2,7 @@
>>>> * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
>>>> * University Research and Technology
>>>> * Corporation. All rights reserved.
>>>> - * Copyright (c) 2004-2008 The University of Tennessee and The University
>>>> + * Copyright (c) 2004-2013 The University of Tennessee and The University
>>>> * of Tennessee Research Foundation. All rights
>>>> * reserved.
>>>> * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
>>>> @@ -295,6 +295,7 @@
>>>> if(opal_socket_errno != EINTR && opal_socket_errno != EAGAIN && opal_socket_errno != EWOULDBLOCK) {
>>>> BTL_ERROR(("send() failed: %s (%d)",
>>>> strerror(opal_socket_errno), opal_socket_errno));
>>>> + btl_endpoint->endpoint_state = MCA_BTL_TCP_FAILED;
>>>> mca_btl_tcp_endpoint_close(btl_endpoint);
>>>> return -1;
>>>> }
>>>> @@ -359,6 +360,7 @@
>>>> mca_btl_tcp_endpoint_close(btl_endpoint);
>>>> btl_endpoint->endpoint_sd = sd;
>>>> if(mca_btl_tcp_endpoint_send_connect_ack(btl_endpoint) != OMPI_SUCCESS) {
>>>> + btl_endpoint->endpoint_state = MCA_BTL_TCP_FAILED;
>>>> mca_btl_tcp_endpoint_close(btl_endpoint);
>>>> OPAL_THREAD_UNLOCK(&btl_endpoint->endpoint_send_lock);
>>>> OPAL_THREAD_UNLOCK(&btl_endpoint->endpoint_recv_lock);
>>>> @@ -389,7 +391,6 @@
>>>> {
>>>> if(btl_endpoint->endpoint_sd < 0)
>>>> return;
>>>> - btl_endpoint->endpoint_state = MCA_BTL_TCP_CLOSED;
>>>> btl_endpoint->endpoint_retries++;
>>>> opal_event_del(&btl_endpoint->endpoint_recv_event);
>>>> opal_event_del(&btl_endpoint->endpoint_send_event);
>>>> @@ -401,6 +402,24 @@
>>>> btl_endpoint->endpoint_cache_pos = NULL;
>>>> btl_endpoint->endpoint_cache_length = 0;
>>>> #endif /* MCA_BTL_TCP_ENDPOINT_CACHE */
>>>> + /**
>>>> + * If we keep failing to connect to the peer let the caller know about
>>>> + * this situation by triggering all the pending fragments callback and
>>>> + * reporting the error.
>>>> + */
>>>> + if( MCA_BTL_TCP_FAILED == btl_endpoint->endpoint_state ) {
>>>> + mca_btl_tcp_frag_t* frag = btl_endpoint->endpoint_send_frag;
>>>> + if( NULL == frag )
>>>> + frag = (mca_btl_tcp_frag_t*)opal_list_remove_first(&btl_endpoint->endpoint_frags);
>>>> + while(NULL != frag) {
>>>> + frag->base.des_cbfunc(&frag->btl->super, frag->endpoint, &frag->base, OMPI_ERR_UNREACH);
>>>> +
>>>> + frag = (mca_btl_tcp_frag_t*)opal_list_remove_first(&btl_endpoint->endpoint_frags);
>>>> + }
>>>> + } else {
>>>> + btl_endpoint->endpoint_state = MCA_BTL_TCP_CLOSED;
>>>> + }
>>>> +
>>>> }
>>>>
>>>> /*
>>>> @@ -444,6 +463,7 @@
>>>>
>>>> /* remote closed connection */
>>>> if(retval == 0) {
>>>> + btl_endpoint->endpoint_state = MCA_BTL_TCP_FAILED;
>>>> mca_btl_tcp_endpoint_close(btl_endpoint);
>>>> return -1;
>>>> }
>>>> @@ -453,6 +473,7 @@
>>>> if(opal_socket_errno != EINTR && opal_socket_errno != EAGAIN && opal_socket_errno != EWOULDBLOCK) {
>>>> BTL_ERROR(("recv(%d) failed: %s (%d)",
>>>> btl_endpoint->endpoint_sd, strerror(opal_socket_errno), opal_socket_errno));
>>>> + btl_endpoint->endpoint_state = MCA_BTL_TCP_FAILED;
>>>> mca_btl_tcp_endpoint_close(btl_endpoint);
>>>> return -1;
>>>> }
>>>> @@ -589,6 +610,7 @@
>>>> address,
>>>> btl_endpoint->endpoint_addr->addr_port, strerror(opal_socket_errno) ) );
>>>> }
>>>> + btl_endpoint->endpoint_state = MCA_BTL_TCP_FAILED;
>>>> mca_btl_tcp_endpoint_close(btl_endpoint);
>>>> btl_endpoint->endpoint_retries++;
>>>> return OMPI_ERR_UNREACH;
>>>> @@ -599,6 +621,7 @@
>>>> btl_endpoint->endpoint_state = MCA_BTL_TCP_CONNECT_ACK;
>>>> opal_event_add(&btl_endpoint->endpoint_recv_event, 0);
>>>> } else {
>>>> + btl_endpoint->endpoint_state = MCA_BTL_TCP_FAILED;
>>>> mca_btl_tcp_endpoint_close(btl_endpoint);
>>>> }
>>>> return rc;
>>>> @@ -645,6 +668,7 @@
>>>> btl_endpoint->endpoint_state = MCA_BTL_TCP_CONNECT_ACK;
>>>> opal_event_add(&btl_endpoint->endpoint_recv_event, 0);
>>>> } else {
>>>> + btl_endpoint->endpoint_state = MCA_BTL_TCP_FAILED;
>>>> mca_btl_tcp_endpoint_close(btl_endpoint);
>>>> }
>>>> }
>>>> @@ -747,6 +771,7 @@
>>>> default:
>>>> OPAL_THREAD_UNLOCK(&btl_endpoint->endpoint_recv_lock);
>>>> BTL_ERROR(("invalid socket state(%d)", btl_endpoint->endpoint_state));
>>>> + btl_endpoint->endpoint_state = MCA_BTL_TCP_FAILED;
>>>> mca_btl_tcp_endpoint_close(btl_endpoint);
>>>> break;
>>>> }
>>>>
>>>> Modified: trunk/ompi/mca/btl/tcp/btl_tcp_frag.c
>>>> ==============================================================================
>>>> --- trunk/ompi/mca/btl/tcp/btl_tcp_frag.c Mon Jan 21 06:35:42 2013 (r27880)
>>>> +++ trunk/ompi/mca/btl/tcp/btl_tcp_frag.c 2013-01-21 06:41:08 EST (Mon, 21 Jan 2013) (r27881)
>>>> @@ -2,7 +2,7 @@
>>>> * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
>>>> * University Research and Technology
>>>> * Corporation. All rights reserved.
>>>> - * Copyright (c) 2004-2006 The University of Tennessee and The University
>>>> + * Copyright (c) 2004-2013 The University of Tennessee and The University
>>>> * of Tennessee Research Foundation. All rights
>>>> * reserved.
>>>> * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
>>>> @@ -115,12 +115,14 @@
>>>> BTL_ERROR(("mca_btl_tcp_frag_send: writev error (%p, %lu)\n\t%s(%lu)\n",
>>>> frag->iov_ptr[0].iov_base, (unsigned long) frag->iov_ptr[0].iov_len,
>>>> strerror(opal_socket_errno), (unsigned long) frag->iov_cnt));
>>>> + frag->endpoint->endpoint_state = MCA_BTL_TCP_FAILED;
>>>> mca_btl_tcp_endpoint_close(frag->endpoint);
>>>> return false;
>>>> default:
>>>> BTL_ERROR(("mca_btl_tcp_frag_send: writev failed: %s (%d)",
>>>> strerror(opal_socket_errno),
>>>> opal_socket_errno));
>>>> + frag->endpoint->endpoint_state = MCA_BTL_TCP_FAILED;
>>>> mca_btl_tcp_endpoint_close(frag->endpoint);
>>>> return false;
>>>> }
>>>> @@ -195,6 +197,7 @@
>>>> cnt = readv(sd, frag->iov_ptr, num_vecs);
>>>> if( 0 < cnt ) goto advance_iov_position;
>>>> if( cnt == 0 ) {
>>>> + btl_endpoint->endpoint_state = MCA_BTL_TCP_FAILED;
>>>> mca_btl_tcp_endpoint_close(btl_endpoint);
>>>> return false;
>>>> }
>>>> @@ -207,12 +210,14 @@
>>>> BTL_ERROR(("mca_btl_tcp_frag_recv: readv error (%p, %lu)\n\t%s(%lu)\n",
>>>> frag->iov_ptr[0].iov_base, (unsigned long) frag->iov_ptr[0].iov_len,
>>>> strerror(opal_socket_errno), (unsigned long) frag->iov_cnt));
>>>> + btl_endpoint->endpoint_state = MCA_BTL_TCP_FAILED;
>>>> mca_btl_tcp_endpoint_close(btl_endpoint);
>>>> return false;
>>>> default:
>>>> BTL_ERROR(("mca_btl_tcp_frag_recv: readv failed: %s (%d)",
>>>> strerror(opal_socket_errno),
>>>> opal_socket_errno));
>>>> + btl_endpoint->endpoint_state = MCA_BTL_TCP_FAILED;
>>>> mca_btl_tcp_endpoint_close(btl_endpoint);
>>>> return false;
>>>> }
>>>>
>>>> Modified: trunk/ompi/mca/btl/tcp/btl_tcp_proc.c
>>>> ==============================================================================
>>>> --- trunk/ompi/mca/btl/tcp/btl_tcp_proc.c Mon Jan 21 06:35:42 2013 (r27880)
>>>> +++ trunk/ompi/mca/btl/tcp/btl_tcp_proc.c 2013-01-21 06:41:08 EST (Mon, 21 Jan 2013) (r27881)
>>>> @@ -680,7 +680,7 @@
>>>> {
>>>> size_t i;
>>>> OPAL_THREAD_LOCK(&btl_proc->proc_lock);
>>>> - for(i=0; i<btl_proc->proc_endpoint_count; i++) {
>>>> + for(i = 0; i < btl_proc->proc_endpoint_count; i++) {
>>>> if(btl_proc->proc_endpoints[i] == btl_endpoint) {
>>>> memmove(btl_proc->proc_endpoints+i, btl_proc->proc_endpoints+i+1,
>>>> (btl_proc->proc_endpoint_count-i-1)*sizeof(mca_btl_base_endpoint_t*));
>>>> _______________________________________________
>>>> svn-full mailing list
>>>> svn-full_at_[hidden]
>>>> http://www.open-mpi.org/mailman/listinfo.cgi/svn-full
>>>
>>>
>>> --
>>> Jeff Squyres
>>> jsquyres_at_[hidden]
>>> For corporate legal information go to: http://www.cisco.com/web/about/doing_business/legal/cri/
>>>
>>>
>>> _______________________________________________
>>> devel mailing list
>>> devel_at_[hidden]
>>> http://www.open-mpi.org/mailman/listinfo.cgi/devel
>>
>>
>> _______________________________________________
>> devel mailing list
>> devel_at_[hidden]
>> http://www.open-mpi.org/mailman/listinfo.cgi/devel
>
>
> --
> Jeff Squyres
> jsquyres_at_[hidden]
> For corporate legal information go to: http://www.cisco.com/web/about/doing_business/legal/cri/
>
>
> _______________________________________________
> devel mailing list
> devel_at_[hidden]
> http://www.open-mpi.org/mailman/listinfo.cgi/devel