Open MPI logo

Open MPI Development Mailing List Archives

  |   Home   |   Support   |   FAQ   |   all Development mailing list

Subject: Re: [OMPI devel] [OMPI svn-full] svn:open-mpi r27881 - trunk/ompi/mca/btl/tcp
From: Jeff Squyres (jsquyres) (jsquyres_at_[hidden])
Date: 2013-01-22 16:28:54


George --

Similar question on this one: should it be CMR'ed to v1.7? (I kinda doubt it's appropriate for v1.6)

On Jan 21, 2013, at 6:41 AM, svn-commit-mailer_at_[hidden] wrote:

> Author: bosilca (George Bosilca)
> Date: 2013-01-21 06:41:08 EST (Mon, 21 Jan 2013)
> New Revision: 27881
> URL: https://svn.open-mpi.org/trac/ompi/changeset/27881
>
> Log:
> Make the TCP BTL really fail-safe. It now triggers the error callback on
> all pending fragments when the destination goes down. This allows the PML
> to recalibrate its behavior, either finding an alternate route or just giving up.
>
> Text files modified:
> trunk/ompi/mca/btl/tcp/btl_tcp_endpoint.c | 29 +++++++++++++++++++++++++++--
> trunk/ompi/mca/btl/tcp/btl_tcp_frag.c | 7 ++++++-
> trunk/ompi/mca/btl/tcp/btl_tcp_proc.c | 2 +-
> 3 files changed, 34 insertions(+), 4 deletions(-)
>
> Modified: trunk/ompi/mca/btl/tcp/btl_tcp_endpoint.c
> ==============================================================================
> --- trunk/ompi/mca/btl/tcp/btl_tcp_endpoint.c Mon Jan 21 06:35:42 2013 (r27880)
> +++ trunk/ompi/mca/btl/tcp/btl_tcp_endpoint.c 2013-01-21 06:41:08 EST (Mon, 21 Jan 2013) (r27881)
> @@ -2,7 +2,7 @@
> * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
> * University Research and Technology
> * Corporation. All rights reserved.
> - * Copyright (c) 2004-2008 The University of Tennessee and The University
> + * Copyright (c) 2004-2013 The University of Tennessee and The University
> * of Tennessee Research Foundation. All rights
> * reserved.
> * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
> @@ -295,6 +295,7 @@
> if(opal_socket_errno != EINTR && opal_socket_errno != EAGAIN && opal_socket_errno != EWOULDBLOCK) {
> BTL_ERROR(("send() failed: %s (%d)",
> strerror(opal_socket_errno), opal_socket_errno));
> + btl_endpoint->endpoint_state = MCA_BTL_TCP_FAILED;
> mca_btl_tcp_endpoint_close(btl_endpoint);
> return -1;
> }
> @@ -359,6 +360,7 @@
> mca_btl_tcp_endpoint_close(btl_endpoint);
> btl_endpoint->endpoint_sd = sd;
> if(mca_btl_tcp_endpoint_send_connect_ack(btl_endpoint) != OMPI_SUCCESS) {
> + btl_endpoint->endpoint_state = MCA_BTL_TCP_FAILED;
> mca_btl_tcp_endpoint_close(btl_endpoint);
> OPAL_THREAD_UNLOCK(&btl_endpoint->endpoint_send_lock);
> OPAL_THREAD_UNLOCK(&btl_endpoint->endpoint_recv_lock);
> @@ -389,7 +391,6 @@
> {
> if(btl_endpoint->endpoint_sd < 0)
> return;
> - btl_endpoint->endpoint_state = MCA_BTL_TCP_CLOSED;
> btl_endpoint->endpoint_retries++;
> opal_event_del(&btl_endpoint->endpoint_recv_event);
> opal_event_del(&btl_endpoint->endpoint_send_event);
> @@ -401,6 +402,24 @@
> btl_endpoint->endpoint_cache_pos = NULL;
> btl_endpoint->endpoint_cache_length = 0;
> #endif /* MCA_BTL_TCP_ENDPOINT_CACHE */
> + /**
> + * If we keep failing to connect to the peer let the caller know about
> + * this situation by triggering all the pending fragments callback and
> + * reporting the error.
> + */
> + if( MCA_BTL_TCP_FAILED == btl_endpoint->endpoint_state ) {
> + mca_btl_tcp_frag_t* frag = btl_endpoint->endpoint_send_frag;
> + if( NULL == frag )
> + frag = (mca_btl_tcp_frag_t*)opal_list_remove_first(&btl_endpoint->endpoint_frags);
> + while(NULL != frag) {
> + frag->base.des_cbfunc(&frag->btl->super, frag->endpoint, &frag->base, OMPI_ERR_UNREACH);
> +
> + frag = (mca_btl_tcp_frag_t*)opal_list_remove_first(&btl_endpoint->endpoint_frags);
> + }
> + } else {
> + btl_endpoint->endpoint_state = MCA_BTL_TCP_CLOSED;
> + }
> +
> }
>
> /*
> @@ -444,6 +463,7 @@
>
> /* remote closed connection */
> if(retval == 0) {
> + btl_endpoint->endpoint_state = MCA_BTL_TCP_FAILED;
> mca_btl_tcp_endpoint_close(btl_endpoint);
> return -1;
> }
> @@ -453,6 +473,7 @@
> if(opal_socket_errno != EINTR && opal_socket_errno != EAGAIN && opal_socket_errno != EWOULDBLOCK) {
> BTL_ERROR(("recv(%d) failed: %s (%d)",
> btl_endpoint->endpoint_sd, strerror(opal_socket_errno), opal_socket_errno));
> + btl_endpoint->endpoint_state = MCA_BTL_TCP_FAILED;
> mca_btl_tcp_endpoint_close(btl_endpoint);
> return -1;
> }
> @@ -589,6 +610,7 @@
> address,
> btl_endpoint->endpoint_addr->addr_port, strerror(opal_socket_errno) ) );
> }
> + btl_endpoint->endpoint_state = MCA_BTL_TCP_FAILED;
> mca_btl_tcp_endpoint_close(btl_endpoint);
> btl_endpoint->endpoint_retries++;
> return OMPI_ERR_UNREACH;
> @@ -599,6 +621,7 @@
> btl_endpoint->endpoint_state = MCA_BTL_TCP_CONNECT_ACK;
> opal_event_add(&btl_endpoint->endpoint_recv_event, 0);
> } else {
> + btl_endpoint->endpoint_state = MCA_BTL_TCP_FAILED;
> mca_btl_tcp_endpoint_close(btl_endpoint);
> }
> return rc;
> @@ -645,6 +668,7 @@
> btl_endpoint->endpoint_state = MCA_BTL_TCP_CONNECT_ACK;
> opal_event_add(&btl_endpoint->endpoint_recv_event, 0);
> } else {
> + btl_endpoint->endpoint_state = MCA_BTL_TCP_FAILED;
> mca_btl_tcp_endpoint_close(btl_endpoint);
> }
> }
> @@ -747,6 +771,7 @@
> default:
> OPAL_THREAD_UNLOCK(&btl_endpoint->endpoint_recv_lock);
> BTL_ERROR(("invalid socket state(%d)", btl_endpoint->endpoint_state));
> + btl_endpoint->endpoint_state = MCA_BTL_TCP_FAILED;
> mca_btl_tcp_endpoint_close(btl_endpoint);
> break;
> }
>
> Modified: trunk/ompi/mca/btl/tcp/btl_tcp_frag.c
> ==============================================================================
> --- trunk/ompi/mca/btl/tcp/btl_tcp_frag.c Mon Jan 21 06:35:42 2013 (r27880)
> +++ trunk/ompi/mca/btl/tcp/btl_tcp_frag.c 2013-01-21 06:41:08 EST (Mon, 21 Jan 2013) (r27881)
> @@ -2,7 +2,7 @@
> * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
> * University Research and Technology
> * Corporation. All rights reserved.
> - * Copyright (c) 2004-2006 The University of Tennessee and The University
> + * Copyright (c) 2004-2013 The University of Tennessee and The University
> * of Tennessee Research Foundation. All rights
> * reserved.
> * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
> @@ -115,12 +115,14 @@
> BTL_ERROR(("mca_btl_tcp_frag_send: writev error (%p, %lu)\n\t%s(%lu)\n",
> frag->iov_ptr[0].iov_base, (unsigned long) frag->iov_ptr[0].iov_len,
> strerror(opal_socket_errno), (unsigned long) frag->iov_cnt));
> + frag->endpoint->endpoint_state = MCA_BTL_TCP_FAILED;
> mca_btl_tcp_endpoint_close(frag->endpoint);
> return false;
> default:
> BTL_ERROR(("mca_btl_tcp_frag_send: writev failed: %s (%d)",
> strerror(opal_socket_errno),
> opal_socket_errno));
> + frag->endpoint->endpoint_state = MCA_BTL_TCP_FAILED;
> mca_btl_tcp_endpoint_close(frag->endpoint);
> return false;
> }
> @@ -195,6 +197,7 @@
> cnt = readv(sd, frag->iov_ptr, num_vecs);
> if( 0 < cnt ) goto advance_iov_position;
> if( cnt == 0 ) {
> + btl_endpoint->endpoint_state = MCA_BTL_TCP_FAILED;
> mca_btl_tcp_endpoint_close(btl_endpoint);
> return false;
> }
> @@ -207,12 +210,14 @@
> BTL_ERROR(("mca_btl_tcp_frag_recv: readv error (%p, %lu)\n\t%s(%lu)\n",
> frag->iov_ptr[0].iov_base, (unsigned long) frag->iov_ptr[0].iov_len,
> strerror(opal_socket_errno), (unsigned long) frag->iov_cnt));
> + btl_endpoint->endpoint_state = MCA_BTL_TCP_FAILED;
> mca_btl_tcp_endpoint_close(btl_endpoint);
> return false;
> default:
> BTL_ERROR(("mca_btl_tcp_frag_recv: readv failed: %s (%d)",
> strerror(opal_socket_errno),
> opal_socket_errno));
> + btl_endpoint->endpoint_state = MCA_BTL_TCP_FAILED;
> mca_btl_tcp_endpoint_close(btl_endpoint);
> return false;
> }
>
> Modified: trunk/ompi/mca/btl/tcp/btl_tcp_proc.c
> ==============================================================================
> --- trunk/ompi/mca/btl/tcp/btl_tcp_proc.c Mon Jan 21 06:35:42 2013 (r27880)
> +++ trunk/ompi/mca/btl/tcp/btl_tcp_proc.c 2013-01-21 06:41:08 EST (Mon, 21 Jan 2013) (r27881)
> @@ -680,7 +680,7 @@
> {
> size_t i;
> OPAL_THREAD_LOCK(&btl_proc->proc_lock);
> - for(i=0; i<btl_proc->proc_endpoint_count; i++) {
> + for(i = 0; i < btl_proc->proc_endpoint_count; i++) {
> if(btl_proc->proc_endpoints[i] == btl_endpoint) {
> memmove(btl_proc->proc_endpoints+i, btl_proc->proc_endpoints+i+1,
> (btl_proc->proc_endpoint_count-i-1)*sizeof(mca_btl_base_endpoint_t*));
> _______________________________________________
> svn-full mailing list
> svn-full_at_[hidden]
> http://www.open-mpi.org/mailman/listinfo.cgi/svn-full

-- 
Jeff Squyres
jsquyres_at_[hidden]
For corporate legal information go to: http://www.cisco.com/web/about/doing_business/legal/cri/