Open MPI logo

Open MPI Development Mailing List Archives

  |   Home   |   Support   |   FAQ   |   all Development mailing list

Subject: Re: [OMPI devel] [OMPI svn-full] svn:open-mpi r22669
From: Jeff Squyres (jsquyres_at_[hidden])
Date: 2010-02-19 08:01:27


Yo George --

This commit includes a lot of indentation changes, so at first blush it's hard to tell exactly what it does.

Can you give a short explanation of what this commit does?

Thanks!

On Feb 19, 2010, at 2:10 AM, <bosilca_at_[hidden]> wrote:

> Author: bosilca
> Date: 2010-02-19 02:10:32 EST (Fri, 19 Feb 2010)
> New Revision: 22669
> URL: https://svn.open-mpi.org/trac/ompi/changeset/22669
>
> Log:
> Unrestricted number of interfaces.
>
> Text files modified:
> trunk/ompi/mca/btl/tcp/btl_tcp_endpoint.c | 6
> trunk/ompi/mca/btl/tcp/btl_tcp_proc.c | 290 ++++++++++++++++++++++-----------------
> trunk/ompi/mca/btl/tcp/btl_tcp_proc.h | 3
> 3 files changed, 165 insertions(+), 134 deletions(-)
>
> Modified: trunk/ompi/mca/btl/tcp/btl_tcp_endpoint.c
> ==============================================================================
> --- trunk/ompi/mca/btl/tcp/btl_tcp_endpoint.c (original)
> +++ trunk/ompi/mca/btl/tcp/btl_tcp_endpoint.c 2010-02-19 02:10:32 EST (Fri, 19 Feb 2010)
> @@ -315,7 +315,7 @@
> {
> /* send process identifier to remote endpoint */
> mca_btl_tcp_proc_t* btl_proc = mca_btl_tcp_proc_local();
> - orte_process_name_t guid = btl_proc->proc_name;
> + orte_process_name_t guid = btl_proc->proc_ompi->proc_name;
>
> ORTE_PROCESS_NAME_HTON(guid);
> if(mca_btl_tcp_endpoint_send_blocking(btl_endpoint, &guid, sizeof(guid)) !=
> @@ -479,7 +479,9 @@
> }
> ORTE_PROCESS_NAME_NTOH(guid);
> /* compare this to the expected values */
> - if (OPAL_EQUAL != orte_util_compare_name_fields(ORTE_NS_CMP_ALL, &btl_proc->proc_name, &guid)) {
> + if (OPAL_EQUAL != orte_util_compare_name_fields(ORTE_NS_CMP_ALL,
> + &btl_proc->proc_ompi->proc_name,
> + &guid)) {
> BTL_ERROR(("received unexpected process identifier %s",
> ORTE_NAME_PRINT(&guid)));
> mca_btl_tcp_endpoint_close(btl_endpoint);
>
> Modified: trunk/ompi/mca/btl/tcp/btl_tcp_proc.c
> ==============================================================================
> --- trunk/ompi/mca/btl/tcp/btl_tcp_proc.c (original)
> +++ trunk/ompi/mca/btl/tcp/btl_tcp_proc.c 2010-02-19 02:10:32 EST (Fri, 19 Feb 2010)
> @@ -2,7 +2,7 @@
> * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
> * University Research and Technology
> * Corporation. All rights reserved.
> - * Copyright (c) 2004-2008 The University of Tennessee and The University
> + * Copyright (c) 2004-2010 The University of Tennessee and The University
> * of Tennessee Research Foundation. All rights
> * reserved.
> * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
> @@ -40,17 +40,17 @@
> static void mca_btl_tcp_proc_construct(mca_btl_tcp_proc_t* proc);
> static void mca_btl_tcp_proc_destruct(mca_btl_tcp_proc_t* proc);
>
> -mca_btl_tcp_interface_t* local_interfaces[MAX_KERNEL_INTERFACES];
> -mca_btl_tcp_interface_t* peer_interfaces[MAX_KERNEL_INTERFACES];
> -int local_kindex_to_index[MAX_KERNEL_INTERFACE_INDEX];
> -int peer_kindex_to_index[MAX_KERNEL_INTERFACE_INDEX];
> -size_t num_local_interfaces;
> -size_t num_peer_interfaces;
> -unsigned int *best_assignment;
> -int max_assignment_weight;
> -int max_assignment_cardinality;
> -enum mca_btl_tcp_connection_quality **weights;
> -struct mca_btl_tcp_addr_t ***best_addr;
> +static mca_btl_tcp_interface_t** local_interfaces = NULL;
> +static int local_kindex_to_index[MAX_KERNEL_INTERFACE_INDEX];
> +static size_t num_local_interfaces, max_local_interfaces;
> +static mca_btl_tcp_interface_t** peer_interfaces = NULL;
> +static size_t num_peer_interfaces, max_peer_interfaces;
> +static int peer_kindex_to_index[MAX_KERNEL_INTERFACE_INDEX];
> +static unsigned int *best_assignment;
> +static int max_assignment_weight;
> +static int max_assignment_cardinality;
> +static enum mca_btl_tcp_connection_quality **weights;
> +static struct mca_btl_tcp_addr_t ***best_addr;
>
> OBJ_CLASS_INSTANCE( mca_btl_tcp_proc_t,
> opal_list_item_t,
> @@ -76,7 +76,7 @@
> /* remove from list of all proc instances */
> OPAL_THREAD_LOCK(&mca_btl_tcp_component.tcp_lock);
> opal_hash_table_remove_value_uint64(&mca_btl_tcp_component.tcp_procs,
> - orte_util_hash_name(&tcp_proc->proc_name));
> + orte_util_hash_name(&tcp_proc->proc_ompi->proc_name));
> OPAL_THREAD_UNLOCK(&mca_btl_tcp_component.tcp_lock);
>
> /* release resources */
> @@ -113,7 +113,6 @@
> if(NULL == btl_proc)
> return NULL;
> btl_proc->proc_ompi = ompi_proc;
> - btl_proc->proc_name = ompi_proc->proc_name;
>
> /* add to hash table of all proc instance */
> opal_hash_table_set_value_uint64(&mca_btl_tcp_component.tcp_procs,
> @@ -176,8 +175,6 @@
> int assignment_weight = 0;
> int assignment_cardinality = 0;
>
> -
> -
> if(max_interfaces < num_peer_interfaces) {
> max_interfaces = num_peer_interfaces;
> }
> @@ -232,64 +229,34 @@
> interface->inuse = 0;
> }
>
> -
> -/*
> - * Note that this routine must be called with the lock on the process
> - * already held. Insert a btl instance into the proc array and assign
> - * it an address.
> - */
> -int mca_btl_tcp_proc_insert( mca_btl_tcp_proc_t* btl_proc,
> - mca_btl_base_endpoint_t* btl_endpoint )
> +static mca_btl_tcp_interface_t** mca_btl_tcp_retrieve_local_interfaces(void)
> {
> - size_t i, j;
> - struct sockaddr_storage endpoint_addr_ss, local_addr;
> - int idx, rc;
> - int *a = NULL;
> - unsigned int perm_size;
> - char **include;
> - char **exclude;
> - char **argv;
> - bool skip;
> + struct sockaddr_storage local_addr;
> char local_if_name[IF_NAMESIZE];
> + char **include, **exclude, **argv;
> + bool skip;
> + int idx;
>
> - num_local_interfaces = 0;
> - num_peer_interfaces = 0;
> -
> -#ifndef WORDS_BIGENDIAN
> - /* if we are little endian and our peer is not so lucky, then we
> - need to put all information sent to him in big endian (aka
> - Network Byte Order) and expect all information received to
> - be in NBO. Since big endian machines always send and receive
> - in NBO, we don't care so much about that case. */
> - if (btl_proc->proc_ompi->proc_arch & OPAL_ARCH_ISBIGENDIAN) {
> - btl_endpoint->endpoint_nbo = true;
> - }
> -#endif
> -
> - /* insert into endpoint array */
> - btl_endpoint->endpoint_proc = btl_proc;
> - btl_proc->proc_endpoints[btl_proc->proc_endpoint_count++] = btl_endpoint;
> + if( NULL != local_interfaces )
> + return local_interfaces;
>
> + max_local_interfaces = MAX_KERNEL_INTERFACES;
> + num_local_interfaces = 0;
> + local_interfaces = (mca_btl_tcp_interface_t**)calloc( max_local_interfaces, sizeof(mca_btl_tcp_interface_t*) );
> + if( NULL == local_interfaces )
> + return NULL;
>
> memset(local_kindex_to_index, -1, sizeof(int)*MAX_KERNEL_INTERFACE_INDEX);
> - memset(peer_kindex_to_index, -1, sizeof(int)*MAX_KERNEL_INTERFACE_INDEX);
> - memset(local_interfaces, 0, sizeof(local_interfaces));
> - memset(peer_interfaces, 0, sizeof(peer_interfaces));
>
> /* Collect up the list of included and excluded interfaces, if any */
> include = opal_argv_split(mca_btl_tcp_component.tcp_if_include,',');
> exclude = opal_argv_split(mca_btl_tcp_component.tcp_if_exclude,',');
>
> /*
> - * the following two blocks shout CODE DUPLICATION. We are aware of
> - * the problem
> - */
> -
> - /*
> * identify all kernel interfaces and the associated addresses of
> * the local node
> */
> - for (idx = opal_ifbegin(); idx >= 0; idx=opal_ifnext (idx)) {
> + for( idx = opal_ifbegin(); idx >= 0; idx = opal_ifnext (idx) ) {
> int kindex, index;
>
> opal_ifindextoaddr (idx, (struct sockaddr*) &local_addr, sizeof (local_addr));
> @@ -310,10 +277,6 @@
> }
> argv++;
> }
> - if (true == skip) {
> - /* This interface is not part of the included set, so skip it */
> - continue;
> - }
> }
>
> /* If we were given a list of excluded interfaces, then check to see if the
> @@ -333,9 +296,10 @@
> }
> argv++;
> }
> - if(true == skip) {
> - continue;
> - }
> + }
> + if (true == skip) {
> + /* This interface is not part of the requested set, so skip it */
> + continue;
> }
>
> kindex = opal_ifindextokindex(idx);
> @@ -345,48 +309,103 @@
> if(-1 == index) {
> index = num_local_interfaces++;
> local_kindex_to_index[kindex] = index;
> +
> + if( num_local_interfaces == max_local_interfaces ) {
> + max_local_interfaces <<= 1;
> + local_interfaces = (mca_btl_tcp_interface_t**)realloc( local_interfaces,
> + max_local_interfaces * sizeof(mca_btl_tcp_interface_t*) );
> + if( NULL == local_interfaces )
> + return NULL;
> + }
> local_interfaces[index] = (mca_btl_tcp_interface_t *) malloc(sizeof(mca_btl_tcp_interface_t));
> assert(NULL != local_interfaces[index]);
> mca_btl_tcp_initialise_interface(local_interfaces[index], kindex, index);
> }
>
> switch(local_addr.ss_family) {
> - case AF_INET:
> - /* if AF is disabled, skip it completely */
> - if (4 == mca_btl_tcp_component.tcp_disable_family) {
> - continue;
> - }
> + case AF_INET:
> + /* if AF is disabled, skip it completely */
> + if (4 == mca_btl_tcp_component.tcp_disable_family) {
> + continue;
> + }
>
> - local_interfaces[local_kindex_to_index[kindex]]->ipv4_address =
> - (struct sockaddr_storage*) malloc(sizeof(local_addr));
> - memcpy(local_interfaces[local_kindex_to_index[kindex]]->ipv4_address,
> - &local_addr, sizeof(local_addr));
> - opal_ifindextomask(idx,
> - &local_interfaces[local_kindex_to_index[kindex]]->ipv4_netmask,
> - sizeof(int));
> - break;
> - case AF_INET6:
> - /* if AF is disabled, skip it completely */
> - if (6 == mca_btl_tcp_component.tcp_disable_family) {
> - continue;
> - }
> + local_interfaces[local_kindex_to_index[kindex]]->ipv4_address =
> + (struct sockaddr_storage*) malloc(sizeof(local_addr));
> + memcpy(local_interfaces[local_kindex_to_index[kindex]]->ipv4_address,
> + &local_addr, sizeof(local_addr));
> + opal_ifindextomask(idx,
> + &local_interfaces[local_kindex_to_index[kindex]]->ipv4_netmask,
> + sizeof(int));
> + break;
> + case AF_INET6:
> + /* if AF is disabled, skip it completely */
> + if (6 == mca_btl_tcp_component.tcp_disable_family) {
> + continue;
> + }
>
> - local_interfaces[local_kindex_to_index[kindex]]->ipv6_address
> - = (struct sockaddr_storage*) malloc(sizeof(local_addr));
> - memcpy(local_interfaces[local_kindex_to_index[kindex]]->ipv6_address,
> - &local_addr, sizeof(local_addr));
> - opal_ifindextomask(idx,
> - &local_interfaces[local_kindex_to_index[kindex]]->ipv6_netmask,
> - sizeof(int));
> - break;
> - default:
> - opal_output(0, "unknown address family for tcp: %d\n",
> + local_interfaces[local_kindex_to_index[kindex]]->ipv6_address
> + = (struct sockaddr_storage*) malloc(sizeof(local_addr));
> + memcpy(local_interfaces[local_kindex_to_index[kindex]]->ipv6_address,
> + &local_addr, sizeof(local_addr));
> + opal_ifindextomask(idx,
> + &local_interfaces[local_kindex_to_index[kindex]]->ipv6_netmask,
> + sizeof(int));
> + break;
> + default:
> + opal_output(0, "unknown address family for tcp: %d\n",
> local_addr.ss_family);
> }
> }
> opal_argv_free(include);
> opal_argv_free(exclude);
>
> + return local_interfaces;
> +}
> +/*
> + * Note that this routine must be called with the lock on the process
> + * already held. Insert a btl instance into the proc array and assign
> + * it an address.
> + */
> +int mca_btl_tcp_proc_insert( mca_btl_tcp_proc_t* btl_proc,
> + mca_btl_base_endpoint_t* btl_endpoint )
> +{
> + struct sockaddr_storage endpoint_addr_ss;
> + unsigned int perm_size;
> + int rc, *a = NULL;
> + size_t i, j;
> +
> +#ifndef WORDS_BIGENDIAN
> + /* if we are little endian and our peer is not so lucky, then we
> + need to put all information sent to him in big endian (aka
> + Network Byte Order) and expect all information received to
> + be in NBO. Since big endian machines always send and receive
> + in NBO, we don't care so much about that case. */
> + if (btl_proc->proc_ompi->proc_arch & OPAL_ARCH_ISBIGENDIAN) {
> + btl_endpoint->endpoint_nbo = true;
> + }
> +#endif
> +
> + /* insert into endpoint array */
> + btl_endpoint->endpoint_proc = btl_proc;
> + btl_proc->proc_endpoints[btl_proc->proc_endpoint_count++] = btl_endpoint;
> +
> + /* sanity checks */
> + if( NULL == local_interfaces ) {
> + if( NULL == mca_btl_tcp_retrieve_local_interfaces() )
> + return OMPI_ERR_OUT_OF_RESOURCE;
> + }
> + if( 0 == num_local_interfaces ) {
> + return OMPI_ERR_UNREACH;
> + }
> +
> + if( NULL == peer_interfaces ) {
> + max_peer_interfaces = max_local_interfaces;
> + peer_interfaces = (mca_btl_tcp_interface_t**)malloc( max_peer_interfaces * sizeof(mca_btl_tcp_interface_t*) );
> + }
> + num_peer_interfaces = 0;
> + memset(peer_kindex_to_index, -1, sizeof(int)*MAX_KERNEL_INTERFACE_INDEX);
> + memset(peer_interfaces, 0, max_peer_interfaces * sizeof(mca_btl_tcp_interface_t*));
> +
> /*
> * identify all kernel interfaces and the associated addresses of
> * the peer
> @@ -405,9 +424,16 @@
> if(-1 == index) {
> index = num_peer_interfaces++;
> peer_kindex_to_index[endpoint_addr->addr_ifkindex] = index;
> + if( num_peer_interfaces == max_peer_interfaces ) {
> + max_peer_interfaces <<= 1;
> + peer_interfaces = (mca_btl_tcp_interface_t**)realloc( peer_interfaces,
> + max_peer_interfaces * sizeof(mca_btl_tcp_interface_t*) );
> + if( NULL == peer_interfaces )
> + return OMPI_ERR_OUT_OF_RESOURCE;
> + }
> peer_interfaces[index] = (mca_btl_tcp_interface_t *) malloc(sizeof(mca_btl_tcp_interface_t));
> mca_btl_tcp_initialise_interface(peer_interfaces[index],
> - endpoint_addr->addr_ifkindex, index);
> + endpoint_addr->addr_ifkindex, index);
> }
>
> /*
> @@ -419,25 +445,25 @@
> }
>
> switch(endpoint_addr_ss.ss_family) {
> - case AF_INET:
> - peer_interfaces[index]->ipv4_address = (struct sockaddr_storage*) malloc(sizeof(endpoint_addr_ss));
> - peer_interfaces[index]->ipv4_endpoint_addr = endpoint_addr;
> - memcpy(peer_interfaces[index]->ipv4_address,
> - &endpoint_addr_ss, sizeof(endpoint_addr_ss));
> - break;
> - case AF_INET6:
> - peer_interfaces[index]->ipv6_address = (struct sockaddr_storage*) malloc(sizeof(endpoint_addr_ss));
> - peer_interfaces[index]->ipv6_endpoint_addr = endpoint_addr;
> - memcpy(peer_interfaces[index]->ipv6_address,
> - &endpoint_addr_ss, sizeof(endpoint_addr_ss));
> - break;
> - default:
> - opal_output(0, "unknown address family for tcp: %d\n",
> - local_addr.ss_family);
> - /*
> - * return OMPI_UNREACH or some error, as this is not
> - * good
> - */
> + case AF_INET:
> + peer_interfaces[index]->ipv4_address = (struct sockaddr_storage*) malloc(sizeof(endpoint_addr_ss));
> + peer_interfaces[index]->ipv4_endpoint_addr = endpoint_addr;
> + memcpy(peer_interfaces[index]->ipv4_address,
> + &endpoint_addr_ss, sizeof(endpoint_addr_ss));
> + break;
> + case AF_INET6:
> + peer_interfaces[index]->ipv6_address = (struct sockaddr_storage*) malloc(sizeof(endpoint_addr_ss));
> + peer_interfaces[index]->ipv6_endpoint_addr = endpoint_addr;
> + memcpy(peer_interfaces[index]->ipv6_address,
> + &endpoint_addr_ss, sizeof(endpoint_addr_ss));
> + break;
> + default:
> + opal_output(0, "unknown address family for tcp: %d\n",
> + endpoint_addr_ss.ss_family);
> + /*
> + * return OMPI_UNREACH or some error, as this is not
> + * good
> + */
> }
> }
>
> @@ -451,17 +477,17 @@
> }
>
> weights = (enum mca_btl_tcp_connection_quality**) malloc(perm_size
> - * sizeof(enum mca_btl_tcp_connection_quality*));
> + * sizeof(enum mca_btl_tcp_connection_quality*));
>
> best_addr = (mca_btl_tcp_addr_t ***) malloc(perm_size
> - * sizeof(mca_btl_tcp_addr_t **));
> + * sizeof(mca_btl_tcp_addr_t **));
> for(i = 0; i < perm_size; ++i) {
> weights[i] = (enum mca_btl_tcp_connection_quality*) malloc(perm_size *
> - sizeof(enum mca_btl_tcp_connection_quality));
> + sizeof(enum mca_btl_tcp_connection_quality));
> memset(weights[i], 0, perm_size * sizeof(enum mca_btl_tcp_connection_quality));
>
> best_addr[i] = (mca_btl_tcp_addr_t **) malloc(perm_size *
> - sizeof(mca_btl_tcp_addr_t *));
> + sizeof(mca_btl_tcp_addr_t *));
> memset(best_addr[i], 0, perm_size * sizeof(mca_btl_tcp_addr_t *));
> }
>
> @@ -478,11 +504,11 @@
>
> /* check for RFC1918 */
> if(opal_net_addr_isipv4public((struct sockaddr*) local_interfaces[i]->ipv4_address)
> - && opal_net_addr_isipv4public((struct sockaddr*)
> - peer_interfaces[j]->ipv4_address)) {
> + && opal_net_addr_isipv4public((struct sockaddr*)
> + peer_interfaces[j]->ipv4_address)) {
> if(opal_net_samenetwork((struct sockaddr*) local_interfaces[i]->ipv4_address,
> - (struct sockaddr*) peer_interfaces[j]->ipv4_address,
> - local_interfaces[i]->ipv4_netmask)) {
> + (struct sockaddr*) peer_interfaces[j]->ipv4_address,
> + local_interfaces[i]->ipv4_netmask)) {
> weights[i][j] = CQ_PUBLIC_SAME_NETWORK;
> } else {
> weights[i][j] = CQ_PUBLIC_DIFFERENT_NETWORK;
> @@ -491,8 +517,8 @@
> continue;
> } else {
> if(opal_net_samenetwork((struct sockaddr*) local_interfaces[i]->ipv4_address,
> - (struct sockaddr*) peer_interfaces[j]->ipv4_address,
> - local_interfaces[i]->ipv4_netmask)) {
> + (struct sockaddr*) peer_interfaces[j]->ipv4_address,
> + local_interfaces[i]->ipv4_netmask)) {
> weights[i][j] = CQ_PRIVATE_SAME_NETWORK;
> } else {
> weights[i][j] = CQ_PRIVATE_DIFFERENT_NETWORK;
> @@ -507,8 +533,8 @@
> if(NULL != local_interfaces[i]->ipv6_address &&
> NULL != peer_interfaces[j]->ipv6_address) {
> if(opal_net_samenetwork((struct sockaddr*) local_interfaces[i]->ipv6_address,
> - (struct sockaddr*) peer_interfaces[j]->ipv6_address,
> - local_interfaces[i]->ipv6_netmask)) {
> + (struct sockaddr*) peer_interfaces[j]->ipv6_address,
> + local_interfaces[i]->ipv6_netmask)) {
> weights[i][j] = CQ_PUBLIC_SAME_NETWORK;
> } else {
> weights[i][j] = CQ_PUBLIC_DIFFERENT_NETWORK;
> @@ -539,9 +565,9 @@
> rc = OMPI_ERR_UNREACH;
> for(i = 0; i < perm_size; ++i) {
> if(best_assignment[i] > num_peer_interfaces
> - || weights[i][best_assignment[i]] == CQ_NO_CONNECTION
> - || peer_interfaces[best_assignment[i]]->inuse
> - || NULL == peer_interfaces[best_assignment[i]]) {
> + || weights[i][best_assignment[i]] == CQ_NO_CONNECTION
> + || peer_interfaces[best_assignment[i]]->inuse
> + || NULL == peer_interfaces[best_assignment[i]]) {
> continue;
> }
> peer_interfaces[best_assignment[i]]->inuse++;
> @@ -565,6 +591,9 @@
> }
> free(peer_interfaces[i]);
> }
> + free(peer_interfaces);
> + peer_interfaces = NULL;
> + max_peer_interfaces = 0;
>
> for(i = 0; i < num_local_interfaces; ++i) {
> if(NULL != local_interfaces[i]->ipv4_address) {
> @@ -575,6 +604,9 @@
> }
> free(local_interfaces[i]);
> }
> + free(local_interfaces);
> + local_interfaces = NULL;
> + max_local_interfaces = 0;
>
> free(weights);
> free(best_addr);
>
> Modified: trunk/ompi/mca/btl/tcp/btl_tcp_proc.h
> ==============================================================================
> --- trunk/ompi/mca/btl/tcp/btl_tcp_proc.h (original)
> +++ trunk/ompi/mca/btl/tcp/btl_tcp_proc.h 2010-02-19 02:10:32 EST (Fri, 19 Feb 2010)
> @@ -41,9 +41,6 @@
> ompi_proc_t *proc_ompi;
> /**< pointer to corresponding ompi_proc_t */
>
> - orte_process_name_t proc_name;
> - /**< globally unique identifier for the process */
> -
> struct mca_btl_tcp_addr_t* proc_addrs;
> /**< array of addresses exported by peer */
>
> _______________________________________________
> svn-full mailing list
> svn-full_at_[hidden]
> http://www.open-mpi.org/mailman/listinfo.cgi/svn-full
>

-- 
Jeff Squyres
jsquyres_at_[hidden]
For corporate legal information go to:
http://www.cisco.com/web/about/doing_business/legal/cri/