Open MPI logo

Open MPI Development Mailing List Archives

  |   Home   |   Support   |   FAQ   |   all Development mailing list

From: Jeff Squyres (jsquyres_at_[hidden])
Date: 2007-07-25 13:51:24


Aside from updating the TCP BTL to use the not-deprecated MCA
parameter interface functions, the new functionality introduced in
this patch is about 5 lines of code (1 new member variable in the
component struct, one new call to the MCA param register function,
then use the new component.member variable instead of a hard-coded
1). Is it really harmful?

I did it because I found it silly that we have this nice run-time
parameter system but for someone who wanted to experiment with
nagle's algorithm, they had to go change source code.

Plus, per prior conversations, it would be nice to be able to have
OMPI be able to block for progress someday...

So yes, I agree it will be very unusual for someone to use this
parameter. But doesn't that describe many of our MCA
parameters? :-) The point of MCA parameters is not necessarily for
all the purposes that we can imagine -- it's precisely for the
reasons that we *can't* imagine that they're useful.

Just my $0.02...

On Jul 25, 2007, at 1:40 PM, George Bosilca wrote:

> Do we really need this one ? It look more like dead code forever to
> me than anything else.
>
> On one hand we're claiming that we don't use any blocking pooling
> inside (and therefore Open MPI use 100% of the CPU) because a cluster
> is dedicated to performance gathering, and on the other we allow the
> users to disable the only latency improvement that TCP allow us ...
> It doesn't really make sense to me. Moreover, it turned out that the
> problem wasn't even coming from there.
>
> george.
>
> On Jul 25, 2007, at 8:21 AM, jsquyres_at_[hidden] wrote:
>
>> Author: jsquyres
>> Date: 2007-07-25 08:21:00 EDT (Wed, 25 Jul 2007)
>> New Revision: 15606
>> URL: https://svn.open-mpi.org/trac/ompi/changeset/15606
>>
>> Log:
>> Add MCA parameter to enable/disable Nagle's algorithm on the TCP BTL.
>>
>> Text files modified:
>> trunk/ompi/mca/btl/tcp/btl_tcp.h | 3 ++
>> trunk/ompi/mca/btl/tcp/btl_tcp_component.c | 54 ++++++++++++++
>> ++++++++-----------------
>> trunk/ompi/mca/btl/tcp/btl_tcp_endpoint.c | 2
>> 3 files changed, 34 insertions(+), 25 deletions(-)
>>
>> Modified: trunk/ompi/mca/btl/tcp/btl_tcp.h
>> =====================================================================
>> =
>> ========
>> --- trunk/ompi/mca/btl/tcp/btl_tcp.h (original)
>> +++ trunk/ompi/mca/btl/tcp/btl_tcp.h 2007-07-25 08:21:00 EDT (Wed,
>> 25 Jul 2007)
>> @@ -90,6 +90,9 @@
>> ompi_free_list_t tcp_frag_eager;
>> ompi_free_list_t tcp_frag_max;
>> ompi_free_list_t tcp_frag_user;
>> +
>> + /* Do we want to use TCP_NODELAY? */
>> + int tcp_use_nodelay;
>> };
>> typedef struct mca_btl_tcp_component_t mca_btl_tcp_component_t;
>>
>>
>> Modified: trunk/ompi/mca/btl/tcp/btl_tcp_component.c
>> =====================================================================
>> =
>> ========
>> --- trunk/ompi/mca/btl/tcp/btl_tcp_component.c (original)
>> +++ trunk/ompi/mca/btl/tcp/btl_tcp_component.c 2007-07-25 08:21:00
>> EDT (Wed, 25 Jul 2007)
>> @@ -104,23 +104,27 @@
>> */
>>
>> static inline char* mca_btl_tcp_param_register_string(
>> - const char*
>> param_name,
>> - const char*
>> default_value)
>> + const char* param_name,
>> + const char* help_string,
>> + const char* default_value)
>> {
>> - char *param_value;
>> - int id = mca_base_param_register_string
>> ("btl","tcp",param_name,NULL,default_value);
>> - mca_base_param_lookup_string(id, &param_value);
>> - return param_value;
>> + char *value;
>> + mca_base_param_reg_string
>> (&mca_btl_tcp_component.super.btl_version,
>> + param_name, help_string, false, false,
>> + default_value, &value);
>> + return value;
>> }
>>
>> static inline int mca_btl_tcp_param_register_int(
>> const char* param_name,
>> + const char* help_string,
>> int default_value)
>> {
>> - int id = mca_base_param_register_int
>> ("btl","tcp",param_name,NULL,default_value);
>> - int param_value = default_value;
>> - mca_base_param_lookup_int(id,&param_value);
>> - return param_value;
>> + int value;
>> + mca_base_param_reg_int(&mca_btl_tcp_component.super.btl_version,
>> + param_name, help_string, false, false,
>> + default_value, &value);
>> + return value;
>> }
>>
>>
>> @@ -197,23 +201,25 @@
>>
>> /* register TCP component parameters */
>> mca_btl_tcp_component.tcp_num_links =
>> - mca_btl_tcp_param_register_int("links", 1);
>> + mca_btl_tcp_param_register_int("links", NULL, 1);
>> mca_btl_tcp_component.tcp_if_include =
>> - mca_btl_tcp_param_register_string("if_include", "");
>> + mca_btl_tcp_param_register_string("if_include", NULL, "");
>> mca_btl_tcp_component.tcp_if_exclude =
>> - mca_btl_tcp_param_register_string("if_exclude", "lo");
>> + mca_btl_tcp_param_register_string("if_exclude", NULL, "lo");
>> mca_btl_tcp_component.tcp_free_list_num =
>> - mca_btl_tcp_param_register_int ("free_list_num", 8);
>> + mca_btl_tcp_param_register_int ("free_list_num", NULL, 8);
>> mca_btl_tcp_component.tcp_free_list_max =
>> - mca_btl_tcp_param_register_int ("free_list_max", -1);
>> + mca_btl_tcp_param_register_int ("free_list_max", NULL, -1);
>> mca_btl_tcp_component.tcp_free_list_inc =
>> - mca_btl_tcp_param_register_int ("free_list_inc", 32);
>> + mca_btl_tcp_param_register_int ("free_list_inc", NULL, 32);
>> mca_btl_tcp_component.tcp_sndbuf =
>> - mca_btl_tcp_param_register_int ("sndbuf", 128*1024);
>> + mca_btl_tcp_param_register_int ("sndbuf", NULL, 128*1024);
>> mca_btl_tcp_component.tcp_rcvbuf =
>> - mca_btl_tcp_param_register_int ("rcvbuf", 128*1024);
>> + mca_btl_tcp_param_register_int ("rcvbuf", NULL, 128*1024);
>> mca_btl_tcp_component.tcp_endpoint_cache =
>> - mca_btl_tcp_param_register_int ("endpoint_cache", 30*1024);
>> + mca_btl_tcp_param_register_int ("endpoint_cache", NULL,
>> 30*1024);
>> + mca_btl_tcp_component.tcp_use_nodelay =
>> + !mca_btl_tcp_param_register_int ("use_nagle", "Whether to
>> use Nagle's algorithm or not (using Nagle's algorithm may increase
>> short message latency)", 0);
>>
>> mca_btl_tcp_module.super.btl_exclusivity =
>> MCA_BTL_EXCLUSIVITY_LOW;
>> mca_btl_tcp_module.super.btl_eager_limit = 64*1024;
>> @@ -232,7 +238,7 @@
>> &mca_btl_tcp_module.super);
>>
>> mca_btl_tcp_component.tcp_disable_family =
>> - mca_btl_tcp_param_register_int ("disable_family", 0);
>> + mca_btl_tcp_param_register_int ("disable_family", NULL, 0);
>> return OMPI_SUCCESS;
>> }
>>
>> @@ -320,11 +326,11 @@
>>
>> /* allow user to specify interface bandwidth */
>> sprintf(param, "bandwidth_%s", if_name);
>> - btl->super.btl_bandwidth = mca_btl_tcp_param_register_int
>> (param, btl->super.btl_bandwidth);
>> + btl->super.btl_bandwidth = mca_btl_tcp_param_register_int
>> (param, NULL, btl->super.btl_bandwidth);
>>
>> /* allow user to override/specify latency ranking */
>> sprintf(param, "latency_%s", if_name);
>> - btl->super.btl_latency = mca_btl_tcp_param_register_int
>> (param, btl->super.btl_latency);
>> + btl->super.btl_latency = mca_btl_tcp_param_register_int
>> (param, NULL, btl->super.btl_latency);
>> if( i > 0 ) {
>> btl->super.btl_bandwidth >>= 1;
>> btl->super.btl_latency <<= 1;
>> @@ -332,11 +338,11 @@
>>
>> /* allow user to specify interface bandwidth */
>> sprintf(param, "bandwidth_%s:%d", if_name, i);
>> - btl->super.btl_bandwidth = mca_btl_tcp_param_register_int
>> (param, btl->super.btl_bandwidth);
>> + btl->super.btl_bandwidth = mca_btl_tcp_param_register_int
>> (param, NULL, btl->super.btl_bandwidth);
>>
>> /* allow user to override/specify latency ranking */
>> sprintf(param, "latency_%s:%d", if_name, i);
>> - btl->super.btl_latency = mca_btl_tcp_param_register_int
>> (param, btl->super.btl_latency);
>> + btl->super.btl_latency = mca_btl_tcp_param_register_int
>> (param, NULL, btl->super.btl_latency);
>> #if 0 && OMPI_ENABLE_DEBUG
>> BTL_OUTPUT(("interface %s instance %i: bandwidth %d
>> latency %d\n", if_name, i,
>> btl->super.btl_bandwidth, btl-
>>> super.btl_latency));
>>
>> Modified: trunk/ompi/mca/btl/tcp/btl_tcp_endpoint.c
>> =====================================================================
>> =
>> ========
>> --- trunk/ompi/mca/btl/tcp/btl_tcp_endpoint.c (original)
>> +++ trunk/ompi/mca/btl/tcp/btl_tcp_endpoint.c 2007-07-25 08:21:00
>> EDT (Wed, 25 Jul 2007)
>> @@ -489,7 +489,7 @@
>> {
>> int optval;
>> #if defined(TCP_NODELAY)
>> - optval = 1;
>> + optval = mca_btl_tcp_component.tcp_use_nodelay;
>> if(setsockopt(sd, IPPROTO_TCP, TCP_NODELAY, (char *)&optval,
>> sizeof(optval)) < 0) {
>> BTL_ERROR(("setsockopt(TCP_NODELAY) failed: %s (%d)",
>> strerror(opal_socket_errno), opal_socket_errno));
>> _______________________________________________
>> svn mailing list
>> svn_at_[hidden]
>> http://www.open-mpi.org/mailman/listinfo.cgi/svn
>
> _______________________________________________
> devel mailing list
> devel_at_[hidden]
> http://www.open-mpi.org/mailman/listinfo.cgi/devel

-- 
Jeff Squyres
Cisco Systems