Open MPI logo

Open MPI Development Mailing List Archives

  |   Home   |   Support   |   FAQ   |   all Development mailing list

Subject: Re: [OMPI devel] Some questions about checkpoint/restart (7)
From: Josh Hursey (jjhursey_at_[hidden])
Date: 2010-03-18 09:07:34


I would agree with George's comment on this particular bug. I doubt
that the problem is with the datatype engine, but it is more likely
that it is a problem with how the crcp/bkmrk component uses the
datatype engine. I'll have to look into it more later.

-- Josh

On Mar 18, 2010, at 9:00 AM, George Bosilca wrote:

> Takayuki,
>
> ompi_ddt_type_size returns the size in bytes of the content of the
> datatype, ignoring the gaps. This function is useful to know the
> amount of data one has to send over the network, and obviously in
> this case one should avoid sending the useless gaps/spaces. This
> function corresponds to MPI_Type_size, as defined by the MPI
> standard. This is totally different from the sizeof operator in
> C/C++, as it doesn't include the gaps (spaces) in the middle of the
> datatype, i.e. there is no notion of alignment.
>
> If you want to get the total span of the datatype you can use the
> get_extent or get_true_extent of the datatype. These two functions
> are similar to their counterparts from the MPI standard. Please
> read the datatype chapter in the MPI 2.2 standard for more
> information.
>
> If there is a problem with the code, it certainly doesn't come from
> the ompi_ddt_type_size function.
>
> george.
>
> On Mar 18, 2010, at 05:23 , Takayuki Seki wrote:
>
>>
>> 7th question is as follows:
>>
>> (7) The result of a communication that uses a derived datatype
>> after taking a checkpoint is incorrect.
>>
>> Framework : crcp
>> Component : bkmrk
>> The source file : ompi/mca/crcp/bkmrk/crcp_bkmrk_pml.c
>> The function name : traffic_message_append
>>
>> Framework : datatype
>> The source file : ompi/datatype/datatype.h
>> The function name : ompi_ddt_type_size
>>
>> Here's the code that causes the problem:
>>
>> struct dd {
>> char x;
>> float a;
>> char y;
>> float b;
>> int c;
>> };
>> struct dd buf,ans_dd_buf;
>>
>> if (rank == 0) {
>> buf.x = (char)1;
>> buf.a = (float)4329.1003;
>> buf.y = (char)2;
>> buf.b = (float)8474.73;
>> buf.c = (int)48;
>> }
>> else {
>> buf.x = (char)0;
>> buf.a = (float)0;
>> buf.y = (char)0;
>> buf.b = (float)0;
>> buf.c = (int)0;
>> }
>> ans_dd_buf.x = (char)1;
>> ans_dd_buf.a = (float)4329.1003;
>> ans_dd_buf.y = (char)2;
>> ans_dd_buf.b = (float)8474.73;
>> ans_dd_buf.c = (int)48;
>>
>> /* item number per a block */
>> b_l[0] = b_l[1] = b_l[2] = b_l[3] = b_l[4] = 1;
>> /* datatype per a block */
>> dt[0] = dt[2] = MPI_BYTE;
>> dt[1] = dt[3] = MPI_FLOAT;
>> dt[4] = MPI_INT;
>> /* disp per a block */
>> dp[0] = 0;
>> MPI_Address(&buf.x,&st);
>> MPI_Address(&buf.a,&cr);
>> dp[1] = (cr - st);
>> MPI_Address(&buf.y,&cr);
>> dp[2] = (cr - st);
>> MPI_Address(&buf.b,&cr);
>> dp[3] = (cr - st);
>> MPI_Address(&buf.c,&cr);
>> dp[4] = (cr - st);
>> cc = MPI_Type_struct(ITEMNUM,&b_l[0],&dp[0],&dt[0],&newdt);
>> if (cc != MPI_SUCCESS) { MPI_Abort(MPI_COMM_WORLD,-1); }
>> cc = MPI_Type_commit(&newdt);
>> if (cc != MPI_SUCCESS) { MPI_Abort(MPI_COMM_WORLD,-1); }
>> MPI_Barrier(MPI_COMM_WORLD);
>> printf(" rank=%d pass-1 x->x =%d[%d] x->a=%d[%d] x->y=%d[%d] x->b=
>> %d[%d] x->c=%d[%d]\n"
>> ,rank
>> ,( (int)((unsigned long)(&buf.x) - (unsigned long)(&buf.x)) ),dp[0]
>> ,( (int)((unsigned long)(&buf.a) - (unsigned long)(&buf.x)) ),dp[1]
>> ,( (int)((unsigned long)(&buf.y) - (unsigned long)(&buf.x)) ),dp[2]
>> ,( (int)((unsigned long)(&buf.b) - (unsigned long)(&buf.x)) ),dp[3]
>> ,( (int)((unsigned long)(&buf.c) - (unsigned long)(&buf.x)) ),dp[4]
>> );
>> fflush(stdout);
>>
>> if (rank == 0) {
>> MPI_Isend(&buf,1,newdt,1,1000,MPI_COMM_WORLD,&req);
>> printf(" rank=%d sleep start \n",rank); fflush(stdout);
>> sleep(SLPTIME); /** take checkpoint at this point **/
>> printf(" rank=%d sleep end \n",rank); fflush(stdout);
>> MPI_Wait(&req,&sts);
>> MPI_Type_free(&newdt);
>> }
>> else { /* rank 1 */
>> printf(" rank=%d sleep start \n",rank); fflush(stdout);
>> sleep(SLPTIME); /** take checkpoint at this point **/
>> printf(" rank=%d sleep end \n",rank); fflush(stdout);
>> MPI_Irecv(&buf,1,newdt,0,1000,MPI_COMM_WORLD,&req);
>> MPI_Wait(&req,&sts);
>> MPI_Type_free(&newdt);
>> }
>> if (ans_dd_buf.x != buf.x) { MPI_Abort(MPI_COMM_WORLD,1); }
>> if (ans_dd_buf.a != buf.a) { MPI_Abort(MPI_COMM_WORLD,1); } /* The
>> error occurs at this point */
>> if (ans_dd_buf.y != buf.y) { MPI_Abort(MPI_COMM_WORLD,1); }
>> if (ans_dd_buf.b != buf.b) { MPI_Abort(MPI_COMM_WORLD,1); }
>> if (ans_dd_buf.c != buf.c) { MPI_Abort(MPI_COMM_WORLD,1); }
>>
>> * Take the checkpoint while rank 0 and rank 1 are inside the
>> sleep function.
>>
>> * Construct derived datatype from the structure dd.
>>
>> * I think that image of memory mapping of the derived datatype is
>> as follows:
>>
>> 1111111111
>> 01234567890123456789
>> --------------------
>> X###AAAAY###BBBBCCCC
>> --------------------
>>
>> ### means space.
>>
>> * The ddt_size field (commented as /** Quick reference to the size
>> of the datatype */) in the ompi_crcp_bkmrk_pml_traffic_message_ref_t
>> structure is obtained by calling the ompi_ddt_type_size function in
>> the traffic_message_append function.
>>
>> if( NULL != datatype ) {
>> ompi_ddt_type_size(datatype,
>> &ddt_size);
>>
>> * I think that the returned value of ddt_size is wrong.
>> The obtained value is 14. (Does it mean the total size in memory is
>> 14 bytes?)
>>
>> struct dd {
>> char x; -> character is 1 byte.
>> float a; -> float is 4 bytes.
>> char y; -> character is 1 byte.
>> float b; -> float is 4 bytes.
>> int c; -> integer is 4 bytes.
>> };
>>
>> * But the returned value of ddt_size should be 20bytes, considering
>> the memory mapping.
>>
>> * Rank 1 receives a message of only 14 bytes in the bkmrk component.
>> The wrong result is obtained.
>>
>> * t_mpi_question-7-ng.c : the error occurs.
>> Here's my debugging output.
>>
>> ft_event_post_drain_message:Irecv drain_msg_ref=c89200 rank=0
>> tag=1000 cnt=1 ddt=14 to=c929b0 [datatype->size=1]
>> wait_quiesce_drained: x=1 a=142658605493679655240073216.000000 y=4
>> b=0.000000 c=32
>> /* 14bytes data is received, it is incorrect. values are wrong. */
>> drain_message_check_recv:datatype->size=1 14 count=1 1
>> ompi_ddt_copy_content_same_ddt:Start size=14 flag=102/4 count=1
>> /* DT_FLAG_CONTIGUOUS is false. */
>>
>>
>> * t_mpi_question-7-ok.c : the error does not occur.
>> Here's my debugging output.
>>
>> ft_event_post_drain_message:Irecv drain_msg_ref=a51280 rank=0
>> tag=1000 cnt=1 ddt=20 to=a5b6b0 [datatype->size=1]
>> wait_quiesce_drained: x=1 a=4329.100098 y=2 b=8474.730469 c=48
>> /* 20bytes data is received correctly. */
>> drain_message_check_recv:datatype->size=1 20 count=1 1
>> ompi_ddt_copy_content_same_ddt:Start size=20 flag=186/4 count=1
>> /* DT_FLAG_CONTIGUOUS is true. */
>>
>> * difference list
>>
>> -bash-3.2$ diff -c t_mpi_question-7-ng.c t_mpi_question-7-ok.c
>> *** t_mpi_question-7-ng.c Fri Feb 26 13:07:05 2010
>> --- t_mpi_question-7-ok.c Fri Feb 26 13:20:25 2010
>> ***************
>> *** 8,16 ****
>> #define ITEMNUM 5
>>
>> struct dd {
>> ! char x;
>> float a;
>> ! char y;
>> float b;
>> int c;
>> };
>> --- 8,16 ----
>> #define ITEMNUM 5
>>
>> struct dd {
>> ! int x;
>> float a;
>> ! int y;
>> float b;
>> int c;
>> };
>> ***************
>> *** 31,52 ****
>> MPI_Comm_size(MPI_COMM_WORLD,&size);
>>
>> if (rank == 0) {
>> ! buf.x = (char)1;
>> buf.a = (float)4329.1003;
>> ! buf.y = (char)2;
>> buf.b = (float)8474.73;
>> buf.c = (int)48;
>> }
>> else {
>> ! buf.x = (char)0;
>> buf.a = (float)0;
>> ! buf.y = (char)0;
>> buf.b = (float)0;
>> buf.c = (int)0;
>> }
>> ! ans_dd_buf.x = (char)1;
>> ans_dd_buf.a = (float)4329.1003;
>> ! ans_dd_buf.y = (char)2;
>> ans_dd_buf.b = (float)8474.73;
>> ans_dd_buf.c = (int)48;
>>
>> --- 31,52 ----
>> MPI_Comm_size(MPI_COMM_WORLD,&size);
>>
>> if (rank == 0) {
>> ! buf.x = (int)1;
>> buf.a = (float)4329.1003;
>> ! buf.y = (int)2;
>> buf.b = (float)8474.73;
>> buf.c = (int)48;
>> }
>> else {
>> ! buf.x = (int)0;
>> buf.a = (float)0;
>> ! buf.y = (int)0;
>> buf.b = (float)0;
>> buf.c = (int)0;
>> }
>> ! ans_dd_buf.x = (int)1;
>> ans_dd_buf.a = (float)4329.1003;
>> ! ans_dd_buf.y = (int)2;
>> ans_dd_buf.b = (float)8474.73;
>> ans_dd_buf.c = (int)48;
>>
>> ***************
>> *** 54,60 ****
>> b_l[0] = b_l[1] = b_l[2] = b_l[3] = b_l[4] = 1;
>>
>> /* datatype per a block */
>> ! dt[0] = dt[2] = MPI_BYTE;
>> dt[1] = dt[3] = MPI_FLOAT;
>> dt[4] = MPI_INT;
>>
>> --- 54,60 ----
>> b_l[0] = b_l[1] = b_l[2] = b_l[3] = b_l[4] = 1;
>>
>> /* datatype per a block */
>> ! dt[0] = dt[2] = MPI_INT;
>> dt[1] = dt[3] = MPI_FLOAT;
>> dt[4] = MPI_INT;
>>
>>
>> -bash-3.2$ cat t_mpi_question-7-ng.c
>> #include <stdio.h>
>> #include <stdlib.h>
>> #include <unistd.h>
>> #include "mpi.h"
>>
>> #define SLPTIME 60
>>
>> #define ITEMNUM 5
>>
>> struct dd {
>> char x;
>> float a;
>> char y;
>> float b;
>> int c;
>> };
>>
>> int main(int ac,char **av)
>> {
>> int rank,size,cc;
>> MPI_Request req;
>> MPI_Status sts;
>> struct dd buf,ans_dd_buf;
>> int b_l[ITEMNUM];
>> MPI_Aint dp[ITEMNUM],st,cr;
>> MPI_Datatype dt[ITEMNUM],newdt;
>>
>> MPI_Init(&ac,&av);
>>
>> MPI_Comm_rank(MPI_COMM_WORLD,&rank);
>> MPI_Comm_size(MPI_COMM_WORLD,&size);
>>
>> if (rank == 0) {
>> buf.x = (char)1;
>> buf.a = (float)4329.1003;
>> buf.y = (char)2;
>> buf.b = (float)8474.73;
>> buf.c = (int)48;
>> }
>> else {
>> buf.x = (char)0;
>> buf.a = (float)0;
>> buf.y = (char)0;
>> buf.b = (float)0;
>> buf.c = (int)0;
>> }
>> ans_dd_buf.x = (char)1;
>> ans_dd_buf.a = (float)4329.1003;
>> ans_dd_buf.y = (char)2;
>> ans_dd_buf.b = (float)8474.73;
>> ans_dd_buf.c = (int)48;
>>
>> /* item number per a block */
>> b_l[0] = b_l[1] = b_l[2] = b_l[3] = b_l[4] = 1;
>>
>> /* datatype per a block */
>> dt[0] = dt[2] = MPI_BYTE;
>> dt[1] = dt[3] = MPI_FLOAT;
>> dt[4] = MPI_INT;
>>
>> /* disp per a block */
>> dp[0] = 0;
>> MPI_Address(&buf.x,&st);
>>
>> MPI_Address(&buf.a,&cr);
>> dp[1] = (cr - st);
>>
>> MPI_Address(&buf.y,&cr);
>> dp[2] = (cr - st);
>>
>> MPI_Address(&buf.b,&cr);
>> dp[3] = (cr - st);
>>
>> MPI_Address(&buf.c,&cr);
>> dp[4] = (cr - st);
>>
>> cc = MPI_Type_struct(ITEMNUM,&b_l[0],&dp[0],&dt[0],&newdt);
>> if (cc != MPI_SUCCESS) { MPI_Abort(MPI_COMM_WORLD,-1); }
>> cc = MPI_Type_commit(&newdt);
>> if (cc != MPI_SUCCESS) { MPI_Abort(MPI_COMM_WORLD,-1); }
>>
>> MPI_Barrier(MPI_COMM_WORLD);
>>
>> printf(" rank=%d pass-1 x->x =%d[%d] x->a=%d[%d] x->y=%d[%d] x->b=
>> %d[%d] x->c=%d[%d]\n"
>> ,rank
>> ,( (int)((unsigned long)(&buf.x) - (unsigned long)(&buf.x)) ),dp[0]
>> ,( (int)((unsigned long)(&buf.a) - (unsigned long)(&buf.x)) ),dp[1]
>> ,( (int)((unsigned long)(&buf.y) - (unsigned long)(&buf.x)) ),dp[2]
>> ,( (int)((unsigned long)(&buf.b) - (unsigned long)(&buf.x)) ),dp[3]
>> ,( (int)((unsigned long)(&buf.c) - (unsigned long)(&buf.x)) ),dp[4]
>> );
>> fflush(stdout);
>>
>> if (rank == 0) {
>> MPI_Isend(&buf,1,newdt,1,1000,MPI_COMM_WORLD,&req);
>>
>> printf(" rank=%d sleep start \n",rank); fflush(stdout);
>> sleep(SLPTIME);
>> printf(" rank=%d sleep end \n",rank); fflush(stdout);
>>
>> MPI_Wait(&req,&sts);
>> MPI_Type_free(&newdt);
>> }
>> else {
>> printf(" rank=%d sleep start \n",rank); fflush(stdout);
>> sleep(SLPTIME);
>> printf(" rank=%d sleep end \n",rank); fflush(stdout);
>>
>> MPI_Irecv(&buf,1,newdt,0,1000,MPI_COMM_WORLD,&req);
>> MPI_Wait(&req,&sts);
>> MPI_Type_free(&newdt);
>> }
>>
>> printf(" rank=%d pass-2 %d %f %d %f %d \n"
>> ,rank,buf.x,buf.a,buf.y,buf.b,buf.c); fflush(stdout);
>> if (ans_dd_buf.x != buf.x) { MPI_Abort(MPI_COMM_WORLD,1); }
>> if (ans_dd_buf.a != buf.a) { MPI_Abort(MPI_COMM_WORLD,1); }
>> if (ans_dd_buf.y != buf.y) { MPI_Abort(MPI_COMM_WORLD,1); }
>> if (ans_dd_buf.b != buf.b) { MPI_Abort(MPI_COMM_WORLD,1); }
>> if (ans_dd_buf.c != buf.c) { MPI_Abort(MPI_COMM_WORLD,1); }
>>
>> cc = MPI_Finalize();
>> if (rank ==0) {
>> printf(" rank=%d program end \n",rank); fflush(stdout);
>> }
>> return(0);
>> }
>>
>> -bash-3.2$ cat t_mpi_question-7-ok.c
>> #include <stdio.h>
>> #include <stdlib.h>
>> #include <unistd.h>
>> #include "mpi.h"
>>
>> #define SLPTIME 60
>>
>> #define ITEMNUM 5
>>
>> struct dd {
>> int x;
>> float a;
>> int y;
>> float b;
>> int c;
>> };
>>
>> int main(int ac,char **av)
>> {
>> int rank,size,cc;
>> MPI_Request req;
>> MPI_Status sts;
>> struct dd buf,ans_dd_buf;
>> int b_l[ITEMNUM];
>> MPI_Aint dp[ITEMNUM],st,cr;
>> MPI_Datatype dt[ITEMNUM],newdt;
>>
>> MPI_Init(&ac,&av);
>>
>> MPI_Comm_rank(MPI_COMM_WORLD,&rank);
>> MPI_Comm_size(MPI_COMM_WORLD,&size);
>>
>> if (rank == 0) {
>> buf.x = (int)1;
>> buf.a = (float)4329.1003;
>> buf.y = (int)2;
>> buf.b = (float)8474.73;
>> buf.c = (int)48;
>> }
>> else {
>> buf.x = (int)0;
>> buf.a = (float)0;
>> buf.y = (int)0;
>> buf.b = (float)0;
>> buf.c = (int)0;
>> }
>> ans_dd_buf.x = (int)1;
>> ans_dd_buf.a = (float)4329.1003;
>> ans_dd_buf.y = (int)2;
>> ans_dd_buf.b = (float)8474.73;
>> ans_dd_buf.c = (int)48;
>>
>> /* item number per a block */
>> b_l[0] = b_l[1] = b_l[2] = b_l[3] = b_l[4] = 1;
>>
>> /* datatype per a block */
>> dt[0] = dt[2] = MPI_INT;
>> dt[1] = dt[3] = MPI_FLOAT;
>> dt[4] = MPI_INT;
>>
>> /* disp per a block */
>> dp[0] = 0;
>> MPI_Address(&buf.x,&st);
>>
>> MPI_Address(&buf.a,&cr);
>> dp[1] = (cr - st);
>>
>> MPI_Address(&buf.y,&cr);
>> dp[2] = (cr - st);
>>
>> MPI_Address(&buf.b,&cr);
>> dp[3] = (cr - st);
>>
>> MPI_Address(&buf.c,&cr);
>> dp[4] = (cr - st);
>>
>> cc = MPI_Type_struct(ITEMNUM,&b_l[0],&dp[0],&dt[0],&newdt);
>> if (cc != MPI_SUCCESS) { MPI_Abort(MPI_COMM_WORLD,-1); }
>> cc = MPI_Type_commit(&newdt);
>> if (cc != MPI_SUCCESS) { MPI_Abort(MPI_COMM_WORLD,-1); }
>>
>> MPI_Barrier(MPI_COMM_WORLD);
>>
>> printf(" rank=%d pass-1 x->x =%d[%d] x->a=%d[%d] x->y=%d[%d] x->b=
>> %d[%d] x->c=%d[%d]\n"
>> ,rank
>> ,( (int)((unsigned long)(&buf.x) - (unsigned long)(&buf.x)) ),dp[0]
>> ,( (int)((unsigned long)(&buf.a) - (unsigned long)(&buf.x)) ),dp[1]
>> ,( (int)((unsigned long)(&buf.y) - (unsigned long)(&buf.x)) ),dp[2]
>> ,( (int)((unsigned long)(&buf.b) - (unsigned long)(&buf.x)) ),dp[3]
>> ,( (int)((unsigned long)(&buf.c) - (unsigned long)(&buf.x)) ),dp[4]
>> );
>> fflush(stdout);
>>
>> if (rank == 0) {
>> MPI_Isend(&buf,1,newdt,1,1000,MPI_COMM_WORLD,&req);
>>
>> printf(" rank=%d sleep start \n",rank); fflush(stdout);
>> sleep(SLPTIME);
>> printf(" rank=%d sleep end \n",rank); fflush(stdout);
>>
>> MPI_Wait(&req,&sts);
>> MPI_Type_free(&newdt);
>> }
>> else {
>> printf(" rank=%d sleep start \n",rank); fflush(stdout);
>> sleep(SLPTIME);
>> printf(" rank=%d sleep end \n",rank); fflush(stdout);
>>
>> MPI_Irecv(&buf,1,newdt,0,1000,MPI_COMM_WORLD,&req);
>> MPI_Wait(&req,&sts);
>> MPI_Type_free(&newdt);
>> }
>>
>> printf(" rank=%d pass-2 %d %f %d %f %d \n"
>> ,rank,buf.x,buf.a,buf.y,buf.b,buf.c); fflush(stdout);
>> if (ans_dd_buf.x != buf.x) { MPI_Abort(MPI_COMM_WORLD,1); }
>> if (ans_dd_buf.a != buf.a) { MPI_Abort(MPI_COMM_WORLD,1); }
>> if (ans_dd_buf.y != buf.y) { MPI_Abort(MPI_COMM_WORLD,1); }
>> if (ans_dd_buf.b != buf.b) { MPI_Abort(MPI_COMM_WORLD,1); }
>> if (ans_dd_buf.c != buf.c) { MPI_Abort(MPI_COMM_WORLD,1); }
>>
>> cc = MPI_Finalize();
>> if (rank ==0) {
>> printf(" rank=%d program end \n",rank); fflush(stdout);
>> }
>> return(0);
>> }
>>
>> _______________________________________________
>> devel mailing list
>> devel_at_[hidden]
>> http://www.open-mpi.org/mailman/listinfo.cgi/devel
>
>
> _______________________________________________
> devel mailing list
> devel_at_[hidden]
> http://www.open-mpi.org/mailman/listinfo.cgi/devel