Open MPI logo

Open MPI Development Mailing List Archives

  |   Home   |   Support   |   FAQ   |   all Development mailing list

Subject: Re: [OMPI devel] Some questions about checkpoint/restart (7)
From: George Bosilca (bosilca_at_[hidden])
Date: 2010-03-18 09:00:19


Takayuki,

ompi_ddt_type_size returns the size in bytes of the content of the datatype, ignoring the gaps. This function is useful for knowing the amount of data one has to send over the network, and obviously in this case one should avoid sending the useless gaps/spaces. This function corresponds to MPI_Type_size, as defined by the MPI standard. This is totally different from the sizeof operator in C/C++, as it doesn't include the gaps (spaces) in the middle of the datatype, i.e. there is no notion of alignment.

If you want to get the total span of the datatype, you can use the datatype's get_extent or get_true_extent functions. These two functions are similar to their counterparts in the MPI standard (MPI_Type_get_extent and MPI_Type_get_true_extent). Please read the datatype chapter in the MPI 2.2 standard for more information.

If there is a problem with the code, it certainly doesn't come from the ompi_ddt_type_size function.

  george.

On Mar 18, 2010, at 05:23 , Takayuki Seki wrote:

>
> 7th question is as follows:
>
> (7) The result of a communication that uses a derived datatype is incorrect after taking a checkpoint.
>
> Framework : crcp
> Component : bkmrk
> The source file : ompi/mca/crcp/bkmrk/crcp_bkmrk_pml.c
> The function name : traffic_message_append
>
> Framework : datatype
> The source file : ompi/datatype/datatype.h
> The function name : ompi_ddt_type_size
>
> Here's the code that causes the problem:
>
> struct dd {
> char x;
> float a;
> char y;
> float b;
> int c;
> };
> struct dd buf,ans_dd_buf;
>
> if (rank == 0) {
> buf.x = (char)1;
> buf.a = (float)4329.1003;
> buf.y = (char)2;
> buf.b = (float)8474.73;
> buf.c = (int)48;
> }
> else {
> buf.x = (char)0;
> buf.a = (float)0;
> buf.y = (char)0;
> buf.b = (float)0;
> buf.c = (int)0;
> }
> ans_dd_buf.x = (char)1;
> ans_dd_buf.a = (float)4329.1003;
> ans_dd_buf.y = (char)2;
> ans_dd_buf.b = (float)8474.73;
> ans_dd_buf.c = (int)48;
>
> /* item number per a block */
> b_l[0] = b_l[1] = b_l[2] = b_l[3] = b_l[4] = 1;
> /* datatype per a block */
> dt[0] = dt[2] = MPI_BYTE;
> dt[1] = dt[3] = MPI_FLOAT;
> dt[4] = MPI_INT;
> /* disp per a block */
> dp[0] = 0;
> MPI_Address(&buf.x,&st);
> MPI_Address(&buf.a,&cr);
> dp[1] = (cr - st);
> MPI_Address(&buf.y,&cr);
> dp[2] = (cr - st);
> MPI_Address(&buf.b,&cr);
> dp[3] = (cr - st);
> MPI_Address(&buf.c,&cr);
> dp[4] = (cr - st);
> cc = MPI_Type_struct(ITEMNUM,&b_l[0],&dp[0],&dt[0],&newdt);
> if (cc != MPI_SUCCESS) { MPI_Abort(MPI_COMM_WORLD,-1); }
> cc = MPI_Type_commit(&newdt);
> if (cc != MPI_SUCCESS) { MPI_Abort(MPI_COMM_WORLD,-1); }
> MPI_Barrier(MPI_COMM_WORLD);
> printf(" rank=%d pass-1 x->x =%d[%d] x->a=%d[%d] x->y=%d[%d] x->b=%d[%d] x->c=%d[%d]\n"
> ,rank
> ,( (int)((unsigned long)(&buf.x) - (unsigned long)(&buf.x)) ),dp[0]
> ,( (int)((unsigned long)(&buf.a) - (unsigned long)(&buf.x)) ),dp[1]
> ,( (int)((unsigned long)(&buf.y) - (unsigned long)(&buf.x)) ),dp[2]
> ,( (int)((unsigned long)(&buf.b) - (unsigned long)(&buf.x)) ),dp[3]
> ,( (int)((unsigned long)(&buf.c) - (unsigned long)(&buf.x)) ),dp[4]
> );
> fflush(stdout);
>
> if (rank == 0) {
> MPI_Isend(&buf,1,newdt,1,1000,MPI_COMM_WORLD,&req);
> printf(" rank=%d sleep start \n",rank); fflush(stdout);
> sleep(SLPTIME); /** take checkpoint at this point **/
> printf(" rank=%d sleep end \n",rank); fflush(stdout);
> MPI_Wait(&req,&sts);
> MPI_Type_free(&newdt);
> }
> else { /* rank 1 */
> printf(" rank=%d sleep start \n",rank); fflush(stdout);
> sleep(SLPTIME); /** take checkpoint at this point **/
> printf(" rank=%d sleep end \n",rank); fflush(stdout);
> MPI_Irecv(&buf,1,newdt,0,1000,MPI_COMM_WORLD,&req);
> MPI_Wait(&req,&sts);
> MPI_Type_free(&newdt);
> }
> if (ans_dd_buf.x != buf.x) { MPI_Abort(MPI_COMM_WORLD,1); }
> if (ans_dd_buf.a != buf.a) { MPI_Abort(MPI_COMM_WORLD,1); } /* The error occurs at this point */
> if (ans_dd_buf.y != buf.y) { MPI_Abort(MPI_COMM_WORLD,1); }
> if (ans_dd_buf.b != buf.b) { MPI_Abort(MPI_COMM_WORLD,1); }
> if (ans_dd_buf.c != buf.c) { MPI_Abort(MPI_COMM_WORLD,1); }
>
> * Take checkpoint while Rank 0 and Rank 1 are performing sleep function
>
> * Construct derived datatype from the structure dd.
>
> * I think that the memory layout of the derived datatype is as follows:
>
> 1111111111
> 01234567890123456789
> --------------------
> X###AAAAY###BBBBCCCC
> --------------------
>
> ### means space.
>
> * The ddt_size member (described as /** Quick reference to the size of the datatype */) of the
> ompi_crcp_bkmrk_pml_traffic_message_ref_t structure is obtained by calling the
> ompi_ddt_type_size function in the traffic_message_append function.
>
> if( NULL != datatype ) {
> ompi_ddt_type_size(datatype,
> &ddt_size);
>
> * I think that the returned value of ddt_size is wrong.
> The obtained value is 14. (Does it mean that the total size in memory is 14 bytes?)
>
> struct dd {
> char x; -> char is 1 byte.
> float a; -> float is 4 bytes.
> char y; -> char is 1 byte.
> float b; -> float is 4 bytes.
> int c; -> int is 4 bytes.
> };
>
> * But the returned value of ddt_size should be 20 bytes, considering the memory mapping.
>
> * Rank 1 receives a message of only 14 bytes in bkmrk,
> so the wrong result is obtained.
>
> * t_mpi_question-7-ng.c : the error occurs.
> Here's my debugging output.
>
> ft_event_post_drain_message:Irecv drain_msg_ref=c89200 rank=0 tag=1000 cnt=1 ddt=14 to=c929b0 [datatype->size=1]
> wait_quiesce_drained: x=1 a=142658605493679655240073216.000000 y=4 b=0.000000 c=32
> /* 14bytes data is received, it is incorrect. values are wrong. */
> drain_message_check_recv:datatype->size=1 14 count=1 1
> ompi_ddt_copy_content_same_ddt:Start size=14 flag=102/4 count=1
> /* DT_FLAG_CONTIGUOUS is false. */
>
>
> * t_mpi_question-7-ok.c : the error does not occur.
> Here's my debugging output.
>
> ft_event_post_drain_message:Irecv drain_msg_ref=a51280 rank=0 tag=1000 cnt=1 ddt=20 to=a5b6b0 [datatype->size=1]
> wait_quiesce_drained: x=1 a=4329.100098 y=2 b=8474.730469 c=48
> /* 20bytes data is received correctly. */
> drain_message_check_recv:datatype->size=1 20 count=1 1
> ompi_ddt_copy_content_same_ddt:Start size=20 flag=186/4 count=1
> /* DT_FLAG_CONTIGUOUS is true. */
>
> * difference list
>
> -bash-3.2$ diff -c t_mpi_question-7-ng.c t_mpi_question-7-ok.c
> *** t_mpi_question-7-ng.c Fri Feb 26 13:07:05 2010
> --- t_mpi_question-7-ok.c Fri Feb 26 13:20:25 2010
> ***************
> *** 8,16 ****
> #define ITEMNUM 5
>
> struct dd {
> ! char x;
> float a;
> ! char y;
> float b;
> int c;
> };
> --- 8,16 ----
> #define ITEMNUM 5
>
> struct dd {
> ! int x;
> float a;
> ! int y;
> float b;
> int c;
> };
> ***************
> *** 31,52 ****
> MPI_Comm_size(MPI_COMM_WORLD,&size);
>
> if (rank == 0) {
> ! buf.x = (char)1;
> buf.a = (float)4329.1003;
> ! buf.y = (char)2;
> buf.b = (float)8474.73;
> buf.c = (int)48;
> }
> else {
> ! buf.x = (char)0;
> buf.a = (float)0;
> ! buf.y = (char)0;
> buf.b = (float)0;
> buf.c = (int)0;
> }
> ! ans_dd_buf.x = (char)1;
> ans_dd_buf.a = (float)4329.1003;
> ! ans_dd_buf.y = (char)2;
> ans_dd_buf.b = (float)8474.73;
> ans_dd_buf.c = (int)48;
>
> --- 31,52 ----
> MPI_Comm_size(MPI_COMM_WORLD,&size);
>
> if (rank == 0) {
> ! buf.x = (int)1;
> buf.a = (float)4329.1003;
> ! buf.y = (int)2;
> buf.b = (float)8474.73;
> buf.c = (int)48;
> }
> else {
> ! buf.x = (int)0;
> buf.a = (float)0;
> ! buf.y = (int)0;
> buf.b = (float)0;
> buf.c = (int)0;
> }
> ! ans_dd_buf.x = (int)1;
> ans_dd_buf.a = (float)4329.1003;
> ! ans_dd_buf.y = (int)2;
> ans_dd_buf.b = (float)8474.73;
> ans_dd_buf.c = (int)48;
>
> ***************
> *** 54,60 ****
> b_l[0] = b_l[1] = b_l[2] = b_l[3] = b_l[4] = 1;
>
> /* datatype per a block */
> ! dt[0] = dt[2] = MPI_BYTE;
> dt[1] = dt[3] = MPI_FLOAT;
> dt[4] = MPI_INT;
>
> --- 54,60 ----
> b_l[0] = b_l[1] = b_l[2] = b_l[3] = b_l[4] = 1;
>
> /* datatype per a block */
> ! dt[0] = dt[2] = MPI_INT;
> dt[1] = dt[3] = MPI_FLOAT;
> dt[4] = MPI_INT;
>
>
> -bash-3.2$ cat t_mpi_question-7-ng.c
> #include <stdio.h>
> #include <stdlib.h>
> #include <unistd.h>
> #include "mpi.h"
>
> #define SLPTIME 60
>
> #define ITEMNUM 5
>
> struct dd {
> char x;
> float a;
> char y;
> float b;
> int c;
> };
>
> int main(int ac,char **av)
> {
> int rank,size,cc;
> MPI_Request req;
> MPI_Status sts;
> struct dd buf,ans_dd_buf;
> int b_l[ITEMNUM];
> MPI_Aint dp[ITEMNUM],st,cr;
> MPI_Datatype dt[ITEMNUM],newdt;
>
> MPI_Init(&ac,&av);
>
> MPI_Comm_rank(MPI_COMM_WORLD,&rank);
> MPI_Comm_size(MPI_COMM_WORLD,&size);
>
> if (rank == 0) {
> buf.x = (char)1;
> buf.a = (float)4329.1003;
> buf.y = (char)2;
> buf.b = (float)8474.73;
> buf.c = (int)48;
> }
> else {
> buf.x = (char)0;
> buf.a = (float)0;
> buf.y = (char)0;
> buf.b = (float)0;
> buf.c = (int)0;
> }
> ans_dd_buf.x = (char)1;
> ans_dd_buf.a = (float)4329.1003;
> ans_dd_buf.y = (char)2;
> ans_dd_buf.b = (float)8474.73;
> ans_dd_buf.c = (int)48;
>
> /* item number per a block */
> b_l[0] = b_l[1] = b_l[2] = b_l[3] = b_l[4] = 1;
>
> /* datatype per a block */
> dt[0] = dt[2] = MPI_BYTE;
> dt[1] = dt[3] = MPI_FLOAT;
> dt[4] = MPI_INT;
>
> /* disp per a block */
> dp[0] = 0;
> MPI_Address(&buf.x,&st);
>
> MPI_Address(&buf.a,&cr);
> dp[1] = (cr - st);
>
> MPI_Address(&buf.y,&cr);
> dp[2] = (cr - st);
>
> MPI_Address(&buf.b,&cr);
> dp[3] = (cr - st);
>
> MPI_Address(&buf.c,&cr);
> dp[4] = (cr - st);
>
> cc = MPI_Type_struct(ITEMNUM,&b_l[0],&dp[0],&dt[0],&newdt);
> if (cc != MPI_SUCCESS) { MPI_Abort(MPI_COMM_WORLD,-1); }
> cc = MPI_Type_commit(&newdt);
> if (cc != MPI_SUCCESS) { MPI_Abort(MPI_COMM_WORLD,-1); }
>
> MPI_Barrier(MPI_COMM_WORLD);
>
> printf(" rank=%d pass-1 x->x =%d[%d] x->a=%d[%d] x->y=%d[%d] x->b=%d[%d] x->c=%d[%d]\n"
> ,rank
> ,( (int)((unsigned long)(&buf.x) - (unsigned long)(&buf.x)) ),dp[0]
> ,( (int)((unsigned long)(&buf.a) - (unsigned long)(&buf.x)) ),dp[1]
> ,( (int)((unsigned long)(&buf.y) - (unsigned long)(&buf.x)) ),dp[2]
> ,( (int)((unsigned long)(&buf.b) - (unsigned long)(&buf.x)) ),dp[3]
> ,( (int)((unsigned long)(&buf.c) - (unsigned long)(&buf.x)) ),dp[4]
> );
> fflush(stdout);
>
> if (rank == 0) {
> MPI_Isend(&buf,1,newdt,1,1000,MPI_COMM_WORLD,&req);
>
> printf(" rank=%d sleep start \n",rank); fflush(stdout);
> sleep(SLPTIME);
> printf(" rank=%d sleep end \n",rank); fflush(stdout);
>
> MPI_Wait(&req,&sts);
> MPI_Type_free(&newdt);
> }
> else {
> printf(" rank=%d sleep start \n",rank); fflush(stdout);
> sleep(SLPTIME);
> printf(" rank=%d sleep end \n",rank); fflush(stdout);
>
> MPI_Irecv(&buf,1,newdt,0,1000,MPI_COMM_WORLD,&req);
> MPI_Wait(&req,&sts);
> MPI_Type_free(&newdt);
> }
>
> printf(" rank=%d pass-2 %d %f %d %f %d \n"
> ,rank,buf.x,buf.a,buf.y,buf.b,buf.c); fflush(stdout);
> if (ans_dd_buf.x != buf.x) { MPI_Abort(MPI_COMM_WORLD,1); }
> if (ans_dd_buf.a != buf.a) { MPI_Abort(MPI_COMM_WORLD,1); }
> if (ans_dd_buf.y != buf.y) { MPI_Abort(MPI_COMM_WORLD,1); }
> if (ans_dd_buf.b != buf.b) { MPI_Abort(MPI_COMM_WORLD,1); }
> if (ans_dd_buf.c != buf.c) { MPI_Abort(MPI_COMM_WORLD,1); }
>
> cc = MPI_Finalize();
> if (rank ==0) {
> printf(" rank=%d program end \n",rank); fflush(stdout);
> }
> return(0);
> }
>
> -bash-3.2$ cat t_mpi_question-7-ok.c
> #include <stdio.h>
> #include <stdlib.h>
> #include <unistd.h>
> #include "mpi.h"
>
> #define SLPTIME 60
>
> #define ITEMNUM 5
>
> struct dd {
> int x;
> float a;
> int y;
> float b;
> int c;
> };
>
> int main(int ac,char **av)
> {
> int rank,size,cc;
> MPI_Request req;
> MPI_Status sts;
> struct dd buf,ans_dd_buf;
> int b_l[ITEMNUM];
> MPI_Aint dp[ITEMNUM],st,cr;
> MPI_Datatype dt[ITEMNUM],newdt;
>
> MPI_Init(&ac,&av);
>
> MPI_Comm_rank(MPI_COMM_WORLD,&rank);
> MPI_Comm_size(MPI_COMM_WORLD,&size);
>
> if (rank == 0) {
> buf.x = (int)1;
> buf.a = (float)4329.1003;
> buf.y = (int)2;
> buf.b = (float)8474.73;
> buf.c = (int)48;
> }
> else {
> buf.x = (int)0;
> buf.a = (float)0;
> buf.y = (int)0;
> buf.b = (float)0;
> buf.c = (int)0;
> }
> ans_dd_buf.x = (int)1;
> ans_dd_buf.a = (float)4329.1003;
> ans_dd_buf.y = (int)2;
> ans_dd_buf.b = (float)8474.73;
> ans_dd_buf.c = (int)48;
>
> /* item number per a block */
> b_l[0] = b_l[1] = b_l[2] = b_l[3] = b_l[4] = 1;
>
> /* datatype per a block */
> dt[0] = dt[2] = MPI_INT;
> dt[1] = dt[3] = MPI_FLOAT;
> dt[4] = MPI_INT;
>
> /* disp per a block */
> dp[0] = 0;
> MPI_Address(&buf.x,&st);
>
> MPI_Address(&buf.a,&cr);
> dp[1] = (cr - st);
>
> MPI_Address(&buf.y,&cr);
> dp[2] = (cr - st);
>
> MPI_Address(&buf.b,&cr);
> dp[3] = (cr - st);
>
> MPI_Address(&buf.c,&cr);
> dp[4] = (cr - st);
>
> cc = MPI_Type_struct(ITEMNUM,&b_l[0],&dp[0],&dt[0],&newdt);
> if (cc != MPI_SUCCESS) { MPI_Abort(MPI_COMM_WORLD,-1); }
> cc = MPI_Type_commit(&newdt);
> if (cc != MPI_SUCCESS) { MPI_Abort(MPI_COMM_WORLD,-1); }
>
> MPI_Barrier(MPI_COMM_WORLD);
>
> printf(" rank=%d pass-1 x->x =%d[%d] x->a=%d[%d] x->y=%d[%d] x->b=%d[%d] x->c=%d[%d]\n"
> ,rank
> ,( (int)((unsigned long)(&buf.x) - (unsigned long)(&buf.x)) ),dp[0]
> ,( (int)((unsigned long)(&buf.a) - (unsigned long)(&buf.x)) ),dp[1]
> ,( (int)((unsigned long)(&buf.y) - (unsigned long)(&buf.x)) ),dp[2]
> ,( (int)((unsigned long)(&buf.b) - (unsigned long)(&buf.x)) ),dp[3]
> ,( (int)((unsigned long)(&buf.c) - (unsigned long)(&buf.x)) ),dp[4]
> );
> fflush(stdout);
>
> if (rank == 0) {
> MPI_Isend(&buf,1,newdt,1,1000,MPI_COMM_WORLD,&req);
>
> printf(" rank=%d sleep start \n",rank); fflush(stdout);
> sleep(SLPTIME);
> printf(" rank=%d sleep end \n",rank); fflush(stdout);
>
> MPI_Wait(&req,&sts);
> MPI_Type_free(&newdt);
> }
> else {
> printf(" rank=%d sleep start \n",rank); fflush(stdout);
> sleep(SLPTIME);
> printf(" rank=%d sleep end \n",rank); fflush(stdout);
>
> MPI_Irecv(&buf,1,newdt,0,1000,MPI_COMM_WORLD,&req);
> MPI_Wait(&req,&sts);
> MPI_Type_free(&newdt);
> }
>
> printf(" rank=%d pass-2 %d %f %d %f %d \n"
> ,rank,buf.x,buf.a,buf.y,buf.b,buf.c); fflush(stdout);
> if (ans_dd_buf.x != buf.x) { MPI_Abort(MPI_COMM_WORLD,1); }
> if (ans_dd_buf.a != buf.a) { MPI_Abort(MPI_COMM_WORLD,1); }
> if (ans_dd_buf.y != buf.y) { MPI_Abort(MPI_COMM_WORLD,1); }
> if (ans_dd_buf.b != buf.b) { MPI_Abort(MPI_COMM_WORLD,1); }
> if (ans_dd_buf.c != buf.c) { MPI_Abort(MPI_COMM_WORLD,1); }
>
> cc = MPI_Finalize();
> if (rank ==0) {
> printf(" rank=%d program end \n",rank); fflush(stdout);
> }
> return(0);
> }
>
> _______________________________________________
> devel mailing list
> devel_at_[hidden]
> http://www.open-mpi.org/mailman/listinfo.cgi/devel