Open MPI logo

Open MPI Development Mailing List Archives

  |   Home   |   Support   |   FAQ   |   all Development mailing list

Subject: [OMPI devel] Some questions about checkpoint/restart (5)
From: Takayuki Seki (seki_at_[hidden])
Date: 2010-03-18 05:06:43


5th question is as follows:

(5) Receiving with MPI_ANY_SOURCE, MPI_ANY_TAG.

This problem might have already been announced by the Open MPI Trac with "Ticket #1769".
This problem will occur with an ordinary MPI program.

Framework : crcp
Component : bkmrk
The source file : ompi/mca/crcp/bkmrk/crcp_bkmrk_pml.c
The function name : do_recv_msg_detail_check_drain,traffic_message_find

Here's the code that causes the problem:

#define BLOCKNUM 1048576
#define SLPTIM 60

  /* Rank 0 is the sender: three blocking sends with tag 100 followed by one
   * nonblocking send with tag 200, which is completed by MPI_Wait only after
   * the sleep. */
  if (rank == 0) {
    MPI_Send(wbuf,BLOCKNUM,MPI_INT,1,100,MPI_COMM_WORLD);
    MPI_Send(wbuf,BLOCKNUM,MPI_INT,1,100,MPI_COMM_WORLD);
    MPI_Send(wbuf,BLOCKNUM,MPI_INT,1,100,MPI_COMM_WORLD);
    MPI_Isend(wbuf,BLOCKNUM,MPI_INT,1,200,MPI_COMM_WORLD,&sreq[5]);
    printf(" rank=%d sleep start \n",rank); fflush(stdout);
    sleep(SLPTIM); /** take checkpoint at this point **/
    printf(" rank=%d sleep end \n",rank); fflush(stdout);
    MPI_Wait(&sreq[5],&ssts[5]);
  }
  /* Rank 1 is the receiver: the first and third receives name source 0 and
   * tag 100 explicitly, while the second one uses the wildcards
   * MPI_ANY_SOURCE / MPI_ANY_TAG. The tag-200 message is received only after
   * the sleep. */
  else { /* rank 1 */
    MPI_Recv(rbuf,BLOCKNUM,MPI_INT,0,100,MPI_COMM_WORLD,&rsts[2]);
    MPI_Irecv(rbuf,BLOCKNUM,MPI_INT,MPI_ANY_SOURCE,MPI_ANY_TAG,MPI_COMM_WORLD,&rreq[3]); MPI_Wait(&rreq[3],&rsts[3]);
    MPI_Irecv(rbuf,BLOCKNUM,MPI_INT,0,100,MPI_COMM_WORLD,&rreq[4]); MPI_Wait(&rreq[4],&rsts[4]);
    printf(" rank=%d sleep start \n",rank); fflush(stdout);
    sleep(SLPTIM); /** take checkpoint at this point **/
    printf(" rank=%d sleep end \n",rank); fflush(stdout);
    MPI_Recv(rbuf,BLOCKNUM,MPI_INT,0,200,MPI_COMM_WORLD,&rsts[5]);
  }

* Take a checkpoint while rank 0 and rank 1 are both inside the sleep() call.

* There are two messages in irecv_list that are considered to match the same condition because of MPI_ANY_SOURCE, MPI_ANY_TAG.
  They are as follows:

[IRECV=1e44a00 comm_id=6019e0/0/1 msgid=6 count=1048576 tag=100 rank=0 proc_name=-833290239/0 matched=0 done=1 active=0 drain=0 ] [c=0 r=1]
[IRECV=1e44b80 comm_id=6019e0/0/1 msgid=5 count=1048576 tag=-1 rank=-1 proc_name=-833290239/0 matched=0 done=1 active=0 drain=0 ] [c=0 r=1]

* However, the do_recv_msg_detail_check_drain function obtains the information for only one of these two messages via traffic_message_find.

* Therefore, the information for the other message cannot be obtained.

-bash-3.2$ cat t_mpi_question-5.c
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include "mpi.h"

#define BLOCKNUM 1048576
#define SLPTIM 60

int main(int ac,char **av)
{
  int i;
  int rank,size;
  int *wbuf;
  int *rbuf;
  MPI_Status rsts[4],ssts[4];
  MPI_Request rreq[4],sreq[4];
  int bufsize,count;

  MPI_Init(&ac,&av);

  MPI_Comm_rank(MPI_COMM_WORLD,&rank);
  MPI_Comm_size(MPI_COMM_WORLD,&size);
  printf(" rank=%d size=%d \n",rank,size); fflush(stdout);
  if (size != 2) { MPI_Abort(MPI_COMM_WORLD,-1); }

  rbuf = (int *)malloc(BLOCKNUM * sizeof(int));
  wbuf = (int *)malloc(BLOCKNUM * sizeof(int));
  if ((rbuf == NULL)||(wbuf == NULL)) { MPI_Abort(MPI_COMM_WORLD,-1); }

  printf(" rank=%d pass-1 \n",rank); fflush(stdout);

  MPI_Barrier(MPI_COMM_WORLD);

  if (rank == 0) {
    for (i=0;i<BLOCKNUM;i++) { wbuf[i] = (100+i); }
    MPI_Send(wbuf,BLOCKNUM,MPI_INT,1,100,MPI_COMM_WORLD);

    for (i=0;i<BLOCKNUM;i++) { wbuf[i] = (200+i); }
    MPI_Send(wbuf,BLOCKNUM,MPI_INT,1,100,MPI_COMM_WORLD);

    for (i=0;i<BLOCKNUM;i++) { wbuf[i] = (300+i); }
    MPI_Send(wbuf,BLOCKNUM,MPI_INT,1,100,MPI_COMM_WORLD);

    for (i=0;i<BLOCKNUM;i++) { wbuf[i] = (400+i); }
    MPI_Isend(wbuf,BLOCKNUM,MPI_INT,1,200,MPI_COMM_WORLD,&sreq[0]);

    printf(" rank=%d sleep start \n",rank); fflush(stdout);
    sleep(SLPTIM);
    printf(" rank=%d sleep end \n",rank); fflush(stdout);

    MPI_Wait(&sreq[0],&ssts[0]);
  }
  else {
    for (i=0;i<BLOCKNUM;i++) { rbuf[i] = 0; }
    MPI_Recv(rbuf,BLOCKNUM,MPI_INT,0,100,MPI_COMM_WORLD,&rsts[0]);
    for (i=0;i<BLOCKNUM;i++) { if(rbuf[i] != (100+i)) { MPI_Abort(MPI_COMM_WORLD,1); } }

    for (i=0;i<BLOCKNUM;i++) { rbuf[i] = 0; }
    MPI_Irecv(rbuf,BLOCKNUM,MPI_INT,MPI_ANY_SOURCE,MPI_ANY_TAG,MPI_COMM_WORLD,&rreq[1]); MPI_Wait(&rreq[1],&rsts[0]);
    for (i=0;i<BLOCKNUM;i++) { if(rbuf[i] != (200+i)) { MPI_Abort(MPI_COMM_WORLD,1); } }

    for (i=0;i<BLOCKNUM;i++) { rbuf[i] = 0; }
    MPI_Irecv(rbuf,BLOCKNUM,MPI_INT,0,100,MPI_COMM_WORLD,&rreq[2]); MPI_Wait(&rreq[2],&rsts[2]);
    for (i=0;i<BLOCKNUM;i++) { if(rbuf[i] != (300+i)) { MPI_Abort(MPI_COMM_WORLD,1); } }

    printf(" rank=%d sleep start \n",rank); fflush(stdout);
    sleep(SLPTIM);
    printf(" rank=%d sleep end \n",rank); fflush(stdout);

    for (i=0;i<BLOCKNUM;i++) { rbuf[i] = 0; }
    MPI_Recv(rbuf,BLOCKNUM,MPI_INT,0,200,MPI_COMM_WORLD,&rsts[3]);
    for (i=0;i<BLOCKNUM;i++) { if(rbuf[i] != (400+i)) { MPI_Abort(MPI_COMM_WORLD,1); } }
  }

  MPI_Barrier(MPI_COMM_WORLD);
  free(rbuf);
  free(wbuf);
  MPI_Finalize();
  if (rank == 0) {
    printf(" rank=%d program end \n",rank); fflush(stdout); fflush(stderr);
  }
  return(0);
}