Open MPI logo

Open MPI Development Mailing List Archives

  |   Home   |   Support   |   FAQ   |   all Development mailing list

Subject: [OMPI devel] Some questions about checkpoint/restart (4)
From: Takayuki Seki (seki_at_[hidden])
Date: 2010-03-12 01:27:54


4th question is as follows:

(4) The pointer variables for information about communicator in
    the ompi_crcp_bkmrk_pml_drain_message_ref_t structure and
    the ompi_crcp_bkmrk_pml_traffic_message_ref_t structure

Memory areas that were freed by the datatype-framework are still referenced by the bkmrk component.

Framework : crcp
Component : bkmrk
The source file : ompi/mca/crcp/bkmrk/crcp_bkmrk_pml.h

The source file : ompi/mca/crcp/bkmrk/crcp_bkmrk_pml.c
The function name : do_send_msg_detail, etc..

Here's the code that may cause the problem:

#define BLOCKNUM 1048576
#define SLPTIM 60

  if (rank == 0) {
    MPI_Comm_dup(MPI_COMM_WORLD,&commforcomm);
    MPI_Isend(wbuf,BLOCKNUM,MPI_INT,1,100,commforcomm,&sreq); MPI_Wait(&sreq,&ssts);
    MPI_Isend(wbuf,BLOCKNUM,MPI_INT,1,100,commforcomm,&sreq); MPI_Wait(&sreq,&ssts);
    MPI_Isend(wbuf,BLOCKNUM,MPI_INT,1,100,commforcomm,&sreq); MPI_Wait(&sreq,&ssts);
    MPI_Comm_free(&commforcomm);
    MPI_Isend(wbuf,BLOCKNUM,MPI_INT,1,100,MPI_COMM_WORLD,&sreq); MPI_Wait(&sreq,&ssts);
    MPI_Isend(wbuf,BLOCKNUM,MPI_INT,1,100,MPI_COMM_WORLD,&sreq); MPI_Wait(&sreq,&ssts);
    MPI_Isend(wbuf,BLOCKNUM,MPI_INT,1,100,MPI_COMM_WORLD,&sreq); MPI_Wait(&sreq,&ssts);
    MPI_Send(wbuf,BLOCKNUM,MPI_INT,1,100,MPI_COMM_WORLD); /** take checkpoint at this point **/
  }
  else { /* rank 1 */
    MPI_Comm_dup(MPI_COMM_WORLD,&commforcomm);
    MPI_Irecv(rbuf,BLOCKNUM,MPI_INT,0,100,commforcomm,&rreq); MPI_Wait(&rreq,&rsts);
    MPI_Irecv(rbuf,BLOCKNUM,MPI_INT,0,100,commforcomm,&rreq); MPI_Wait(&rreq,&rsts);
    MPI_Irecv(rbuf,BLOCKNUM,MPI_INT,0,100,commforcomm,&rreq); MPI_Wait(&rreq,&rsts);
    MPI_Comm_free(&commforcomm);
    MPI_Irecv(rbuf,BLOCKNUM,MPI_INT,0,100,MPI_COMM_WORLD,&rreq); MPI_Wait(&rreq,&rsts);
    MPI_Irecv(rbuf,BLOCKNUM,MPI_INT,0,100,MPI_COMM_WORLD,&rreq); MPI_Wait(&rreq,&rsts);
    MPI_Irecv(rbuf,BLOCKNUM,MPI_INT,0,100,MPI_COMM_WORLD,&rreq); MPI_Wait(&rreq,&rsts);
    printf(" rank=%d sleep start \n",rank); fflush(stdout);
    sleep(SLPTIM); /** take checkpoint at this point **/
    printf(" rank=%d sleep end \n",rank); fflush(stdout);
    MPI_Irecv(rbuf,BLOCKNUM,MPI_INT,0,100,MPI_COMM_WORLD,&rreq);
    MPI_Wait(&rreq,&rsts);
  }

* Take the checkpoint while Process 0 is in the MPI_Send function and Process 1 is in the sleep function.

* When the checkpoint is taken, the "commforcomm" communicator has already been freed.
  Even though the "commforcomm" communicator has already been freed at that point,
  the information about the "commforcomm" communicator is still referenced via these structures during the checkpoint action.

  The memory areas pointed to by the "commforcomm" communicator pointer variable have already been freed, and the values stored at those addresses may already be corrupted.

    struct ompi_crcp_bkmrk_pml_drain_message_ref_t {
           .
        /** Communicator pointer */
        ompi_communicator_t* comm;
           .

    }

    struct ompi_crcp_bkmrk_pml_traffic_message_ref_t {
           .
        /** Communicator pointer */
        ompi_communicator_t* comm;
           .
    }

    static int do_send_msg_detail( ... ) {
         .
        comm_my_rank = ompi_comm_rank(msg_ref->comm);
         .
    }

* I think that these structures should hold their own local copy of the communicator information they need,
  such as c_contextid, c_my_rank, etc.

-bash-3.2$ cat t_mpi_question-4.c
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <time.h>
#include <limits.h>
#include "mpi.h"

/* Number of int elements in each message buffer and in every send/receive. */
#define BLOCKNUM 1048576
/* Seconds the receiving rank sleeps before posting its final receive. */
#define SLPTIM 60

/*
 * Two-process reproduction program for the checkpoint/restart question.
 *
 * Rank 0 sends three messages on a duplicated communicator, frees that
 * communicator, sends three more messages on MPI_COMM_WORLD and finally
 * issues one blocking MPI_Send.  The other rank posts the matching
 * receives, checks every received element and calls abort() on a
 * mismatch; it sleeps for SLPTIM seconds before the last receive.
 *
 * The checkpoint is meant to be taken while rank 0 is inside the final
 * MPI_Send and the other rank is inside sleep(), i.e. after
 * "commforcomm" has already been freed on both sides.
 *
 * The job is aborted via MPI_Abort if it is not run with exactly two
 * processes or if a buffer allocation fails.  Returns 0 on completion.
 */
int main(int ac,char **av)
{
  int i;
  int rank,size;
  int *wbuf;
  int *rbuf;
  MPI_Status rsts,ssts;
  MPI_Request rreq,sreq;
  MPI_Comm commforcomm;

  MPI_Init(&ac,&av);

  MPI_Comm_rank(MPI_COMM_WORLD,&rank);
  MPI_Comm_size(MPI_COMM_WORLD,&size);
  /* The scenario is written for exactly one sender and one receiver. */
  if (size != 2) { MPI_Abort(MPI_COMM_WORLD,-1); }

  rbuf = (int *)malloc(BLOCKNUM * sizeof(int));
  wbuf = (int *)malloc(BLOCKNUM * sizeof(int));
  if ((rbuf == NULL)||(wbuf == NULL)) { MPI_Abort(MPI_COMM_WORLD,-1); }

  printf(" rank=%d size=%d \n",rank,size); fflush(stdout);
  MPI_Barrier(MPI_COMM_WORLD);

  if (rank == 0) {
    /* Phase 1: three sends on a duplicated communicator, which is then freed. */
    MPI_Comm_dup(MPI_COMM_WORLD,&commforcomm);
    /* Each message carries a distinct pattern (k*100 + i) that the receiver verifies. */
    for (i=0;i<BLOCKNUM;i++) { wbuf[i] = (100 + i); }
    MPI_Isend(wbuf,BLOCKNUM,MPI_INT,1,100,commforcomm,&sreq); MPI_Wait(&sreq,&ssts);
    for (i=0;i<BLOCKNUM;i++) { wbuf[i] = (200 + i); }
    MPI_Isend(wbuf,BLOCKNUM,MPI_INT,1,100,commforcomm,&sreq); MPI_Wait(&sreq,&ssts);
    for (i=0;i<BLOCKNUM;i++) { wbuf[i] = (300 + i); }
    MPI_Isend(wbuf,BLOCKNUM,MPI_INT,1,100,commforcomm,&sreq); MPI_Wait(&sreq,&ssts);
    MPI_Comm_free(&commforcomm);

    /* Phase 2: three sends on MPI_COMM_WORLD after the duplicate has been freed. */
    for (i=0;i<BLOCKNUM;i++) { wbuf[i] = (400 + i); }
    MPI_Isend(wbuf,BLOCKNUM,MPI_INT,1,100,MPI_COMM_WORLD,&sreq); MPI_Wait(&sreq,&ssts);
    for (i=0;i<BLOCKNUM;i++) { wbuf[i] = (500 + i); }
    MPI_Isend(wbuf,BLOCKNUM,MPI_INT,1,100,MPI_COMM_WORLD,&sreq); MPI_Wait(&sreq,&ssts);
    for (i=0;i<BLOCKNUM;i++) { wbuf[i] = (600 + i); }
    MPI_Isend(wbuf,BLOCKNUM,MPI_INT,1,100,MPI_COMM_WORLD,&sreq); MPI_Wait(&sreq,&ssts);

    /* Phase 3: final blocking send -- take the checkpoint while rank 0 is in this call. */
    for (i=0;i<BLOCKNUM;i++) { wbuf[i] = (700 + i); }
    MPI_Send(wbuf,BLOCKNUM,MPI_INT,1,100,MPI_COMM_WORLD);
  }
  else {
    /* Receiver side: mirrors the sender's phases and validates each payload. */
    MPI_Comm_dup(MPI_COMM_WORLD,&commforcomm);
    /* The buffer is zeroed before every receive so stale data cannot pass the check. */
    for (i=0;i<BLOCKNUM;i++) { rbuf[i] = 0; }
    MPI_Irecv(rbuf,BLOCKNUM,MPI_INT,0,100,commforcomm,&rreq); MPI_Wait(&rreq,&rsts);
    for (i=0;i<BLOCKNUM;i++) { if (rbuf[i] != (100 + i)) { abort(); } }
    for (i=0;i<BLOCKNUM;i++) { rbuf[i] = 0; }
    MPI_Irecv(rbuf,BLOCKNUM,MPI_INT,0,100,commforcomm,&rreq); MPI_Wait(&rreq,&rsts);
    for (i=0;i<BLOCKNUM;i++) { if (rbuf[i] != (200 + i)) { abort(); } }
    for (i=0;i<BLOCKNUM;i++) { rbuf[i] = 0; }
    MPI_Irecv(rbuf,BLOCKNUM,MPI_INT,0,100,commforcomm,&rreq); MPI_Wait(&rreq,&rsts);
    for (i=0;i<BLOCKNUM;i++) { if (rbuf[i] != (300 + i)) { abort(); } }
    MPI_Comm_free(&commforcomm);

    /* Receives on MPI_COMM_WORLD after the duplicate has been freed. */
    for (i=0;i<BLOCKNUM;i++) { rbuf[i] = 0; }
    MPI_Irecv(rbuf,BLOCKNUM,MPI_INT,0,100,MPI_COMM_WORLD,&rreq); MPI_Wait(&rreq,&rsts);
    for (i=0;i<BLOCKNUM;i++) { if (rbuf[i] != (400 + i)) { abort(); } }
    for (i=0;i<BLOCKNUM;i++) { rbuf[i] = 0; }
    MPI_Irecv(rbuf,BLOCKNUM,MPI_INT,0,100,MPI_COMM_WORLD,&rreq); MPI_Wait(&rreq,&rsts);
    for (i=0;i<BLOCKNUM;i++) { if (rbuf[i] != (500 + i)) { abort(); } }
    for (i=0;i<BLOCKNUM;i++) { rbuf[i] = 0; }
    MPI_Irecv(rbuf,BLOCKNUM,MPI_INT,0,100,MPI_COMM_WORLD,&rreq); MPI_Wait(&rreq,&rsts);
    for (i=0;i<BLOCKNUM;i++) { if (rbuf[i] != (600 + i)) { abort(); } }

    /* Take the checkpoint while this rank is sleeping; the final receive is posted only afterwards. */
    printf(" rank=%d sleep start \n",rank); fflush(stdout);
    sleep(SLPTIM);
    printf(" rank=%d sleep end \n",rank); fflush(stdout);

    /* Matches the sender's final blocking MPI_Send. */
    for (i=0;i<BLOCKNUM;i++) { rbuf[i] = 0; }
    MPI_Irecv(rbuf,BLOCKNUM,MPI_INT,0,100,MPI_COMM_WORLD,&rreq);
    MPI_Wait(&rreq,&rsts);
    for (i=0;i<BLOCKNUM;i++) { if (rbuf[i] != (700 + i)) { abort(); } }
  }

  MPI_Barrier(MPI_COMM_WORLD);
  free(rbuf);
  free(wbuf);
  MPI_Finalize();
  if (rank == 0) {
    printf(" rank=%d program end \n",rank); fflush(stdout);
  }
  return(0);
}