Open MPI logo

Open MPI Development Mailing List Archives

  |   Home   |   Support   |   FAQ   |   all Development mailing list

Subject: [OMPI devel] Some questions about checkpoint/restart (11)
From: Takayuki Seki (seki_at_[hidden])
Date: 2010-04-02 01:14:29


The 11th question is as follows:

(11) Communication over an inter-communicator deadlocks after a checkpoint is taken.

Framework : crcp
Component : bkmrk
The source file : ompi/mca/crcp/bkmrk/crcp_bkmrk_pml.c
The function name : drain_message_find_any

Here's the code that causes the problem:

/* Number of seconds each rank sleeps; the checkpoint is taken during this window. */
#define SLPTIME 60

  /* Excerpt of the reproducer: only ranks 0 and 1 exchange a message. */
  buf = -1;
  if (rank == 0) {
    /* Sender: the non-blocking send is posted BEFORE the sleep, so it is
       already outstanding when the checkpoint is taken. */
    buf = 9014;
    MPI_Isend(&buf,1,MPI_INT,0,1000,intercomm,&req); /* using inter-communicator */

    printf(" rank=%d sleep start \n",rank); fflush(stdout);
    sleep(SLPTIME); /** take checkpoint at this point **/
    printf(" rank=%d sleep end \n",rank); fflush(stdout);

    /* Complete the send after the sleep. */
    MPI_Wait(&req,&sts);
  }
  else if (rank==1) {
    /* Receiver: sleeps first, so the matching receive is posted only AFTER
       the checkpoint window. */
    printf(" rank=%d sleep start \n",rank); fflush(stdout);
    sleep(SLPTIME); /** take checkpoint at this point **/
    printf(" rank=%d sleep end \n",rank); fflush(stdout);

    buf = 0;
    MPI_Irecv(&buf,1,MPI_INT,0,1000,intercomm,&req); /* using inter-communicator */
    /* According to the report, this is where the program hangs when a
       checkpoint was taken during the sleep. */
    MPI_Wait(&req,&sts);
  }

* If a checkpoint is taken while Process 0 and Process 1 are in the sleep function,
  the MPI program deadlocks.

* Here's my debugging output.
  ft_event_post_drain_message:Irecv drain_msg_ref=8a2f80 rank=0 tag=1000 cnt=1 ddt=4 to=8c27c0 [datatype->size=1]
  wait_quiesce_drained:xx=0 9014
  drain_message_find_any:Compare[peer=0] vpid=0 1 jobid=-431423487 -431423487 grp_proc_count=1 89cea0 1
  drain_message_find_any:Compare[peer=0] -> Continue

* Because the vpid/jobid comparison done by orte_util_compare_name_fields fails,
  the drain_message_find_any function does not call drain_message_find,
  so the message already received by bkmrk is never found.
  Does the orte_util_compare_name_fields function handle inter-communicators?

-bash-3.2$ cat t_mpi_question-11.c
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include "mpi.h"

#define SLPTIME 60

int main(int ac,char **av)
{
  int rank,size,cc,j,i,buf;
  MPI_Request req;
  MPI_Status sts;
  MPI_Comm localcomm,intercomm;
  MPI_Group worldgrp,localgrp;
  int local_grp_size,localrank,localsize,interrank,intersize;
  int *rank_list;
  int local_leader,remote_leader;

  rank=0;
  MPI_Init(&ac,&av);
  MPI_Comm_rank(MPI_COMM_WORLD,&rank);
  MPI_Comm_size(MPI_COMM_WORLD,&size);
  if (size%2 != 0) { MPI_Abort(MPI_COMM_WORLD,-1); }

  printf(" rank=%d pass-1 \n",rank); fflush(stdout);
  MPI_Barrier(MPI_COMM_WORLD);

  MPI_Comm_group(MPI_COMM_WORLD,&worldgrp);

  local_grp_size = size / 2;
  rank_list = (int *)malloc(sizeof(int) * local_grp_size);
  if (rank_list == NULL) { MPI_Abort(MPI_COMM_WORLD,-1); }

  j = ((rank % 2) == 0) ? 0 : 1;
  for (i=0;i<local_grp_size;i++) {
    rank_list[i] = j;
    j+=2;
  }

  MPI_Group_incl(worldgrp,local_grp_size,rank_list,&localgrp);
  MPI_Comm_create(MPI_COMM_WORLD,localgrp,&localcomm);

  MPI_Comm_rank(localcomm,&localrank);
  MPI_Comm_size(localcomm,&localsize);

  printf(" rank=%d size=%d pass-3 LOCAL rank=%d size=%d \n"
    ,rank,size,localrank,localsize);
  fflush(stdout);
  MPI_Barrier(localcomm);
  MPI_Barrier(MPI_COMM_WORLD);

  local_leader = 0;
  remote_leader = ((rank % 2) == 0) ? 1 : 0;
  MPI_Intercomm_create(localcomm,local_leader,MPI_COMM_WORLD,
                            remote_leader,999,&intercomm);

  MPI_Comm_rank(intercomm,&interrank);
  MPI_Comm_size(intercomm,&intersize);

  printf(" rank=%d size=%d pass-4 LOCAL rank=%d size=%d INTER rank=%d size=%d \n"
    ,rank,size,localrank,localsize,interrank,intersize);
  fflush(stdout);

  MPI_Barrier(intercomm);
  MPI_Barrier(localcomm);
  MPI_Barrier(MPI_COMM_WORLD);

  buf = -1;
  if (rank == 0) {
    buf = 9014;
    MPI_Isend(&buf,1,MPI_INT,0,1000,intercomm,&req);

    printf(" rank=%d sleep start \n",rank); fflush(stdout);
    sleep(SLPTIME);
    printf(" rank=%d sleep end \n",rank); fflush(stdout);

    MPI_Wait(&req,&sts);
  }
  else if (rank==1) {
    printf(" rank=%d sleep start \n",rank); fflush(stdout);
    sleep(SLPTIME);
    printf(" rank=%d sleep end \n",rank); fflush(stdout);

    buf = 0;
    MPI_Irecv(&buf,1,MPI_INT,0,1000,intercomm,&req);
    MPI_Wait(&req,&sts);
  }
  printf(" rank=%d pass-5 buf=%d \n",rank,buf); fflush(stdout);

  MPI_Barrier(intercomm);
  MPI_Barrier(localcomm);
  MPI_Barrier(MPI_COMM_WORLD);

  MPI_Comm_free(&intercomm);
  MPI_Comm_free(&localcomm);
  MPI_Group_free(&localgrp);
  MPI_Finalize();
  if (rank ==0) {
    printf(" rank=%d program end \n",rank); fflush(stdout);
  }
  return(0);
}