You might want to send this to the MPICH mailing lists - this is for Open MPI issues.

On Oct 27, 2011, at 4:59 PM, Jonathan Bishop wrote:

I am using MPI_Comm_spawn to dynamically run workers. However, when the workers exit they hang in MPI_Finalize. Here is a short program that shows the issue...

It responds to several commands...

Do

start
stop

and then check how many processes are running - there should be 1 (the master), not 2.

I am using MPICH2 1.4.1-p1.

Thanks,

Jon

#include <sys/types.h>
#include <unistd.h>
#include <iostream>
#include <string>
#include "mpi.h"

using namespace std;


main(int argc, char **argv)
{
  MPI_Init(&argc, &argv);
  MPI_Comm parent;
  MPI_Comm_get_parent(&parent);

  // Master
  if (parent == MPI_COMM_NULL) {
    cout << getpid() << endl;
    MPI_Comm intercom = MPI_COMM_NULL;
    while (1) {
      cout << "Enter: ";
      string s;
      cin >> s;
      if (s == "start") {
if (intercom != MPI_COMM_NULL) {
 cout << "already started" << endl;
 continue;
}
MPI_Comm_spawn(argv[0], MPI_ARGV_NULL, 1, MPI_INFO_NULL, 0, MPI_COMM_SELF, &intercom,  MPI_ERRCODES_IGNORE);
continue;
      }
      if (s == "stop") {
if (intercom == MPI_COMM_NULL) {
 cout << "worker not running" << endl;
 continue;
}
MPI_Send(const_cast<char*>(s.c_str()), s.size(), MPI_CHAR, 0, 0, intercom);
intercom = MPI_COMM_NULL;
// MPI_Finalize();  // This will allow the workers to die, but then I can not restart them.
continue;
      }
      if (s == "exit") {
if (intercom != MPI_COMM_NULL) {
 cout << "need to stop before exit" << endl;
 continue;
}
break;
      }
      if (intercom == MPI_COMM_NULL) {
cout << "need to start" << endl;
continue;
      }
      MPI_Send(const_cast<char*>(s.c_str()), s.size(), MPI_CHAR, 0, 0, intercom);
      char buf[1000];
      MPI_Status status;
      MPI_Recv(buf, 1000, MPI_CHAR, MPI_ANY_SOURCE, MPI_ANY_TAG, intercom, &status);
      int count;
      MPI_Get_count(&status, MPI_CHAR, &count);
      buf[count] = 0;
      string t = buf;
      cout << "worker returned " << t << endl;
    }
  }

  // Worker
  if (parent != MPI_COMM_NULL) {
    while (1) {
      char buf[1000];
      MPI_Status status;
      MPI_Recv(buf, 1000, MPI_CHAR, MPI_ANY_SOURCE, MPI_ANY_TAG, parent, &status);
      int count;
      MPI_Get_count(&status, MPI_CHAR, &count);
      buf[count] = 0;
      string s = buf;
      if (s == "stop") {
cout << "worker stopping" << endl;
break;
      }
      MPI_Send(const_cast<char*>(s.c_str()), s.size(), MPI_CHAR, 0, 0, parent);
    }
  }

  MPI_Finalize();
}






_______________________________________________
users mailing list
users@open-mpi.org
http://www.open-mpi.org/mailman/listinfo.cgi/users