Hi,
I am a long time happy user of mpi_comm_spawn() routine. But so far I
used it only with the MPI_COMM_WORLD communicator. Now I want to
execute more mpi_comm_spawn() routines, by creating and using group
communicators. However this seems to have some problems. I can get it
to run about 50% times on my laptop, but on some more "speedy"
machines it just produces the following message:
$ mpirun -n 4 a.out
[ala:31406] [[45304,0],0] ORTE_ERROR_LOG: Not found in file base/plm_base_launch_support.c at line 758
--------------------------------------------------------------------------
mpirun was unable to start the specified application as it encountered an error.
More information may be available above.
--------------------------------------------------------------------------
I am attaching the 2 programs needed to test the behavior. Compile:
$ mpif90 -o sps sps.f08 # spawned program
$ mpif90 mspbug.f08 # program with problems
$ mpirun -n 4 a.out
The compiler is gfortran-4.4.4, and openmpi is 1.4.2.
Needless to say it runs with mpich2, but mpich2 doesn't know how to
deal with stdin on a spawned process, so it's useless for my project :-(
Any ideas?
-------------------------------------------------
program sps
use mpi
implicit none
integer :: ier,nproc,me,pcomm,meroot,mi,on
integer, dimension(1:10) :: num
call mpi_init(ier)
mi=mpi_integer
call mpi_comm_rank(mpi_comm_world,me,ier)
meroot=0
on=1
call mpi_comm_get_parent(pcomm,ier)
call mpi_bcast(num,on,mi,meroot,pcomm,ier)
write(*,*)'sps>me,num=',me,num(on)
call mpi_finalize(ier)
end program sps
-------------------------------------------------
program groupspawn
use mpi
implicit none
! in the case use mpi does not work (eg Ubuntu) use the include below
! include 'mpif.h'
integer :: ier,intercom,nproc,meroot,info,mpierrs(1),mcw
integer :: i,myrepsiz,me,np,mcg,repdgrp,repdcom,on,mi,op
integer, dimension(1:10) :: myrepgrp
character(len=5) :: sarg(1),prog
integer, dimension(1:10) :: num,sm
integer :: newme,ngrp,igrp
call mpi_init(ier)
prog='sps'
sarg(1) = ''
nproc=2
on=1
meroot=0
mcw=mpi_comm_world
info=mpi_info_null
mi=mpi_integer
op=mpi_sum
mpierrs(1)=mpi_errcodes_ignore(1)
call mpi_comm_rank(mcw,me,ier)
call mpi_comm_size(mcw,np,ier)
ngrp=2 ! lets have some groups
myrepsiz=np/ngrp
igrp=me/myrepsiz
do i = 1, myrepsiz
myrepgrp(i)=i+me-mod(me,myrepsiz)-1
enddo
call mpi_comm_group(mcw,mcg,ier)
call mpi_group_incl(mcg,myrepsiz,myrepgrp,repdgrp,ier)
call mpi_comm_create(mcw,repdgrp,repdcom,ier)
! call mpi_comm_spawn(prog,sarg,nproc,info,meroot,mcw,intercom,mpierrs,ier)
call mpi_comm_spawn(prog,sarg,nproc,info,meroot,repdcom,intercom,mpierrs,ier)
! send a number to spawned ones...
call mpi_comm_rank(intercom,newme,ier)
write(*,*)'me,intercom,newme=',me,intercom,newme
num(1)=111*(igrp+1)
meroot=mpi_proc_null
if(newme == 0) meroot=mpi_root ! to send data
call mpi_bcast(num,on,mi,meroot,intercom,ier)
! sometimes there is no output from sps programs, so we wait here: WEIRD :-(
!call sleep(1)
call mpi_finalize(ier)
end program groupspawn
|