Open MPI logo

Open MPI User's Mailing List Archives

  |   Home   |   Support   |   FAQ   |   all Open MPI User's mailing list

From: Tim Prins (tprins_at_[hidden])
Date: 2007-03-05 16:26:42


That is possible. Threading support is VERY lightly tested, but I
doubt it is the problem since it always fails after 31 spawns.

Again, I have tried with these configure options and the same version
of Open MPI and have still not been able to replicate this (after
letting it spawn over 500 times). Have you been able to try a more
recent version of Open MPI? What kind of system is it? How many nodes
are you running on?

Tim

On Mar 5, 2007, at 1:21 PM, Rozzen.VINCONT_at_[hidden] wrote:

>
> Maybe the problem comes from the configuration options.
> The configuration options used are :
> ./configure --enable-mpi-threads --enable-progress-threads --with-threads=posix --enable-smp-locks
> Could you give me your point of view about that please ?
> Thanks
>
> -----Message d'origine-----
> De : users-bounces_at_[hidden] [mailto:users-bounces_at_[hidden]]
> De la
> part de Ralph H Castain
> Envoyé : mardi 27 février 2007 16:26
> À : Open MPI Users <users_at_[hidden]>
> Objet : Re: [OMPI users] MPI_Comm_Spawn
>
>
> Now that's interesting! There shouldn't be a limit, but to be
> honest, I've
> never tested that mode of operation - let me look into it and see.
> It sounds
> like there is some counter that is overflowing, but I'll look.
>
> Thanks
> Ralph
>
>
> On 2/27/07 8:15 AM, "Rozzen.VINCONT_at_[hidden]"
> <Rozzen.VINCONT_at_[hidden]> wrote:
>
>> Do you know if there is a limit to the number of MPI_Comm_spawn we
>> can use in
>> order to launch a program?
>> I want to start and stop a program several times (with the function
>> MPI_Comm_spawn) but every time after 31 MPI_Comm_spawn, I get a
>> "segmentation
>> fault".
>> Could you give me your point of view to solve this problem?
>> Thanks
>>
>> /*file .c : spawned the file Exe*/
>> #include <stdio.h>
>> #include <malloc.h>
>> #include <unistd.h>
>> #include "mpi.h"
>> #include <pthread.h>
>> #include <signal.h>
>> #include <sys/time.h>
>> #include <errno.h>
>> #define EXE_TEST "/home/workspace/test_spaw1/src/Exe"
>>
>>
>>
>> int main( int argc, char **argv ) {
>>
>> long *lpBufferMpi;
>> MPI_Comm lIntercom;
>> int lErrcode;
>> MPI_Comm lCommunicateur;
>> int lRangMain,lRangExe,lMessageEnvoi,lIter,NiveauThreadVoulu,
>> NiveauThreadObtenu,lTailleBuffer;
>> int *lpMessageEnvoi=&lMessageEnvoi;
>> MPI_Status lStatus; /*status de reception*/
>>
>> lIter=0;
>>
>>
>> /* MPI environnement */
>>
>> printf("main*******************************\n");
>> printf("main : Lancement MPI*\n");
>>
>> NiveauThreadVoulu = MPI_THREAD_MULTIPLE;
>> MPI_Init_thread( &argc, &argv, NiveauThreadVoulu,
>> &NiveauThreadObtenu );
>> lpBufferMpi = calloc( 10000, sizeof(long));
>> MPI_Buffer_attach( (void*)lpBufferMpi, 10000 * sizeof(long) );
>>
>> while (lIter<1000){
>> lIter ++;
>> lIntercom=(MPI_Comm)-1 ;
>>
>> MPI_Comm_spawn( EXE_TEST, NULL, 1, MPI_INFO_NULL,
>> 0, MPI_COMM_WORLD, &lIntercom, &lErrcode );
>> printf( "%i main***MPI_Comm_spawn return : %d\n",lIter,
>> lErrcode );
>>
>> if(lIntercom == (MPI_Comm)-1 ){
>> printf("%i Intercom null\n",lIter);
>> return 0;
>> }
>> MPI_Intercomm_merge(lIntercom, 0,&lCommunicateur );
>> MPI_Comm_rank( lCommunicateur, &lRangMain);
>> lRangExe=1-lRangMain;
>>
>> printf("%i main***Rang main : %i Rang exe : %i
>> \n",lIter,(int)lRangMain,(int)lRangExe);
>> sleep(2);
>>
>> }
>>
>>
>> /* Arret de l'environnement MPI */
>> lTailleBuffer=10000* sizeof(long);
>> MPI_Buffer_detach( (void*)lpBufferMpi, &lTailleBuffer );
>> MPI_Comm_free( &lCommunicateur );
>> MPI_Finalize( );
>> free( lpBufferMpi );
>>
>> printf( "Main = End .\n" );
>> return 0;
>>
>> }
>> /
>> *********************************************************************
>> ********
>> *******************/
>> Exe:
>> #include <string.h>
>> #include <stdlib.h>
>> #include <stdio.h>
>> #include <malloc.h>
>> #include <unistd.h> /* pour sleep() */
>> #include <pthread.h>
>> #include <semaphore.h>
>> #include "mpi.h"
>>
>> int main( int argc, char **argv ) {
>> /*1)pour communiaction MPI*/
>> MPI_Comm lCommunicateur; /*communicateur du process*/
>> MPI_Comm CommParent; /*Communiacteur parent à
>> récupérer*/
>> int lRank; /*rang du communicateur du
>> process*/
>> int lRangMain; /*rang du séquenceur si lancé en
>> mode normal*/
>> int lTailleCommunicateur; /*taille du communicateur;*/
>> long *lpBufferMpi; /*buffer pour message*/
>> int lBufferSize; /*taille du buffer*/
>>
>> /*2) pour les thread*/
>> int NiveauThreadVoulu, NiveauThreadObtenu;
>>
>>
>> lCommunicateur = (MPI_Comm)-1;
>> NiveauThreadVoulu = MPI_THREAD_MULTIPLE;
>> int erreur = MPI_Init_thread( &argc, &argv, NiveauThreadVoulu,
>> &NiveauThreadObtenu );
>>
>> if (erreur!=0){
>> printf("erreur\n");
>> free( lpBufferMpi );
>> return -1;
>> }
>>
>> /*2) Attachement à un buffer pour le message*/
>> lBufferSize=10000 * sizeof(long);
>> lpBufferMpi = calloc( 10000, sizeof(long));
>> erreur = MPI_Buffer_attach( (void*)lpBufferMpi, lBufferSize );
>>
>> if (erreur!=0){
>> printf("erreur\n");
>> free( lpBufferMpi );
>> return -1;
>> }
>>
>> printf( "Exe : Lance \n" );
>> MPI_Comm_get_parent(&CommParent);
>> MPI_Intercomm_merge( CommParent, 1, &lCommunicateur );
>> MPI_Comm_rank( lCommunicateur, &lRank );
>> MPI_Comm_size( lCommunicateur, &lTailleCommunicateur );
>> lRangMain =1-lRank;
>> printf( "Exe: lRankExe = %d lRankMain = %d\n", lRank ,
>> lRangMain,
>> lTailleCommunicateur);
>>
>> sleep(1);
>> MPI_Buffer_detach( (void*)lpBufferMpi, &lBufferSize );
>> MPI_Comm_free( &lCommunicateur );
>> MPI_Finalize( );
>> free( lpBufferMpi );
>> printf( "Exe: Fin.\n\n\n" );
>> }
>>
>>
>> /
>> *********************************************************************
>> ********
>> *******************/
>> result :
>> main*******************************
>> main : Lancement MPI*
>> 1 main***MPI_Comm_spawn return : 0
>> Exe : Lance
>> 1 main***Rang main : 0 Rang exe : 1
>> Exe: lRankExe = 1 lRankMain = 0
>> Exe: Fin.
>>
>>
>> 2 main***MPI_Comm_spawn return : 0
>> Exe : Lance
>> 2 main***Rang main : 0 Rang exe : 1
>> Exe: lRankExe = 1 lRankMain = 0
>> Exe: Fin.
>>
>>
>> 3 main***MPI_Comm_spawn return : 0
>> Exe : Lance
>> 3 main***Rang main : 0 Rang exe : 1
>> Exe: lRankExe = 1 lRankMain = 0
>> Exe: Fin.
>>
>> ....
>>
>> 30 main***MPI_Comm_spawn return : 0
>> Exe : Lance
>> 30 main***Rang main : 0 Rang exe : 1
>> Exe: lRankExe = 1 lRankMain = 0
>> Exe: Fin.
>>
>>
>> 31 main***MPI_Comm_spawn return : 0
>> Exe : Lance
>> 31 main***Rang main : 0 Rang exe : 1
>> Exe: lRankExe = 1 lRankMain = 0
>> Erreur de segmentation
>>
>>
>>
>> _______________________________________________
>> users mailing list
>> users_at_[hidden]
>> http://www.open-mpi.org/mailman/listinfo.cgi/users
>
>
>
> _______________________________________________
> users mailing list
> users_at_[hidden]
> http://www.open-mpi.org/mailman/listinfo.cgi/users
>
> _______________________________________________
> users mailing list
> users_at_[hidden]
> http://www.open-mpi.org/mailman/listinfo.cgi/users