Open MPI User's Mailing List Archives

From: Tim Prins (tprins_at_[hidden])
Date: 2007-03-05 16:34:06


Never mind, I was just able to replicate it. I'll look into it.

Tim

On Mar 5, 2007, at 4:26 PM, Tim Prins wrote:

> That is possible. Threading support is VERY lightly tested, but I
> doubt it is the problem since it always fails after 31 spawns.
>
> Again, I have tried with these configure options and the same version
> of Open MPI, and I still have not been able to replicate this (even
> after letting it spawn over 500 times). Have you been able to try a
> more recent version of Open MPI? What kind of system is it? How many
> nodes are you running on?
>
> Tim
>
> On Mar 5, 2007, at 1:21 PM, Rozzen.VINCONT_at_[hidden] wrote:
>
>>
>> Maybe the problem comes from the configure options.
>> The configure options used were:
>> ./configure --enable-mpi-threads --enable-progress-threads
>> --with-threads=posix --enable-smp-locks
>> Could you give me your point of view on that, please?
>> Thanks
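
The reproducer below requests MPI_THREAD_MULTIPLE but never checks the
thread level the library actually granted; an MPI implementation may
legally return a lower one. A minimal standalone check, sketched here
with standard MPI calls only (not part of the original reproducer):

    #include <stdio.h>
    #include "mpi.h"

    int main( int argc, char **argv ) {
        int provided;
        MPI_Init_thread( &argc, &argv, MPI_THREAD_MULTIPLE, &provided );
        /* the MPI_THREAD_* constants are ordered, so a plain
           comparison shows whether the request was downgraded */
        if (provided < MPI_THREAD_MULTIPLE)
            printf( "requested MPI_THREAD_MULTIPLE, got level %d\n",
                    provided );
        MPI_Finalize();
        return 0;
    }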
>>
>> -----Original Message-----
>> From: users-bounces_at_[hidden] [mailto:users-bounces_at_[hidden]]
>> on behalf of Ralph H Castain
>> Sent: Tuesday, February 27, 2007 16:26
>> To: Open MPI Users <users_at_[hidden]>
>> Subject: Re: [OMPI users] MPI_Comm_Spawn
>>
>>
>> Now that's interesting! There shouldn't be a limit, but to be honest
>> I've never tested that mode of operation, so let me look into it. It
>> sounds like some counter is overflowing, but I'll check.
>>
>> Thanks
>> Ralph
>>
>>
>> On 2/27/07 8:15 AM, "Rozzen.VINCONT_at_[hidden]"
>> <Rozzen.VINCONT_at_[hidden]> wrote:
>>
>>> Do you know if there is a limit to the number of MPI_Comm_spawn
>>> calls we can use to launch a program?
>>> I want to start and stop a program several times (with
>>> MPI_Comm_spawn), but each time, after 31 calls to MPI_Comm_spawn,
>>> I get a "segmentation fault".
>>> Could you give me your point of view on how to solve this problem?
>>> Thanks
>>>
>>> /* main .c file: spawns the Exe executable */
>>> #include <stdio.h>
>>> #include <stdlib.h>
>>> #include <unistd.h>
>>> #include "mpi.h"
>>>
>>> #define EXE_TEST "/home/workspace/test_spaw1/src/Exe"
>>>
>>> int main( int argc, char **argv ) {
>>>
>>>     long *lpBufferMpi;
>>>     MPI_Comm lIntercom;
>>>     int lErrcode;
>>>     MPI_Comm lCommunicateur;
>>>     int lRangMain, lRangExe, lIter, NiveauThreadVoulu,
>>>         NiveauThreadObtenu, lTailleBuffer;
>>>
>>>     lIter = 0;
>>>
>>>     /* Set up the MPI environment */
>>>     printf("main*******************************\n");
>>>     printf("main : Lancement MPI*\n");
>>>
>>>     NiveauThreadVoulu = MPI_THREAD_MULTIPLE;
>>>     MPI_Init_thread( &argc, &argv, NiveauThreadVoulu,
>>>                      &NiveauThreadObtenu );
>>>     lpBufferMpi = calloc( 10000, sizeof(long) );
>>>     MPI_Buffer_attach( (void*)lpBufferMpi, 10000 * sizeof(long) );
>>>
>>>     while (lIter < 1000) {
>>>         lIter++;
>>>         /* sentinel value; MPI_COMM_NULL would be the portable check */
>>>         lIntercom = (MPI_Comm)-1;
>>>
>>>         MPI_Comm_spawn( EXE_TEST, MPI_ARGV_NULL, 1, MPI_INFO_NULL,
>>>                         0, MPI_COMM_WORLD, &lIntercom, &lErrcode );
>>>         printf( "%i main***MPI_Comm_spawn return : %d\n", lIter,
>>>                 lErrcode );
>>>
>>>         if (lIntercom == (MPI_Comm)-1) {
>>>             printf("%i Intercom null\n", lIter);
>>>             return 0;
>>>         }
>>>         MPI_Intercomm_merge( lIntercom, 0, &lCommunicateur );
>>>         MPI_Comm_rank( lCommunicateur, &lRangMain );
>>>         lRangExe = 1 - lRangMain;
>>>
>>>         printf("%i main***Rang main : %i Rang exe : %i\n",
>>>                lIter, lRangMain, lRangExe);
>>>         sleep(2);
>>>     }
>>>
>>>     /* Shut down the MPI environment */
>>>     lTailleBuffer = 10000 * sizeof(long);
>>>     MPI_Buffer_detach( (void*)lpBufferMpi, &lTailleBuffer );
>>>     MPI_Comm_free( &lCommunicateur );
>>>     MPI_Finalize();
>>>     free( lpBufferMpi );
>>>
>>>     printf( "Main = End .\n" );
>>>     return 0;
>>> }
>>> /*********************************************************************/
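
One thing stands out when reading the loop above: every iteration
creates both an intercommunicator (from MPI_Comm_spawn) and a merged
intracommunicator (from MPI_Intercomm_merge), and neither is released
before the next spawn. Whether or not that is the cause of this
particular crash, a sketch of per-iteration cleanup using standard MPI
calls (names as in the listing above) would be:

    /* at the end of each loop iteration, release both communicators
       so handles do not accumulate across the spawns */
    MPI_Comm_free( &lCommunicateur );    /* merged intracommunicator   */
    MPI_Comm_disconnect( &lIntercom );   /* intercomm to spawned child */

MPI_Comm_disconnect waits for pending communication on the communicator
to complete, which is the usual choice for communicators obtained from
MPI_Comm_spawn; MPI_Comm_free would also be legal here.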
>>> Exe (the spawned program):
>>> #include <string.h>
>>> #include <stdlib.h>
>>> #include <stdio.h>
>>> #include <unistd.h> /* for sleep() */
>>> #include "mpi.h"
>>>
>>> int main( int argc, char **argv ) {
>>>     /* 1) MPI communication */
>>>     MPI_Comm lCommunicateur;     /* this process's communicator      */
>>>     MPI_Comm CommParent;         /* parent communicator to retrieve  */
>>>     int lRank;                   /* rank within the communicator     */
>>>     int lRangMain;               /* rank of the sequencer when run   */
>>>                                  /* in normal mode                   */
>>>     int lTailleCommunicateur;    /* size of the communicator         */
>>>     long *lpBufferMpi;           /* buffer for messages              */
>>>     int lBufferSize;             /* buffer size                      */
>>>
>>>     /* 2) threads */
>>>     int NiveauThreadVoulu, NiveauThreadObtenu;
>>>
>>>     lCommunicateur = (MPI_Comm)-1;
>>>     NiveauThreadVoulu = MPI_THREAD_MULTIPLE;
>>>     int erreur = MPI_Init_thread( &argc, &argv, NiveauThreadVoulu,
>>>                                   &NiveauThreadObtenu );
>>>
>>>     if (erreur != 0) {
>>>         /* note: the buffer is not allocated yet, so there is
>>>            nothing to free here */
>>>         printf("erreur\n");
>>>         return -1;
>>>     }
>>>
>>>     /* 3) Attach a buffer for messages */
>>>     lBufferSize = 10000 * sizeof(long);
>>>     lpBufferMpi = calloc( 10000, sizeof(long) );
>>>     erreur = MPI_Buffer_attach( (void*)lpBufferMpi, lBufferSize );
>>>
>>>     if (erreur != 0) {
>>>         printf("erreur\n");
>>>         free( lpBufferMpi );
>>>         return -1;
>>>     }
>>>
>>>     printf( "Exe : Lance \n" );
>>>     MPI_Comm_get_parent( &CommParent );
>>>     MPI_Intercomm_merge( CommParent, 1, &lCommunicateur );
>>>     MPI_Comm_rank( lCommunicateur, &lRank );
>>>     MPI_Comm_size( lCommunicateur, &lTailleCommunicateur );
>>>     lRangMain = 1 - lRank;
>>>     printf( "Exe: lRankExe = %d lRankMain = %d\n", lRank, lRangMain );
>>>
>>>     sleep(1);
>>>     MPI_Buffer_detach( (void*)lpBufferMpi, &lBufferSize );
>>>     MPI_Comm_free( &lCommunicateur );
>>>     MPI_Finalize();
>>>     free( lpBufferMpi );
>>>     printf( "Exe: Fin.\n\n\n" );
>>>     return 0;
>>> }
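
A related note on the child side: it frees the merged communicator but
never explicitly detaches from its parent. In MPI's dynamic process
model the parent and child stay connected until the intercommunicator
is disconnected, and MPI_Finalize is collective over connected
processes. A sketch of an explicit detach before finalizing (again
using the names from the listing above):

    /* sever the link to the parent so neither side's MPI_Finalize
       has to wait on the other */
    MPI_Comm_disconnect( &CommParent );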
>>>
>>>
>>> /*********************************************************************/
>>> Result:
>>> main*******************************
>>> main : Lancement MPI*
>>> 1 main***MPI_Comm_spawn return : 0
>>> Exe : Lance
>>> 1 main***Rang main : 0 Rang exe : 1
>>> Exe: lRankExe = 1 lRankMain = 0
>>> Exe: Fin.
>>>
>>>
>>> 2 main***MPI_Comm_spawn return : 0
>>> Exe : Lance
>>> 2 main***Rang main : 0 Rang exe : 1
>>> Exe: lRankExe = 1 lRankMain = 0
>>> Exe: Fin.
>>>
>>>
>>> 3 main***MPI_Comm_spawn return : 0
>>> Exe : Lance
>>> 3 main***Rang main : 0 Rang exe : 1
>>> Exe: lRankExe = 1 lRankMain = 0
>>> Exe: Fin.
>>>
>>> ....
>>>
>>> 30 main***MPI_Comm_spawn return : 0
>>> Exe : Lance
>>> 30 main***Rang main : 0 Rang exe : 1
>>> Exe: lRankExe = 1 lRankMain = 0
>>> Exe: Fin.
>>>
>>>
>>> 31 main***MPI_Comm_spawn return : 0
>>> Exe : Lance
>>> 31 main***Rang main : 0 Rang exe : 1
>>> Exe: lRankExe = 1 lRankMain = 0
>>> Erreur de segmentation (segmentation fault)
>>>
>>>
>>>