
Open MPI User's Mailing List Archives


From: Tim Prins (tprins_at_[hidden])
Date: 2007-03-01 09:15:43


Actually, I have also tried with the same version you are using and
cannot reproduce the behavior. Can you get a backtrace from the
segmentation fault?
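
If attaching a debugger is awkward, one way to capture it is a SIGSEGV
handler that dumps the stack. This is just a minimal sketch using glibc's
backtrace facilities (execinfo.h is glibc-specific, not part of MPI or
POSIX):

  #include <execinfo.h>
  #include <signal.h>
  #include <unistd.h>

  /* Dump raw stack frames to stderr on SIGSEGV, then exit.
     Not strictly async-signal-safe, but fine for debugging. */
  static void segv_handler(int sig)
  {
      void *frames[64];
      int n;
      (void)sig; /* unused */
      n = backtrace(frames, 64);
      backtrace_symbols_fd(frames, n, STDERR_FILENO);
      _exit(1);
  }

  /* in main(), before the spawn loop: */
  /* signal(SIGSEGV, segv_handler); */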

Also, as Ralph suggested, you might want to upgrade and see if the
problem persists.

Tim

On Mar 1, 2007, at 8:52 AM, Ralph Castain wrote:

> One thing immediately leaps out at me - you are using a very old
> version of Open MPI. I suspect Tim is testing on a much newer version,
> most likely the 1.2 version that is about to be released in the next
> day or two.
>
> If it's at all possible, I would urge you to upgrade to 1.2 - if you
> would rather not wait for the official release, the web site's latest
> beta is virtually identical. I believe you will find the code much
> improved and worth the change.
>
> If you truly want to stick with the 1.1 family, then I would suggest
> you at least update to the latest release there (we are currently at
> 1.1.4, and 1.1.5 - which is planned to be the last in that series - is
> also coming out in the next day or two).
>
> Hope that helps
>
> Ralph
>
>
>
> On 3/1/07 4:44 AM, "Rozzen.VINCONT_at_[hidden]"
> <Rozzen.VINCONT_at_[hidden]> wrote:
>
>>
>> Thanks for your help.
>> Attached is the output of ompi_info in the file ompi_info.txt.
>>
>> -----Original Message-----
>> From: users-bounces_at_[hidden] [mailto:users-bounces_at_[hidden]]
>> On Behalf Of Tim Prins
>> Sent: Thursday, March 1, 2007 05:45
>> To: Open MPI Users
>> Subject: Re: [OMPI users] MPI_Comm_Spawn
>>
>>
>> I have tried to reproduce this but cannot. I have been able to run
>> your test program to over 100 spawns. So that I can track this
>> further, please send the output of ompi_info.
>>
>> Thanks,
>>
>> Tim
>>
>> On Tuesday 27 February 2007 10:15 am,
>> Rozzen.VINCONT_at_[hidden] wrote:
>>> Do you know if there is a limit to the number of MPI_Comm_spawn
>>> calls we can use to launch a program? I want to start and stop a
>>> program several times (with the function MPI_Comm_spawn), but each
>>> time, after 31 calls to MPI_Comm_spawn, I get a "segmentation
>>> fault". Could you give me your thoughts on how to solve this
>>> problem?
>>> Thanks
>>>
>>> /* main .c file: spawns the program Exe */
>>> #include <stdio.h>
>>> #include <stdlib.h>
>>> #include <unistd.h>
>>> #include "mpi.h"
>>>
>>> #define EXE_TEST "/home/workspace/test_spaw1/src/Exe"
>>>
>>> int main( int argc, char **argv ) {
>>>
>>>   long *lpBufferMpi;
>>>   MPI_Comm lIntercom;          /* intercommunicator to the child */
>>>   int lErrcode;
>>>   MPI_Comm lCommunicateur;     /* merged intracommunicator */
>>>   int lRangMain, lRangExe, lIter;
>>>   int NiveauThreadVoulu, NiveauThreadObtenu, lTailleBuffer;
>>>
>>>   lIter = 0;
>>>
>>>   /* MPI environment */
>>>   printf("main*******************************\n");
>>>   printf("main : starting MPI*\n");
>>>
>>>   NiveauThreadVoulu = MPI_THREAD_MULTIPLE;
>>>   MPI_Init_thread( &argc, &argv, NiveauThreadVoulu, &NiveauThreadObtenu );
>>>   lpBufferMpi = calloc( 10000, sizeof(long) );
>>>   MPI_Buffer_attach( (void*)lpBufferMpi, 10000 * sizeof(long) );
>>>
>>>   while (lIter < 1000) {
>>>     lIter++;
>>>     lIntercom = (MPI_Comm)-1;
>>>
>>>     MPI_Comm_spawn( EXE_TEST, NULL, 1, MPI_INFO_NULL,
>>>                     0, MPI_COMM_WORLD, &lIntercom, &lErrcode );
>>>     printf( "%i main***MPI_Comm_spawn return : %d\n", lIter, lErrcode );
>>>
>>>     if (lIntercom == (MPI_Comm)-1) {
>>>       printf("%i null intercommunicator\n", lIter);
>>>       return 0;
>>>     }
>>>     MPI_Intercomm_merge( lIntercom, 0, &lCommunicateur );
>>>     MPI_Comm_rank( lCommunicateur, &lRangMain );
>>>     lRangExe = 1 - lRangMain;
>>>
>>>     printf("%i main***main rank : %i exe rank : %i\n",
>>>            lIter, lRangMain, lRangExe);
>>>     sleep(2);
>>>   }
>>>
>>>   /* shut down the MPI environment */
>>>   lTailleBuffer = 10000 * sizeof(long);
>>>   MPI_Buffer_detach( (void*)lpBufferMpi, &lTailleBuffer );
>>>   MPI_Comm_free( &lCommunicateur );
>>>   MPI_Finalize();
>>>   free( lpBufferMpi );
>>>
>>>   printf( "Main = End.\n" );
>>>   return 0;
>>> }
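>>>
>>> Note: nothing in the loop above releases lIntercom or lCommunicateur,
>>> so each iteration leaks two communicator handles, which may matter
>>> after many spawns. As a minimal sketch, assuming neither handle is
>>> needed later, the bottom of the loop could add:
>>>
>>>   MPI_Comm_free( &lCommunicateur );    /* release merged intracomm */
>>>   MPI_Comm_disconnect( &lIntercom );   /* sever link to spawned child */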
>>>
>>> /*********************************************************************/
>>>
>>> Exe:
>>> #include <stdio.h>
>>> #include <stdlib.h>
>>> #include <unistd.h>       /* for sleep() */
>>> #include "mpi.h"
>>>
>>> int main( int argc, char **argv ) {
>>>   /* 1) MPI communication */
>>>   MPI_Comm lCommunicateur;    /* this process's communicator */
>>>   MPI_Comm CommParent;        /* parent communicator to retrieve */
>>>   int lRank;                  /* rank in the communicator */
>>>   int lRangMain;              /* rank of the sequencer when run normally */
>>>   int lTailleCommunicateur;   /* size of the communicator */
>>>   long *lpBufferMpi;          /* message buffer */
>>>   int lBufferSize;            /* buffer size */
>>>
>>>   /* 2) threads */
>>>   int NiveauThreadVoulu, NiveauThreadObtenu;
>>>
>>>   lCommunicateur = (MPI_Comm)-1;
>>>   NiveauThreadVoulu = MPI_THREAD_MULTIPLE;
>>>   int erreur = MPI_Init_thread( &argc, &argv, NiveauThreadVoulu,
>>>                                 &NiveauThreadObtenu );
>>>
>>>   if (erreur != 0) {
>>>     printf("error\n");
>>>     return -1;   /* note: the original freed lpBufferMpi here,
>>>                     before it was ever allocated */
>>>   }
>>>
>>>   /* 3) attach a buffer for messages */
>>>   lBufferSize = 10000 * sizeof(long);
>>>   lpBufferMpi = calloc( 10000, sizeof(long) );
>>>   erreur = MPI_Buffer_attach( (void*)lpBufferMpi, lBufferSize );
>>>
>>>   if (erreur != 0) {
>>>     printf("error\n");
>>>     free( lpBufferMpi );
>>>     return -1;
>>>   }
>>>
>>>   printf( "Exe : started\n" );
>>>   MPI_Comm_get_parent( &CommParent );
>>>   MPI_Intercomm_merge( CommParent, 1, &lCommunicateur );
>>>   MPI_Comm_rank( lCommunicateur, &lRank );
>>>   MPI_Comm_size( lCommunicateur, &lTailleCommunicateur );
>>>   lRangMain = 1 - lRank;
>>>   printf( "Exe: lRankExe = %d lRankMain = %d\n", lRank, lRangMain );
>>>
>>>   sleep(1);
>>>   MPI_Buffer_detach( (void*)lpBufferMpi, &lBufferSize );
>>>   MPI_Comm_free( &lCommunicateur );
>>>   MPI_Finalize();
>>>   free( lpBufferMpi );
>>>   printf( "Exe: end.\n\n\n" );
>>>   return 0;
>>> }
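>>>
>>> Note: if Exe is ever started directly rather than via MPI_Comm_spawn,
>>> MPI_Comm_get_parent() returns MPI_COMM_NULL and the merge above is
>>> invalid. A minimal guard, just as a sketch:
>>>
>>>   if (CommParent == MPI_COMM_NULL) {
>>>     fprintf( stderr, "Exe: no parent; launch via MPI_Comm_spawn\n" );
>>>     MPI_Finalize();
>>>     return -1;
>>>   }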
>>>
>>>
>>> /*********************************************************************/
>>>
>>> result:
>>> main*******************************
>>> main : starting MPI*
>>> 1 main***MPI_Comm_spawn return : 0
>>> Exe : started
>>> 1 main***main rank : 0 exe rank : 1
>>> Exe: lRankExe = 1 lRankMain = 0
>>> Exe: end.
>>>
>>>
>>> 2 main***MPI_Comm_spawn return : 0
>>> Exe : started
>>> 2 main***main rank : 0 exe rank : 1
>>> Exe: lRankExe = 1 lRankMain = 0
>>> Exe: end.
>>>
>>>
>>> 3 main***MPI_Comm_spawn return : 0
>>> Exe : started
>>> 3 main***main rank : 0 exe rank : 1
>>> Exe: lRankExe = 1 lRankMain = 0
>>> Exe: end.
>>>
>>> ....
>>>
>>> 30 main***MPI_Comm_spawn return : 0
>>> Exe : started
>>> 30 main***main rank : 0 exe rank : 1
>>> Exe: lRankExe = 1 lRankMain = 0
>>> Exe: end.
>>>
>>>
>>> 31 main***MPI_Comm_spawn return : 0
>>> Exe : started
>>> 31 main***main rank : 0 exe rank : 1
>>> Exe: lRankExe = 1 lRankMain = 0
>>> Segmentation fault
>>>
>>>
>>>
> _______________________________________________
> users mailing list
> users_at_[hidden]
> http://www.open-mpi.org/mailman/listinfo.cgi/users