Open MPI logo

Open MPI User's Mailing List Archives

  |   Home   |   Support   |   FAQ   |   all Open MPI User's mailing list

From: Ralph Castain (rhc_at_[hidden])
Date: 2007-03-01 08:52:36


One thing immediately leaps out at me - you are using a very old version of
Open MPI. I suspect Tim is testing on a much newer version, most likely the
1.2 version that is about to be released in the next day or two.

If it's at all possible, I would urge you to upgrade to 1.2 - if you would
rather not wait for the official release, the web site's latest beta is
virtually identical. I believe you will find the code much improved and
worth the change.

If you truly want to stick with the 1.1 family, then I would suggest you at
least update to the latest release there (we are currently at 1.1.4, and
1.1.5 - which is planned to be the last in that series - is also coming out
in the next day or two).

Hope that helps

Ralph

On 3/1/07 4:44 AM, "Rozzen.VINCONT_at_[hidden]"
<Rozzen.VINCONT_at_[hidden]> wrote:

>
> Thanks for your help.
> Attached is the output of ompi_info, in the file ompi_info.txt.
>
> -----Message d'origine-----
> De : users-bounces_at_[hidden] [mailto:users-bounces_at_[hidden]]De la
> part de Tim Prins
> Envoyé : jeudi 1 mars 2007 05:45
> À : Open MPI Users
> Objet : Re: [OMPI users] MPI_Comm_Spawn
>
>
> I have tried to reproduce this but cannot. I have been able to run your test
> program through more than 100 spawns. So that I can track this further,
> please send the output of ompi_info.
>
> Thanks,
>
> Tim
>
> On Tuesday 27 February 2007 10:15 am, Rozzen.VINCONT_at_[hidden] wrote:
>> Do you know if there is a limit to the number of MPI_Comm_spawn calls we can
>> make in order to launch a program? I want to start and stop a program several
>> times (with the function MPI_Comm_spawn), but every time, after 31
>> MPI_Comm_spawn calls, I get a "segmentation fault". Could you give me your
>> point of view on how to solve this problem?
>> Thanks
>>
>> /*file .c : spawned the file Exe*/
>> #include <stdio.h>
>> #include <malloc.h>
>> #include <unistd.h>
>> #include "mpi.h"
>> #include <pthread.h>
>> #include <signal.h>
>> #include <sys/time.h>
>> #include <errno.h>
>> #define EXE_TEST "/home/workspace/test_spaw1/src/Exe"
>>
>>
>>
>> int main( int argc, char **argv ) {
>>
>> long *lpBufferMpi;
>> MPI_Comm lIntercom;
>> int lErrcode;
>> MPI_Comm lCommunicateur;
>> int lRangMain,lRangExe,lMessageEnvoi,lIter,NiveauThreadVoulu,
>> NiveauThreadObtenu,lTailleBuffer; int *lpMessageEnvoi=&lMessageEnvoi;
>> MPI_Status lStatus; /*status de reception*/
>>
>> lIter=0;
>>
>>
>> /* MPI environnement */
>>
>> printf("main*******************************\n");
>> printf("main : Lancement MPI*\n");
>>
>> NiveauThreadVoulu = MPI_THREAD_MULTIPLE;
>> MPI_Init_thread( &argc, &argv, NiveauThreadVoulu, &NiveauThreadObtenu
>> ); lpBufferMpi = calloc( 10000, sizeof(long));
>> MPI_Buffer_attach( (void*)lpBufferMpi, 10000 * sizeof(long) );
>>
>> while (lIter<1000){
>> lIter ++;
>> lIntercom=(MPI_Comm)-1 ;
>>
>> MPI_Comm_spawn( EXE_TEST, NULL, 1, MPI_INFO_NULL,
>> 0, MPI_COMM_WORLD, &lIntercom, &lErrcode );
>> printf( "%i main***MPI_Comm_spawn return : %d\n",lIter, lErrcode );
>>
>> if(lIntercom == (MPI_Comm)-1 ){
>> printf("%i Intercom null\n",lIter);
>> return 0;
>> }
>> MPI_Intercomm_merge(lIntercom, 0,&lCommunicateur );
>> MPI_Comm_rank( lCommunicateur, &lRangMain);
>> lRangExe=1-lRangMain;
>>
>> printf("%i main***Rang main : %i Rang exe : %i
>> \n",lIter,(int)lRangMain,(int)lRangExe); sleep(2);
>>
>> }
>>
>>
>> /* Arret de l'environnement MPI */
>> lTailleBuffer=10000* sizeof(long);
>> MPI_Buffer_detach( (void*)lpBufferMpi, &lTailleBuffer );
>> MPI_Comm_free( &lCommunicateur );
>> MPI_Finalize( );
>> free( lpBufferMpi );
>>
>> printf( "Main = End .\n" );
>> return 0;
>>
>> }
>> /**************************************************************************
>> **********************/ Exe:
>> #include <string.h>
>> #include <stdlib.h>
>> #include <stdio.h>
>> #include <malloc.h>
>> #include <unistd.h> /* pour sleep() */
>> #include <pthread.h>
>> #include <semaphore.h>
>> #include "mpi.h"
>>
>> int main( int argc, char **argv ) {
>> /*1)pour communiaction MPI*/
>> MPI_Comm lCommunicateur; /*communicateur du process*/
>> MPI_Comm CommParent; /*Communiacteur parent à récupérer*/
>> int lRank; /*rang du communicateur du process*/
>> int lRangMain; /*rang du séquenceur si lancé en mode
>> normal*/ int lTailleCommunicateur; /*taille du communicateur;*/
>> long *lpBufferMpi; /*buffer pour message*/
>> int lBufferSize; /*taille du buffer*/
>>
>> /*2) pour les thread*/
>> int NiveauThreadVoulu, NiveauThreadObtenu;
>>
>>
>> lCommunicateur = (MPI_Comm)-1;
>> NiveauThreadVoulu = MPI_THREAD_MULTIPLE;
>> int erreur = MPI_Init_thread( &argc, &argv, NiveauThreadVoulu,
>> &NiveauThreadObtenu );
>>
>> if (erreur!=0){
>> printf("erreur\n");
>> free( lpBufferMpi );
>> return -1;
>> }
>>
>> /*2) Attachement à un buffer pour le message*/
>> lBufferSize=10000 * sizeof(long);
>> lpBufferMpi = calloc( 10000, sizeof(long));
>> erreur = MPI_Buffer_attach( (void*)lpBufferMpi, lBufferSize );
>>
>> if (erreur!=0){
>> printf("erreur\n");
>> free( lpBufferMpi );
>> return -1;
>> }
>>
>> printf( "Exe : Lance \n" );
>> MPI_Comm_get_parent(&CommParent);
>> MPI_Intercomm_merge( CommParent, 1, &lCommunicateur );
>> MPI_Comm_rank( lCommunicateur, &lRank );
>> MPI_Comm_size( lCommunicateur, &lTailleCommunicateur );
>> lRangMain =1-lRank;
>> printf( "Exe: lRankExe = %d lRankMain = %d\n", lRank , lRangMain,
>> lTailleCommunicateur);
>>
>> sleep(1);
>> MPI_Buffer_detach( (void*)lpBufferMpi, &lBufferSize );
>> MPI_Comm_free( &lCommunicateur );
>> MPI_Finalize( );
>> free( lpBufferMpi );
>> printf( "Exe: Fin.\n\n\n" );
>> }
>>
>>
>> /**************************************************************************
>> **********************/ result :
>> main*******************************
>> main : Lancement MPI*
>> 1 main***MPI_Comm_spawn return : 0
>> Exe : Lance
>> 1 main***Rang main : 0 Rang exe : 1
>> Exe: lRankExe = 1 lRankMain = 0
>> Exe: Fin.
>>
>>
>> 2 main***MPI_Comm_spawn return : 0
>> Exe : Lance
>> 2 main***Rang main : 0 Rang exe : 1
>> Exe: lRankExe = 1 lRankMain = 0
>> Exe: Fin.
>>
>>
>> 3 main***MPI_Comm_spawn return : 0
>> Exe : Lance
>> 3 main***Rang main : 0 Rang exe : 1
>> Exe: lRankExe = 1 lRankMain = 0
>> Exe: Fin.
>>
>> ....
>>
>> 30 main***MPI_Comm_spawn return : 0
>> Exe : Lance
>> 30 main***Rang main : 0 Rang exe : 1
>> Exe: lRankExe = 1 lRankMain = 0
>> Exe: Fin.
>>
>>
>> 31 main***MPI_Comm_spawn return : 0
>> Exe : Lance
>> 31 main***Rang main : 0 Rang exe : 1
>> Exe: lRankExe = 1 lRankMain = 0
>> Erreur de segmentation
>>
>>
>>
>> _______________________________________________
>> users mailing list
>> users_at_[hidden]
>> http://www.open-mpi.org/mailman/listinfo.cgi/users
>
> _______________________________________________
> users mailing list
> users_at_[hidden]
> http://www.open-mpi.org/mailman/listinfo.cgi/users
>
> _______________________________________________
> users mailing list
> users_at_[hidden]
> http://www.open-mpi.org/mailman/listinfo.cgi/users