Open MPI logo

Open MPI Development Mailing List Archives

  |   Home   |   Support   |   FAQ   |   all Development mailing list

From: Jose Quiroga (joseluisquiroga_at_[hidden])
Date: 2006-11-08 16:31:09


/*

Hi OpenMPI people,

This mail is itself the C code that reproduces the
bug.

The basic idea is that on a single-processor Debian
machine, using only TCP messaging, Open MPI 1.1.2 (and
previous versions) aborts in this sample program during
MPI_Barrier when run with three processes (more
processes fail too, but less predictably).

My best guess is that point-to-point messaging (when
sends and receives are not fully paired between
collective calls) is interfering with collective messaging.

Attached is the gzip of config.log (the zip one
"may cause irreparable damage to your computer and
your files").

Thanks for OpenMPI.

JLQ.

*/

#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>

// One Debian machine. 3 processes. Only TCP
// messaging.

// IT MIGHT BE A LITTLE TRICKY TO GET THE ACTUAL ERROR
// (one more reason to think it is a bug) because the
// program might just run!

// Playing a little bit with the following defines
// should get you the error listed at the end.

// Observation. This is a simple scenario to reproduce
// the bug, and you cannot use a big BUFF_SZ because
// the program might deadlock (all processes trying to
// send).

/* Number of send rounds: the upper bound of the outer loop in main(). */
#define KNT_1 10
/* Element count (in longs) of both the send and the receive buffer. */
#define BUFF_SZ 100

/*
 * Message tags used for the point-to-point traffic of this reproducer.
 * Values start at 100 and increase by one.
 */
enum brain_exception_code {
        k_tag_1 = 100,  /* tag passed to every MPI_Send in main() */
        k_tag_2 = 101   /* not referenced anywhere in the code shown */
};

/*
 * Runs MPI_Barrier on MPI_COMM_WORLD from inside its own function --
 * presumably so the barrier appears under a recognizable frame name in a
 * crash backtrace.
 *
 * The return code is discarded on purpose: this program never installs an
 * error handler, and under MPI's default handler (MPI_ERRORS_ARE_FATAL) a
 * failing barrier aborts the job rather than returning an error.
 */
void BARRIER_BUG_CALL(void){
        (void)MPI_Barrier(MPI_COMM_WORLD);
}

int main (int argc, char **argv) {

        int my_rank = 0, num_ranks = 0;
        MPI_Request request;
        MPI_Status status;
        long out_buff[BUFF_SZ], in_buff[BUFF_SZ];
        int in_flag = 0;

        MPI_Init(&argc, &argv);
        MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
        MPI_Comm_size(MPI_COMM_WORLD, &num_ranks);

        MPI_Irecv(in_buff, BUFF_SZ, MPI_LONG,
MPI_ANY_SOURCE,

                MPI_ANY_TAG, MPI_COMM_WORLD, &request
);

        for(long bb = 0; bb < KNT_1; bb++){
                long hh = 0;

                for(hh = 0; hh < num_ranks; hh++){
                        if(hh != my_rank){
                                for(long nn = 0; nn <
BUFF_SZ; nn++){
                                        out_buff[nn] =
bb;
                                }
                                MPI_Send(out_buff,
BUFF_SZ, MPI_LONG, hh, k_tag_1,
MPI_COMM_WORLD);
                        }
                }

                MPI_Test(&request, &in_flag, &status);
                if(in_flag){
                        int src = status.MPI_SOURCE;
                        //printf(" %i.%i.%i.%i.",
my_rank, bb, src,
in_buff[0]);
                        MPI_Irecv(in_buff, BUFF_SZ,
MPI_LONG,
MPI_ANY_SOURCE,
                                MPI_ANY_TAG,
MPI_COMM_WORLD, &request);
                }
        }

        BARRIER_BUG_CALL();

        if(my_rank == 0){
                printf("FINISHED 0.\n");
        }

        MPI_Finalize();

        return 0;
}

/* run command:
mpirun -np 3 -host 127.0.0.1 barrier_bug
*/

/*
Signal:11 info.si_errno:0(Success)
si_code:1(SEGV_MAPERR)
Failing at addr:0x28
[0] func:/lib/openmpi/lib/libopal.so.0 [0x4011cd8f]
[1] func:/lib/libpthread.so.0 [0x402588cb]
[2] func:/lib/libc.so.6 [0x402c76f0]
[3]
func:/lib/openmpi-1.1.2/lib/openmpi/mca_pml_ob1.so(mca_pml_ob1_recv_frag_match+0x1e3)
[0x404782e3]
[4]
func:/lib/openmpi-1.1.2/lib/openmpi/mca_pml_ob1.so(mca_pml_ob1_recv_frag_callback+0x88)
[0x40477cf8]
[5]
func:/lib/openmpi-1.1.2/lib/openmpi/mca_btl_sm.so(mca_btl_sm_component_progress+0x84b)
[0x4048a80b]
[6]
func:/lib/openmpi-1.1.2/lib/openmpi/mca_bml_r2.so(mca_bml_r2_progress+0x36)
[0x40480dc6]
[7]
func:/lib/openmpi/lib/libopal.so.0(opal_progress+0xb9)
[0x40106bd9]
[8] func:/lib/openmpi-1.1.2/lib/openmpi/mca_pml_ob1.so
[0x404764b5]
[9]
func:/lib/openmpi-1.1.2/lib/openmpi/mca_pml_ob1.so(mca_pml_ob1_recv+0x1dc)
[0x4047624c]
[10]
func:/lib/openmpi-1.1.2/lib/openmpi/mca_coll_tuned.so(ompi_coll_tuned_barrier_intra_recursivedoubling+0x253)
[0x404b1da3]
[11]
func:/lib/openmpi-1.1.2/lib/openmpi/mca_coll_tuned.so(ompi_coll_tuned_barrier_intra_dec_fixed+0x28)
[0x404af818]
[12]
func:/lib/openmpi/lib/libmpi.so.0(PMPI_Barrier+0xdc)
[0x4007959c]
[13] func:barrier_bug(_Z16BARRIER_BUG_CALLv+0x12)
[0x804ea16]
[14] func:barrier_bug(main+0x1db) [0x804ebf7]
[15] func:/lib/libc.so.6(__libc_start_main+0xc6)
[0x402b3e36]
[16] func:barrier_bug(__gxx_personality_v0+0x42d)
[0x804e961]
*** End of error message ***
*/

/* ompi_info output:

                Open MPI: 1.1.2
   Open MPI SVN revision: r12073
                Open RTE: 1.1.2
   Open RTE SVN revision: r12073
                    OPAL: 1.1.2
       OPAL SVN revision: r12073
                  Prefix: /lib/openmpi-1.1.2
 Configured architecture: i686-pc-linux-gnu
           Configured by: webmgr
           Configured on: Sun Oct 29 09:49:39 COT 2006
          Configure host: aculiar.aculiar.com
                Built by: webmgr
                Built on: dom oct 29 12:15:26 COT 2006
              Built host: aculiar.aculiar.com
              C bindings: yes
            C++ bindings: yes
      Fortran77 bindings: no
      Fortran90 bindings: no
 Fortran90 bindings size: na
              C compiler: gcc
     C compiler absolute: /usr/bin/gcc
            C++ compiler: g++
   C++ compiler absolute: /usr/bin/g++
      Fortran77 compiler: none
  Fortran77 compiler abs: none
      Fortran90 compiler: none
  Fortran90 compiler abs: none
             C profiling: yes
           C++ profiling: yes
     Fortran77 profiling: no
     Fortran90 profiling: no
          C++ exceptions: no
          Thread support: posix (mpi: no, progress:
no)
  Internal debug support: no
     MPI parameter check: runtime
Memory profiling support: no
Memory debugging support: no
         libltdl support: yes
              MCA memory: ptmalloc2 (MCA v1.0, API
v1.0, Component v1.1.2)
           MCA paffinity: linux (MCA v1.0, API v1.0,
Component v1.1.2)
           MCA maffinity: first_use (MCA v1.0, API
v1.0, Component v1.1.2)
               MCA timer: linux (MCA v1.0, API v1.0,
Component v1.1.2)
           MCA allocator: basic (MCA v1.0, API v1.0,
Component v1.0)
           MCA allocator: bucket (MCA v1.0, API v1.0,
Component v1.0)
                MCA coll: basic (MCA v1.0, API v1.0,
Component v1.1.2)
                MCA coll: hierarch (MCA v1.0, API
v1.0, Component v1.1.2)
                MCA coll: self (MCA v1.0, API v1.0,
Component v1.1.2)
                MCA coll: sm (MCA v1.0, API v1.0,
Component v1.1.2)
                MCA coll: tuned (MCA v1.0, API v1.0,
Component v1.1.2)
                  MCA io: romio (MCA v1.0, API v1.0,
Component v1.1.2)
               MCA mpool: sm (MCA v1.0, API v1.0,
Component v1.1.2)
                 MCA pml: ob1 (MCA v1.0, API v1.0,
Component v1.1.2)
                 MCA bml: r2 (MCA v1.0, API v1.0,
Component v1.1.2)
              MCA rcache: rb (MCA v1.0, API v1.0,
Component v1.1.2)
                 MCA btl: self (MCA v1.0, API v1.0,
Component v1.1.2)
                 MCA btl: sm (MCA v1.0, API v1.0,
Component v1.1.2)
                 MCA btl: tcp (MCA v1.0, API v1.0,
Component v1.0)
                MCA topo: unity (MCA v1.0, API v1.0,
Component v1.1.2)
                 MCA osc: pt2pt (MCA v1.0, API v1.0,
Component v1.0)
                 MCA gpr: null (MCA v1.0, API v1.0,
Component v1.1.2)
                 MCA gpr: proxy (MCA v1.0, API v1.0,
Component v1.1.2)
                 MCA gpr: replica (MCA v1.0, API v1.0,
Component v1.1.2)
                 MCA iof: proxy (MCA v1.0, API v1.0,
Component v1.1.2)
                 MCA iof: svc (MCA v1.0, API v1.0,
Component v1.1.2)
                  MCA ns: proxy (MCA v1.0, API v1.0,
Component v1.1.2)
                  MCA ns: replica (MCA v1.0, API v1.0,
Component v1.1.2)
                 MCA oob: tcp (MCA v1.0, API v1.0,
Component v1.0)
                 MCA ras: dash_host (MCA v1.0, API
v1.0, Component v1.1.2)
                 MCA ras: hostfile (MCA v1.0, API
v1.0, Component v1.1.2)
                 MCA ras: localhost (MCA v1.0, API
v1.0, Component v1.1.2)
                 MCA ras: poe (MCA v1.0, API v1.0,
Component v1.1.2)
                 MCA ras: slurm (MCA v1.0, API v1.0,
Component v1.1.2)
                 MCA rds: hostfile (MCA v1.0, API
v1.0, Component v1.1.2)
                 MCA rds: resfile (MCA v1.0, API v1.0,
Component v1.1.2)
               MCA rmaps: round_robin (MCA v1.0, API
v1.0, Component v1.1.2)
                MCA rmgr: proxy (MCA v1.0, API v1.0,
Component v1.1.2)
                MCA rmgr: urm (MCA v1.0, API v1.0,
Component v1.1.2)
                 MCA rml: oob (MCA v1.0, API v1.0,
Component v1.1.2)
                 MCA pls: fork (MCA v1.0, API v1.0,
Component v1.1.2)
                 MCA pls: rsh (MCA v1.0, API v1.0,
Component v1.1.2)
                 MCA pls: slurm (MCA v1.0, API v1.0,
Component v1.1.2)
                 MCA sds: env (MCA v1.0, API v1.0,
Component v1.1.2)
                 MCA sds: pipe (MCA v1.0, API v1.0,
Component v1.1.2)
                 MCA sds: seed (MCA v1.0, API v1.0,
Component v1.1.2)
                 MCA sds: singleton (MCA v1.0, API
v1.0, Component v1.1.2)
                 MCA sds: slurm (MCA v1.0, API v1.0,
Component v1.1.2)
*/

 
____________________________________________________________________________________
Yahoo! Music Unlimited
Access over 1 million songs.
http://music.yahoo.com/unlimited