Although I do not think that Valgrind supports mpic++, I have tried to run it. 

This is what I got, thanks
------------------------------------------------------------
==18729== Memcheck, a memory error detector
==18729== Copyright (C) 2002-2009, and GNU GPL'd, by Julian Seward et al.
==18729== Using Valgrind-3.5.0 and LibVEX; rerun with -h for copyright info
==18729== Command: ./nsga2b
==18729== Parent PID: 18726
==18729==
--18729--
--18729-- Valgrind options:
--18729--    --tool=memcheck
--18729--    --error-limit=no
--18729--    --leak-check=full
--18729--    --log-file=nsga2b_valg.log
--18729--    -v
--18729-- Contents of /proc/version:
--18729--   Linux version 2.6.18-128.1.1.el5.530g0000 (root@kalamata) (gcc version 4.1.2 20071124 (Red Hat 4.1.2-42)) #1 SMP Tue Mar 17 21:49:24 EDT 2009
--18729-- Arch and hwcaps: AMD64, amd64-sse3-cx16
--18729-- Page sizes: currently 4096, max supported 4096
--18729-- Valgrind library directory: /usr/lib64/valgrind
--18729-- Reading syms from /lustre/nsga2b (0x400000)
--18729-- warning: DiCfSI 0x0 .. 0x0 outside segment 0x4438f0 .. 0xd81e77
--18729-- warning: DiCfSI 0x1 .. 0x3 outside segment 0x4438f0 .. 0xd81e77
--18729-- warning: DiCfSI 0x4 .. 0x2a outside segment 0x4438f0 .. 0xd81e77
--18729-- warning: DiCfSI 0x0 .. 0x0 outside segment 0x4438f0 .. 0xd81e77
--18729-- warning: DiCfSI 0x1 .. 0x3 outside segment 0x4438f0 .. 0xd81e77
--18729-- warning: DiCfSI 0x4 .. 0x2a outside segment 0x4438f0 .. 0xd81e77
--18729-- warning: DiCfSI 0x0 .. 0x0 outside segment 0x4438f0 .. 0xd81e77
--18729-- warning: DiCfSI 0x1 .. 0x3 outside segment 0x4438f0 .. 0xd81e77
--18729-- warning: DiCfSI 0x4 .. 0xb outside segment 0x4438f0 .. 0xd81e77
--18729-- warning: DiCfSI 0xc .. 0xaa outside segment 0x4438f0 .. 0xd81e77
--18729-- Reading syms from /usr/lib64/valgrind/memcheck-amd64-linux (0x38000000)
--18729--    object doesn't have a dynamic symbol table
--18729-- Reading syms from /lib64/ld-2.5.so (0x3f75c00000)
--18729-- Reading suppressions file: /usr/lib64/valgrind/default.supp
--18729-- REDIR: 0x3f75c145d0 (strlen) redirected to 0x3803e767 (vgPlain_amd64_linux_REDIR_FOR_strlen)
--18729-- Reading syms from /usr/lib64/valgrind/vgpreload_core-amd64-linux.so (0x4802000)
--18729-- Reading syms from /usr/lib64/valgrind/vgpreload_memcheck-amd64-linux.so (0x4a03000)
==18729== WARNING: new redirection conflicts with existing -- ignoring it
--18729--     new: 0x3f75c145d0 (strlen              ) R-> 0x04a06dc0 strlen
--18729-- REDIR: 0x3f75c143f0 (index) redirected to 0x4a06c30 (index)
--18729-- REDIR: 0x3f75c145a0 (strcmp) redirected to 0x4a06e90 (strcmp)
--18729-- Reading syms from /opt/openmpi-1.3.4-gnu/lib/libmpi_cxx.so.0.0.0 (0x4c0a000)
--18729-- Reading syms from /opt/openmpi-1.3.4-gnu/lib/libmpi.so.0.0.1 (0x4e26000)
--18729-- Reading syms from /opt/openmpi-1.3.4-gnu/lib/libopen-rte.so.0.0.0 (0x5258000)
--18729-- Reading syms from /opt/openmpi-1.3.4-gnu/lib/libopen-pal.so.0.0.0 (0x54db000)
--18729-- Reading syms from /usr/lib64/librdmacm.so.1.0.0 (0x3f77000000)
--18729--    object doesn't have a symbol table
--18729-- Reading syms from /usr/lib64/libibverbs.so.1.0.0 (0x3f76400000)
--18729--    object doesn't have a symbol table
--18729-- Reading syms from /usr/lib64/libdat.so.1.0.2 (0x5778000)
--18729--    object doesn't have a symbol table
--18729-- Reading syms from /scratch/torque-2.4.2/lib/libtorque.so.2.0.0 (0x5982000)
--18729-- Reading syms from /lib64/libdl-2.5.so (0x3f76800000)
--18729-- Reading syms from /lib64/libnsl-2.5.so (0x3f7fe00000)
--18729-- Reading syms from /lib64/libutil-2.5.so (0x3f84e00000)
--18729-- Reading syms from /lib64/libm-2.5.so (0x5c97000)
--18729-- Reading syms from /usr/lib64/libstdc++.so.6.0.8 (0x3f7c800000)
--18729--    object doesn't have a symbol table
--18729-- Reading syms from /lib64/libgcc_s-4.1.2-20080825.so.1 (0x3f7b800000)
--18729--    object doesn't have a symbol table
--18729-- Reading syms from /lib64/libpthread-2.5.so (0x3f76c00000)
--18729-- Reading syms from /lib64/libc-2.5.so (0x3f76000000)
--18729-- REDIR: 0x3f7607ae00 (memset) redirected to 0x4a07030 (memset)
--18729-- REDIR: 0x3f7607c240 (memcpy) redirected to 0x4a08030 (memcpy)
--18729-- REDIR: 0x3f76079f40 (rindex) redirected to 0x4a06ae0 (rindex)
--18729-- REDIR: 0x3f76079b50 (strlen) redirected to 0x4a06d80 (strlen)
--18729-- REDIR: 0x3f76074dc0 (malloc) redirected to 0x4a05d9a (malloc)
--18729-- REDIR: 0x3f76072870 (free) redirected to 0x4a059aa (free)
--18729-- REDIR: 0x3f76079e90 (strncpy) redirected to 0x4a081a0 (strncpy)
--18729-- REDIR: 0x3f76079dd0 (strncmp) redirected to 0x4a06de0 (strncmp)
--18729-- REDIR: 0x3f760749e0 (calloc) redirected to 0x4a05092 (calloc)
--18729-- REDIR: 0x3f7c8bd1c0 (operator new(unsigned long)) redirected to 0x4a065ea (operator new(unsigned long))
--18729-- REDIR: 0x3f7607b930 (mempcpy) redirected to 0x4a07870 (mempcpy)
--18729-- REDIR: 0xffffffffff600400 (???) redirected to 0x3803e75d (vgPlain_amd64_linux_REDIR_FOR_vtime)
--18729-- REDIR: 0x3f76079610 (strcpy) redirected to 0x4a082c0 (strcpy)
--18729-- REDIR: 0x3f760795d0 (strcmp) redirected to 0x4a06e50 (strcmp)
--18729-- REDIR: 0x3f76079c40 (strnlen) redirected to 0x4a06d50 (strnlen)
--18729-- REDIR: 0x3f7c8bbf50 (operator delete(void*)) redirected to 0x4a056bc (operator delete(void*))
--18729-- REDIR: 0x3f7607a640 (memchr) redirected to 0x4a06f10 (memchr)
--18729-- REDIR: 0x3f76079420 (index) redirected to 0x4a06b70 (index)
--18729-- REDIR: 0x3f7607ac60 (memmove) redirected to 0x4a07080 (memmove)
--18729-- REDIR: 0x3f760752e0 (realloc) redirected to 0x4a05e4b (realloc)

--18729-- REDIR: 0x3f76079d30 (strncat) redirected to 0x4a06c60 (strncat)
--18729-- REDIR: 0x3f76079260 (strcat) redirected to 0x4a07590 (strcat)
--18729-- REDIR: 0x3f7607cea0 (rawmemchr) redirected to 0x4a07110 (rawmemchr)
--18729-- REDIR: 0x3f76074ff0 (memalign) redirected to 0x4a04f6c (memalign)
--18729-- REDIR: 0xffffffffff600000 (???) redirected to 0x3803e753 (vgPlain_amd64_linux_REDIR_FOR_vgettimeofday)
--18729-- REDIR: 0x3f7607cff0 (strchrnul) redirected to 0x4a070e0 (strchrnul)
--18729-- REDIR: 0x3f7607bf40 (stpcpy) redirected to 0x4a07b10 (stpcpy)
--18729-- Reading syms from /lib64/libnss_files-2.5.so (0x794b000)
--18729-- Reading syms from /lib64/libnss_nis-2.5.so (0x7b56000)
--18729-- REDIR: 0x3f76075230 (posix_memalign) redirected to 0x4a05047 (posix_memalign)
--18729-- Reading syms from /usr/lib64/libdaplcma.so.1.0.2 (0x8762000)
--18729--    object doesn't have a symbol table
==18729== Invalid write of size 8
==18729==    at 0x443BEF: initPopPara(population*, std::vector<message_para_to_workersT, std::allocator<message_para_to_workersT> >&, initParaType&, int, int, std::vector<double, std::allocator<double> >&) (main-parallel2.cpp:552)
==18729==    by 0x44F12E: main (main-parallel2.cpp:204)
==18729==  Address 0x62c9da0 is 0 bytes after a block of size 0 alloc'd
==18729==    at 0x4A0666E: operator new(unsigned long) (vg_replace_malloc.c:220)
==18729==    by 0x4573E4: void std::__uninitialized_fill_n_aux<message_para_to_workersT*, unsigned long, message_para_to_workersT>(message_para_to_workersT*, unsigned long, message_para_to_workersT const&, __false_type) (new_allocator.h:88)
==18729==    by 0x4576CF: void std::__uninitialized_fill_n_a<message_para_to_workersT*, unsigned long, message_para_to_workersT, message_para_to_workersT>(message_para_to_workersT*, unsigned long, message_para_to_workersT const&, std::allocator<message_para_to_workersT>) (stl_uninitialized.h:218)
==18729==    by 0x44EE2E: main (stl_vector.h:218)
==18729==
==18729== Invalid read of size 8
==18729==    at 0x44F13A: main (main-parallel2.cpp:208)
==18729==  Address 0x62c9d60 is 0 bytes after a block of size 0 alloc'd
==18729==    at 0x4A0666E: operator new(unsigned long) (vg_replace_malloc.c:220)
==18729==    by 0x45733D: void std::__uninitialized_fill_n_aux<message_para_to_workersT*, unsigned long, message_para_to_workersT>(message_para_to_workersT*, unsigned long, message_para_to_workersT const&, __false_type) (new_allocator.h:88)
==18729==    by 0x4576CF: void std::__uninitialized_fill_n_a<message_para_to_workersT*, unsigned long, message_para_to_workersT, message_para_to_workersT>(message_para_to_workersT*, unsigned long, message_para_to_workersT const&, std::allocator<message_para_to_workersT>) (stl_uninitialized.h:218)
==18729==    by 0x44EE2E: main (stl_vector.h:218)
==18729==

valgrind: m_mallocfree.c:225 (mk_plain_bszB): Assertion 'bszB != 0' failed.
valgrind: This is probably caused by your program erroneously writing past the
end of a heap block and corrupting heap metadata.  If you fix any
invalid writes reported by Memcheck, this assertion failure will
probably go away.  Please try that before reporting this as a bug.

==18729==    at 0x38029D5C: report_and_quit (m_libcassert.c:145)
==18729==    by 0x3802A032: vgPlain_assert_fail (m_libcassert.c:217)
==18729==    by 0x38035645: vgPlain_arena_malloc (m_mallocfree.c:225)
==18729==    by 0x38002BB5: vgMemCheck_new_block (mc_malloc_wrappers.c:199)
==18729==    by 0x38002F6B: vgMemCheck___builtin_new (mc_malloc_wrappers.c:246)
==18729==    by 0x3806070C: do_client_request (scheduler.c:1362)
==18729==    by 0x38061D30: vgPlain_scheduler (scheduler.c:1061)
==18729==    by 0x38085E6E: run_a_thread_NORETURN (syswrap-linux.c:91)

sched status:
  running_tid=1

Thread 1: status = VgTs_Runnable
==18729==    at 0x4A0666E: operator new(unsigned long) (vg_replace_malloc.c:220)
==18729==    by 0x464506: __gnu_cxx::new_allocator<int>::allocate(unsigned long, void const*) (new_allocator.h:88)
==18729==    by 0x46452E: std::_Vector_base<int, std::allocator<int> >::_M_allocate(unsigned long) (stl_vector.h:127)
==18729==    by 0x464560: std::_Vector_base<int, std::allocator<int> >::_Vector_base(unsigned long, std::allocator<int> const&) (stl_vector.h:113)
==18729==    by 0x464B6A: std::vector<int, std::allocator<int> >::vector(unsigned long, int const&, std::allocator<int> const&) (stl_vector.h:216)
==18729==    by 0x488F62: Index::Index() (index.cpp:20)
==18729==    by 0x489147: ReadFile(char const*) (index.cpp:86)
==18729==    by 0x48941C: ImportIndices() (index.cpp:121)
==18729==    by 0x445D00: myNeplanTaskScheduler(CNSGA2*, int, int, int, population*, char, int, std::vector<message_para_to_workersT, std::allocator<message_para_to_workersT> >&, ompi_datatype_t*, int&, int&, std::vector<std::vector<double, std::allocator<double> >, std::allocator<std::vector<double, std::allocator<double> > > >&, std::vector<std::vector<double, std::allocator<double> >, std::allocator<std::vector<double, std::allocator<double> > > >&, std::vector<double, std::allocator<double> >&, int, std::vector<std::vector<double, std::allocator<double> >, std::allocator<std::vector<double, std::allocator<double> > > >&, ompi_datatype_t*, int, ompi_datatype_t*, int) (myNetplanScheduler.cpp:109)
==18729==    by 0x44F2DF: main (main-parallel2.cpp:216)


Note: see also the FAQ in the source distribution.
It contains workarounds to several common problems.
In particular, if Valgrind aborted or crashed after
identifying problems in your program, there's a good chance
that fixing those problems will prevent Valgrind aborting or
crashing, especially if it happened in m_mallocfree.c.


------------------------------------------------------------



> Subject: Re: [OMPI users] OMPI seg fault by a class with weird address.
> From: jsquyres@cisco.com
> Date: Wed, 16 Mar 2011 06:43:01 -0400
> To: dtustudy68@hotmail.com
> CC: users@open-mpi.org
>
> Did you run with a memory checking debugger like Valgrind?
>
> Sent from my phone. No type good.
>
> On Mar 15, 2011, at 8:30 PM, "Jack Bryan" <dtustudy68@hotmail.com> wrote:
>
> > Hi,
> >
> > I have installed a new open MPI 1.3.4.
> >
> > But I got more weird errors:
> >
> > *** glibc detected *** /lustre/nsga2b: malloc(): memory corruption (fast): 0x000000001cafc450 ***
> > ======= Backtrace: =========
> > /lib64/libc.so.6[0x3c50272aeb]
> > /lib64/libc.so.6(__libc_malloc+0x7a)[0x3c5027402a]
> > /usr/lib64/libstdc++.so.6(_Znwm+0x1d)[0x3c590bd17d]
> > /lustre/jxding/netplan49/nsga2b[0x445bc6]
> > /lustre/jxding/netplan49/nsga2b[0x44f43b]
> > /lib64/libc.so.6(__libc_start_main+0xf4)[0x3c5021d974]
> > /lustre/jxding/netplan49/nsga2b(__gxx_personality_v0+0x499)[0x443909]
> > ======= Memory map: ========
> > 00400000-00f33000 r-xp 00000000 6ac:e3210 685016360 /lustre/netplan49/nsga2b
> > 01132000-0117e000 rwxp 00b32000 6ac:e3210 685016360 /lustre/netplan49/nsga2b
> > 0117e000-01188000 rwxp 0117e000 00:00 0
> > 1ca11000-1ca78000 rwxp 1ca11000 00:00 0
> > 1ca78000-1ca79000 rwxp 1ca78000 00:00 0
> > 1ca79000-1ca7a000 rwxp 1ca79000 00:00 0
> > 1ca7a000-1cab8000 rwxp 1ca7a000 00:00 0
> > 1cab8000-1cac7000 rwxp 1cab8000 00:00 0
> > 1cac7000-1cacf000 rwxp 1cac7000 00:00 0
> > 1cacf000-1cad0000 rwxp 1cacf000 00:00 0
> > 1cad0000-1cad1000 rwxp 1cad0000 00:00 0
> > 1cad1000-1cad2000 rwxp 1cad1000 00:00 0
> > 1cad2000-1cada000 rwxp 1cad2000 00:00 0
> > 1cada000-1cadc000 rwxp 1cada000 00:00 0
> > 1cadc000-1cae0000 rwxp 1cadc000 00:00 0
> >
> > .........................
> > 3512600000-3512605000 r-xp 00000000 00:11 12043 /usr/lib64/librdmacm.so.1
> > 3512605000-3512804000 ---p 00005000 00:11 12043 /usr/lib64/librdmacm.so.1
> > 3512804000-3512805000 rwxp 00004000 00:11 12043 /usr/lib64/librdmacm.so.1
> > 3512e00000-3512e0c000 r-xp 00000000 00:11 5545 /usr/lib64/libibverbs.so.1
> > 3512e0c000-351300b000 ---p 0000c000 00:11 5545 /usr/lib64/libibverbs.so.1
> > 351300b000-351300c000 rwxp 0000b000 00:11 5545 /usr/lib64/libibverbs.so.1
> > 3c4f200000-3c4f21c000 r-xp 00000000 00:11 2853 /lib64/ld-2.5.so
> > 3c4f41b000-3c4f41c000 r-xp 0001b000 00:11 2853 /lib64/ld-2.5.so
> > 3c4f41c000-3c4f41d000 rwxp 0001c000 00:11 2853 /lib64/ld-2.5.so
> > 3c50200000-3c5034c000 r-xp 00000000 00:11 897 /lib64/libc.so.6
> > 3c5034c000-3c5054c000 ---p 0014c000 00:11 897 /lib64/libc.so.6
> > 3c5054c000-3c50550000 r-xp 0014c000 00:11 897 /lib64/libc.so.6
> > 3c50550000-3c50551000 rwxp 00150000 00:11 897 /lib64/libc.so.6
> > 3c50551000-3c50556000 rwxp 3c50551000 00:00 0
> > 3c50600000-3c50682000 r-xp 00000000 00:11 2924 /lib64/libm.so.6
> > 3c50682000-3c50881000 ---p 00082000 00:11 2924 /lib64/libm.so.6
> > 3c50881000-3c50882000 r-xp 00081000 00:11 2924 /lib64/libm.so.6
> > 3c50882000-3c50883000 rwxp 00082000 00:11 2924 /lib64/libm.so.6
> > 3c50a00000-3c50a02000 r-xp 00000000 00:11 923 /lib64/libdl.so.2
> > 3c50a02000-3c50c02000 ---p 00002000 00:11 923 /lib64/libdl.so.2
> > 3c50c02000-3c50c03000 r-xp 00002000 00:11 923 /lib64/libdl.so.2
> > 3c50c03000-3c50c04000 rwxp 00003000 00:11 923 /lib64/libdl.so.2
> > 3c50e00000-3c50e16000 r-xp 00000000 00:11 1011 /lib64/libpthread.so.0
> >
> > .....................
> > 2ae87b05e000-2ae87b075000 r-xp 00000000 6ac:e3210 686492235 /lustre/mpi_protocol_091117/openmpi134/lib/libmpi_cxx.so.0.0.0
> > 2ae87b075000-2ae87b274000 ---p 00017000 6ac:e3210 686492235 /lustre/mpi_protocol_091117/openmpi134/lib/libmpi_cxx.so.0.0.0
> > 2ae87b274000-2ae87b277000 rwxp 00016000 6ac:e3210 686492235 /lustre/mpi_protocol_091117/openmpi134/lib/libmpi_cxx.so.0.0.0
> >
> >
> >
> > 7fff2fa38000-7fff2fa4e000 rwxp 7ffffffe9000 00:00 0 [stack]
> > ffffffffff600000-ffffffffffe00000 ---p 00000000 00:00 0 [vdso]
> > [n332:82320] *** Process received signal ***
> > [n332:82320] Signal: Aborted (6)
> > [n332:82320] Signal code: (-6)
> > [n332:82320] [ 0] /lib64/libpthread.so.0 [0x3c50e0e4c0]
> > [n332:82320] [ 1] /lib64/libc.so.6(gsignal+0x35) [0x3c50230215]
> > [n332:82320] [ 2] /lib64/libc.so.6(abort+0x110) [0x3c50231cc0]
> > [n332:82320] [ 3] /lib64/libc.so.6 [0x3c5026a7fb]
> > [n332:82320] [ 4] /lib64/libc.so.6 [0x3c50272aeb]
> > [n332:82320] [ 5] /lib64/libc.so.6(__libc_malloc+0x7a) [0x3c5027402a]
> > [n332:82320] [ 6] /usr/lib64/libstdc++.so.6(_Znwm+0x1d) [0x3c590bd17d]
> > [n332:82320] [ 7] /lustre/jxding/netplan49/nsga2b [0x445bc6]
> > [n332:82320] [ 8] /lustre/jxding/netplan49/nsga2b [0x44f43b]
> > [n332:82320] [ 9] /lib64/libc.so.6(__libc_start_main+0xf4) [0x3c5021d974]
> > [n332:82320] [10] /lustre/nsga2b(__gxx_personality_v0+0x499) [0x443909]
> > [n332:82320] *** End of error message ***
> > =>> PBS: job killed: walltime 117 exceeded limit 90
> > mpirun: killing job...
> >
> >
> >
> >
> > > Subject: Re: [OMPI users] OMPI seg fault by a class with weird address.
> > > From: jsquyres@cisco.com
> > > Date: Tue, 15 Mar 2011 12:50:41 -0400
> > > CC: users@open-mpi.org
> > > To: dtustudy68@hotmail.com
> > >
> > > You can:
> > >
> > > mpirun -np 4 valgrind ./my_application
> > >
> > > That is, you run 4 copies of valgrind, each with one instance of ./my_application. Then you'll get valgrind reports for your applications. You might want to dig into the valgrind command line options to have it dump the results to files with unique prefixes (e.g., PID and/or hostname) so that you can get a unique report from each process.
> > >
> > > If you disabled ptmalloc and you're still getting the same error, then it sounds like an application error. Check out and see what valgrind tells you.
> > >
> > >
> > >
> > > On Mar 15, 2011, at 11:25 AM, Jack Bryan wrote:
> > >
> > > > Thanks,
> > > >
> > > > From http://valgrind.org/docs/manual/mc-manual.html#mc-manual.mpiwrap
> > > >
> > > > I find that
> > > >
> > > > "Currently the wrappers are only buildable with mpiccs which are based on GNU GCC or Intel's C++ Compiler."
> > > >
> > > > The cluster which I am working on is using GNU Open MPI mpic++. I am afraid that the Valgrind wrapper cannot work here.
> > > >
> > > > I do not have system administrator authorization.
> > > >
> > > > Are there other mem-checkers (open source) that can do this ?
> > > >
> > > > thanks
> > > >
> > > > Jack
> > > >
> > > > > Subject: Re: [OMPI users] OMPI seg fault by a class with weird address.
> > > > > From: jsquyres@cisco.com
> > > > > Date: Tue, 15 Mar 2011 06:19:53 -0400
> > > > > CC: dtustudy68@hotmail.com
> > > > > To: users@open-mpi.org
> > > > >
> > > > > You may also want to run your program through a memory-checking debugger such as valgrind to see if it turns up any other problems.
> > > > >
> > > > > > AFAIK, ptmalloc should be fine for use with STL vector allocation.
> > > > >
> > > > >
> > > > > On Mar 15, 2011, at 4:00 AM, Belaid MOA wrote:
> > > > >
> > > > > > Hi Jack,
> > > > > > > I may need to see the whole code to decide, but my quick look suggests that ptmalloc is causing a problem with STL-vector allocation. ptmalloc is the Open MPI internal malloc library. Could you try to build Open MPI without memory management (using --without-memory-manager) and let us know the outcome? ptmalloc is not needed if you are not using an RDMA interconnect.
> > > > > >
> > > > > > With best regards,
> > > > > > -Belaid.
> > > > > >
> > > > > > From: dtustudy68@hotmail.com
> > > > > > To: belaid_moa@hotmail.com; users@open-mpi.org
> > > > > > Subject: RE: [OMPI users] OMPI seg fault by a class with weird address.
> > > > > > Date: Tue, 15 Mar 2011 00:30:19 -0600
> > > > > >
> > > > > > Hi,
> > > > > >
> > > > > > Because the code is very long, I just show the calling relationship of functions.
> > > > > >
> > > > > > main()
> > > > > > {
> > > > > > scheduler();
> > > > > >
> > > > > > }
> > > > > > scheduler()
> > > > > > {
> > > > > > ImportIndices();
> > > > > > }
> > > > > >
> > > > > > ImportIndices()
> > > > > > {
> > > > > > Index IdxNode ;
> > > > > > IdxNode = ReadFile("fileName");
> > > > > > }
> > > > > >
> > > > > > Index ReadFile(const char* fileinput)
> > > > > > {
> > > > > > Index TempIndex;
> > > > > > .........
> > > > > >
> > > > > > }
> > > > > >
> > > > > > vector<int> Index::GetPosition() const { return Position; }
> > > > > > vector<int> Index::GetColumn() const { return Column; }
> > > > > > vector<int> Index::GetYear() const { return Year; }
> > > > > > vector<string> Index::GetName() const { return Name; }
> > > > > > int Index::GetPosition(const int idx) const { return Position[idx]; }
> > > > > > int Index::GetColumn(const int idx) const { return Column[idx]; }
> > > > > > int Index::GetYear(const int idx) const { return Year[idx]; }
> > > > > > string Index::GetName(const int idx) const { return Name[idx]; }
> > > > > > int Index::GetSize() const { return Position.size(); }
> > > > > >
> > > > > > The sequential code works well, and there is no scheduler().
> > > > > >
> > > > > > The parallel code output from gdb:
> > > > > > ----------------------------------------------
> > > > > > Breakpoint 1, myNeplanTaskScheduler(CNSGA2 *, int, int, int, ._85 *, char, int, message_para_to_workers_VecT &, MPI_Datatype, int &, int &, std::vector<std::vector<double, std::allocator<double> >, std::allocator<std::vector<double, std::allocator<double> > > > &, std::vector<std::vector<double, std::allocator<double> >, std::allocator<std::vector<double, std::allocator<double> > > > &, std::vector<double, std::allocator<double> > &, int, std::vector<std::vector<double, std::allocator<double> >, std::allocator<std::vector<double, std::allocator<double> > > > &, MPI_Datatype, int, MPI_Datatype, int) (nsga2=0x118c490,
> > > > > > popSize=<value optimized out>, nodeSize=<value optimized out>,
> > > > > > myRank=<value optimized out>, myChildpop=0x1208d80, genCandTag=65 'A',
> > > > > > generationNum=1, myPopParaVec=std::vector of length 4, capacity 4 = {...},
> > > > > > message_to_master_type=0x7fffffffd540, myT1Flag=@0x7fffffffd68c,
> > > > > > myT2Flag=@0x7fffffffd688,
> > > > > > resultTaskPackageT1=std::vector of length 4, capacity 4 = {...},
> > > > > > resultTaskPackageT2Pr=std::vector of length 4, capacity 4 = {...},
> > > > > > xdataV=std::vector of length 4, capacity 4 = {...}, objSize=7,
> > > > > > resultTaskPackageT12=std::vector of length 4, capacity 4 = {...},
> > > > > > xdata_to_workers_type=0x121c410, myGenerationNum=1,
> > > > > > Mpara_to_workers_type=0x121b9b0, nconNum=0)
> > > > > > at src/nsga2/myNetplanScheduler.cpp:109
> > > > > > 109 ImportIndices();
> > > > > > (gdb) c
> > > > > > Continuing.
> > > > > >
> > > > > > Breakpoint 2, ImportIndices () at src/index.cpp:120
> > > > > > 120 IdxNode = ReadFile("prepdata/idx_node.csv");
> > > > > > (gdb) c
> > > > > > Continuing.
> > > > > >
> > > > > > Breakpoint 4, ReadFile (fileinput=0xd8663d "prepdata/idx_node.csv")
> > > > > > at src/index.cpp:86
> > > > > > 86 Index TempIndex;
> > > > > > (gdb) c
> > > > > > Continuing.
> > > > > >
> > > > > > Breakpoint 5, Index::Index (this=0x7fffffffcb80) at src/index.cpp:20
> > > > > > 20 Name(0) {}
> > > > > > (gdb) c
> > > > > > Continuing.
> > > > > >
> > > > > > Program received signal SIGSEGV, Segmentation fault.
> > > > > > 0x00002aaaab3b0b81 in opal_memory_ptmalloc2_int_malloc ()
> > > > > > from /opt/openmpi-1.3.4-gnu/lib/libopen-pal.so.0
> > > > > >
> > > > > > ---------------------------------------
> > > > > > the backtrace output from the above parallel OpenMPI code:
> > > > > >
> > > > > > (gdb) bt
> > > > > > #0 0x00002aaaab3b0b81 in opal_memory_ptmalloc2_int_malloc ()
> > > > > > from /opt/openmpi-1.3.4-gnu/lib/libopen-pal.so.0
> > > > > > #1 0x00002aaaab3b2bd3 in opal_memory_ptmalloc2_malloc ()
> > > > > > from /opt/openmpi-1.3.4-gnu/lib/libopen-pal.so.0
> > > > > > #2 0x0000003f7c8bd1dd in operator new(unsigned long) ()
> > > > > > from /usr/lib64/libstdc++.so.6
> > > > > > #3 0x00000000004646a7 in __gnu_cxx::new_allocator<int>::allocate (
> > > > > > this=0x7fffffffcb80, __n=0)
> > > > > > at /usr/lib/gcc/x86_64-redhat-linux/4.1.2/../../../../include/c++/4.1.2/ext/new_allocator.h:88
> > > > > > #4 0x00000000004646cf in std::_Vector_base<int, std::allocator<int> >::_M_allocate (this=0x7fffffffcb80, __n=0)
> > > > > > at /usr/lib/gcc/x86_64-redhat-linux/4.1.2/../../../../include/c++/4.1.2/bits/stl_vector.h:127
> > > > > > #5 0x0000000000464701 in std::_Vector_base<int, std::allocator<int> >::_Vector_base (this=0x7fffffffcb80, __n=0, __a=...)
> > > > > > at /usr/lib/gcc/x86_64-redhat-linux/4.1.2/../../../../include/c++/4.1.2/bits/stl_vector.h:113
> > > > > > #6 0x0000000000464d0b in std::vector<int, std::allocator<int> >::vector (
> > > > > > this=0x7fffffffcb80, __n=0, __value=@0x7fffffffc968, __a=...)
> > > > > > at /usr/lib/gcc/x86_64-redhat-linux/4.1.2/../../../../include/c++/4.1.2/bits/stl_vector.h:216
> > > > > > #7 0x00000000004890d7 in Index::Index (this=0x7fffffffcb80)
> > > > > > ---Type <return> to continue, or q <return> to quit---
> > > > > > at src/index.cpp:20
> > > > > > #8 0x000000000048927a in ReadFile (fileinput=0xd8663d "prepdata/idx_node.csv")
> > > > > > at src/index.cpp:86
> > > > > > #9 0x0000000000489533 in ImportIndices () at src/index.cpp:120
> > > > > > #10 0x0000000000445e0e in myNeplanTaskScheduler(CNSGA2 *, int, int, int, ._85 *, char, int, message_para_to_workers_VecT &, MPI_Datatype, int &, int &, std::vector<std::vector<double, std::allocator<double> >, std::allocator<std::vector<double, std::allocator<double> > > > &, std::vector<std::vector<double, std::allocator<double> >, std::allocator<std::vector<double, std::allocator<double> > > > &, std::vector<double, std::allocator<double> > &, int, std::vector<std::vector<double, std::allocator<double> >, std::allocator<std::vector<double, std::allocator<double> > > > &, MPI_Datatype, int, MPI_Datatype, int) (nsga2=0x118c490,
> > > > > > popSize=<value optimized out>, nodeSize=<value optimized out>,
> > > > > > myRank=<value optimized out>, myChildpop=0x1208d80, genCandTag=65 'A',
> > > > > > generationNum=1, myPopParaVec=std::vector of length 4, capacity 4 = {...},
> > > > > > message_to_master_type=0x7fffffffd540, myT1Flag=@0x7fffffffd68c,
> > > > > > myT2Flag=@0x7fffffffd688,
> > > > > > resultTaskPackageT1=std::vector of length 4, capacity 4 = {...},
> > > > > > resultTaskPackageT2Pr=std::vector of length 4, capacity 4 = {...},
> > > > > > xdataV=std::vector of length 4, capacity 4 = {...}, objSize=7,
> > > > > > resultTaskPackageT12=std::vector of length 4, capacity 4 = {...},
> > > > > > xdata_to_workers_type=0x121c410, myGenerationNum=1,
> > > > > > Mpara_to_workers_type=0x121b9b0, nconNum=0)
> > > > > > ---Type <return> to continue, or q <return> to quit---
> > > > > > at src/nsga2/myNetplanScheduler.cpp:109
> > > > > > #11 0x000000000044f44b in main (argc=1, argv=0x7fffffffd998)
> > > > > > at src/nsga2/main-parallel2.cpp:216
> > > > > > ----------------------------------------------------
> > > > > >
> > > > > > What is "opal_memory_ptmalloc2_int_malloc ()" ?
> > > > > >
> > > > > > The gdb output from sequential code:
> > > > > > -------------------------------------
> > > > > > Breakpoint 1, main (argc=<value optimized out>, argv=<value optimized out>)
> > > > > > at src/nsga2/main-seq.cpp:32
> > > > > > 32 ImportIndices();
> > > > > > (gdb) c
> > > > > > Continuing.
> > > > > >
> > > > > > Breakpoint 2, ImportIndices () at src/index.cpp:115
> > > > > > 115 IdxNode = ReadFile("prepdata/idx_node.csv");
> > > > > > (gdb) c
> > > > > > Continuing.
> > > > > >
> > > > > > Breakpoint 4, ReadFile (fileinput=0xd6bb9d "prepdata/idx_node.csv")
> > > > > > at src/index.cpp:86
> > > > > > 86 Index TempIndex;
> > > > > > (gdb) c
> > > > > > Continuing.
> > > > > >
> > > > > > Breakpoint 5, Index::Index (this=0x7fffffffd6d0) at src/index.cpp:20
> > > > > > 20 Name(0) {}
> > > > > > (gdb) c
> > > > > > Continuing.
> > > > > >
> > > > > > Breakpoint 4, ReadFile (fileinput=0xd6bbb3 "prepdata/idx_ud.csv")
> > > > > > at src/index.cpp:86
> > > > > > 86 Index TempIndex;
> > > > > > (gdb) bt
> > > > > > #0 ReadFile (fileinput=0xd6bbb3 "prepdata/idx_ud.csv") at src/index.cpp:86
> > > > > > #1 0x0000000000471cc9 in ImportIndices () at src/index.cpp:116
> > > > > > #2 0x000000000043bba6 in main (argc=<value optimized out>,
> > > > > > argv=<value optimized out>) at src/nsga2/main-seq.cpp:32
> > > > > >
> > > > > > --------------------------------------
> > > > > > thanks
> > > > > >
> > > > > >
> > > > > > From: belaid_moa@hotmail.com
> > > > > > To: users@open-mpi.org; dtustudy68@hotmail.com
> > > > > > Subject: RE: [OMPI users] OMPI seg fault by a class with weird address.
> > > > > > Date: Tue, 15 Mar 2011 06:16:35 +0000
> > > > > >
> > > > > > Hi Jack,
> > > > > > 1- Where is your main function to see how you called your class?
> > > > > > 2- I do not see the implementation of GetPosition, GetName, etc.?
> > > > > >
> > > > > > With best regards,
> > > > > > -Belaid.
> > > > > >
> > > > > >
> > > > > > From: dtustudy68@hotmail.com
> > > > > > To: users@open-mpi.org
> > > > > > Date: Mon, 14 Mar 2011 19:04:12 -0600
> > > > > > Subject: [OMPI users] OMPI seg fault by a class with weird address.
> > > > > >
> > > > > > Hi,
> > > > > >
> > > > > > > I got a run-time error in an Open MPI C++ program.
> > > > > >
> > > > > > The following output is from gdb:
> > > > > >
> > > > > > --------------------------------------------------------------------------
> > > > > > Program received signal SIGSEGV, Segmentation fault.
> > > > > > 0x00002aaaab3b0b81 in opal_memory_ptmalloc2_int_malloc ()
> > > > > > from /opt/openmpi-1.3.4-gnu/lib/libopen-pal.so.0
> > > > > >
> > > > > > At the point
> > > > > >
> > > > > > Breakpoint 9, Index::Index (this=0x7fffffffcb80) at src/index.cpp:20
> > > > > > 20 Name(0) {}
> > > > > >
> > > > > > > The Index constructor has been called before this point without any problem:
> > > > > > -------------------------------------------------------
> > > > > > Breakpoint 9, Index::Index (this=0x117d800) at src/index.cpp:20
> > > > > > 20 Name(0) {}
> > > > > > (gdb) c
> > > > > > Continuing.
> > > > > >
> > > > > > Breakpoint 9, Index::Index (this=0x117d860) at src/index.cpp:20
> > > > > > 20 Name(0) {}
> > > > > > (gdb) c
> > > > > > Continuing.
> > > > > > ----------------------------------------------------------------------------
> > > > > >
> > > > > > It seems that the 0x7fffffffcb80 address is a problem.
> > > > > >
> > > > > > > But I do not know the reason or how to remove the bug.
> > > > > >
> > > > > > Any help is really appreciated.
> > > > > >
> > > > > > thanks
> > > > > >
> > > > > > the following is the index definition.
> > > > > >
> > > > > > ---------------------------------------------------------
> > > > > > class Index {
> > > > > > public:
> > > > > > Index();
> > > > > > Index(const Index& rhs);
> > > > > > ~Index();
> > > > > > Index& operator=(const Index& rhs);
> > > > > >
> > > > > > vector<int> GetPosition() const;
> > > > > > vector<int> GetColumn() const;
> > > > > > vector<int> GetYear() const;
> > > > > > vector<string> GetName() const;
> > > > > > int GetPosition(const int idx) const;
> > > > > > int GetColumn(const int idx) const;
> > > > > > int GetYear(const int idx) const;
> > > > > > string GetName(const int idx) const;
> > > > > > int GetSize() const;
> > > > > >
> > > > > > void Add(const int idx, const int col, const string& name);
> > > > > > void Add(const int idx, const int col, const int year, const string& name);
> > > > > > void Add(const int idx, const Step& col, const string& name);
> > > > > > void WriteFile(const char* fileinput) const;
> > > > > >
> > > > > > private:
> > > > > > vector<int> Position;
> > > > > > vector<int> Column;
> > > > > > vector<int> Year;
> > > > > > vector<string> Name;
> > > > > > };
> > > > > > > // Constructors and destructor for the Index class
> > > > > > Index::Index() :
> > > > > > Position(0),
> > > > > > Column(0),
> > > > > > Year(0),
> > > > > > Name(0) {}
> > > > > >
> > > > > > Index::Index(const Index& rhs) :
> > > > > > Position(rhs.GetPosition()),
> > > > > > Column(rhs.GetColumn()),
> > > > > > Year(rhs.GetYear()),
> > > > > > Name(rhs.GetName()) {}
> > > > > >
> > > > > > Index::~Index() {}
> > > > > >
> > > > > > Index& Index::operator=(const Index& rhs) {
> > > > > > Position = rhs.GetPosition();
> > > > > > Column = rhs.GetColumn(),
> > > > > > Year = rhs.GetYear(),
> > > > > > Name = rhs.GetName();
> > > > > > return *this;
> > > > > > }
> > > > > > ----------------------------------------------------------
> > > > > >
> > > > > >
> > > > > >
> > > > > > _______________________________________________ users mailing list users@open-mpi.org http://www.open-mpi.org/mailman/listinfo.cgi/users
> > > > > > _______________________________________________
> > > > > > users mailing list
> > > > > > users@open-mpi.org
> > > > > > http://www.open-mpi.org/mailman/listinfo.cgi/users
> > > > >
> > > > >
> > > > > --
> > > > > Jeff Squyres
> > > > > jsquyres@cisco.com
> > > > > For corporate legal information go to:
> > > > > http://www.cisco.com/web/about/doing_business/legal/cri/
> > > > >
> > >
> > >
> > > --
> > > Jeff Squyres
> > > jsquyres@cisco.com
> > > For corporate legal information go to:
> > > http://www.cisco.com/web/about/doing_business/legal/cri/
> > >