Open MPI logo

Open MPI User's Mailing List Archives

  |   Home   |   Support   |   FAQ   |   all Open MPI User's mailing list

Subject: Re: [OMPI users] MPI_ALLREDUCE bug with 1.5.2rc3r24441
From: Harald Anlauf (anlauf_at_[hidden])
Date: 2011-03-09 16:37:51


Jeff,

it's funny because I do not see my problem with C (when using
long long) but only with Fortran and INTEGER8.

I have rewritten the testcase so that it uses MPI_REDUCE_LOCAL,
which unfortunately does not link with openmpi-1.4.3. Apparently
this is a new feature of openmpi-1.5.

Here's the modified testcase:

program test  ! Reproducer: MPI_REDUCE_LOCAL with MPI_SUM on INTEGER4 and INTEGER8 buffers
  use mpi
  implicit none
  integer, parameter :: i8 = selected_int_kind (15)  ! integer kind holding at least 15 decimal digits
  integer, parameter :: N = 3                        ! element count of every buffer

  integer :: i4i(N), i4s(N)          ! default-integer input buffer and in/out (sum) buffer
  integer(i8) :: i8i(N), i8s(N)      ! kind-i8 input buffer and in/out (sum) buffer
  integer :: ierr, nproc, myrank, i  ! nproc is queried below but not used afterwards

  i4i = (/ (i, i=1,N) /); i8i = (/ (i, i=1,N) /)  ! both input buffers hold 1..N

  call MPI_INIT (ierr)  ! ierr is never inspected anywhere in this program
  call MPI_COMM_SIZE (MPI_COMM_WORLD, nproc, ierr)
  call MPI_COMM_RANK (MPI_COMM_WORLD, myrank, ierr)

  if (myrank == 0) then  ! only rank 0 prints
     print *, "Integer kind, bits:", i8, bit_size (1_i8)   ! kind value and bit width of i8
     print *, "Default Integer :", kind (1), bit_size (1)  ! same two values for the default integer
  end if

  i4s = 0  ! zero the in/out buffer so the sum equals the input, 1..N
  call MPI_REDUCE_LOCAL (i4i, i4s, N, MPI_INTEGER4, MPI_SUM, ierr)  ! NOTE(review): assumes default integer is 4 bytes
  if (myrank == 0) print *, "MPI_Reduce_local: Sum[integer(4)]:", i4s

  i8s = 0  ! same setup for the kind-i8 buffers
  call MPI_REDUCE_LOCAL (i8i, i8s, N, MPI_INTEGER8, MPI_SUM, ierr)  ! the call reported to abort with MPI_ERR_OP
  if (myrank == 0) print *, "MPI_Reduce_local: Sum[integer(8)]:", i8s

  call MPI_FINALIZE (ierr)
end program test

Compiled with Intel Fortran v12:

% /opt/ifort12/openmpi-1.5/bin/mpif90 -v mpi-allreducetest.f90 -Wl,-R/opt/ifort12/openmpi-1.5/lib

% ./a.out
 Integer kind, bits: 8 64
 Default Integer : 4 32
 MPI_Reduce_local: Sum[integer(4)]: 1 2 3
[proton:11545] *** An error occurred in MPI_Reduce_local: the reduction
operation MPI_SUM is not defined on the MPI_INTEGER8 datatype
[proton:11545] *** on communicator MPI_COMM_WORLD
[proton:11545] *** MPI_ERR_OP: invalid reduce operation
[proton:11545] *** MPI_ERRORS_ARE_FATAL (your MPI job will now abort)

(Note: the test works with INTEGER2, but not with INTEGER1 or INTEGER8.)

Here's my tentative gdb session (sorry, I'm not very experienced):

anlauf_at_proton:/opt/sources/mpi> gdb a.out
GNU gdb (GDB; openSUSE 11.1) 6.8.50.20081120-cvs
Copyright (C) 2008 Free Software Foundation, Inc.
License GPLv3+: GNU GPL version 3 or later
<http://gnu.org/licenses/gpl.html>
This is free software: you are free to change and redistribute it.
There is NO WARRANTY, to the extent permitted by law. Type "show copying"
and "show warranty" for details.
This GDB was configured as "i586-suse-linux".
For bug reporting instructions, please see:
<http://bugs.opensuse.org/>...
(gdb) break MAIN__
Breakpoint 1 at 0x804f9c0: file mpi-allreducetest.f90, line 1.
(gdb) run
Starting program: /opt/sources/mpi/a.out
[Thread debugging using libthread_db enabled]

Breakpoint 1, test () at mpi-allreducetest.f90:1
1 program test
Current language: auto; currently fortran
(gdb) break 27
Breakpoint 2 at 0x804fdc1: file mpi-allreducetest.f90, line 27.
(gdb) cont
Continuing.
 Integer kind, bits: 8 64
 Default Integer : 4 32
 MPI_Reduce_local: Sum[integer(4)]: 1 2 3

Breakpoint 2, test () at mpi-allreducetest.f90:27
27 call MPI_REDUCE_LOCAL (i8i, i8s, N, MPI_INTEGER8, MPI_SUM, ierr)
(gdb) s
mpi_reduce_local_f (inbuf=0x80bcc00 "\001", inoutbuf=0x80bcc30 "",
    count=0x80a9c60, datatype=0x80a9c78, op=0x80a9c60, ierr=0xbfffeabc)
    at preduce_local_f.c:64
64 {
Current language: auto; currently c
(gdb) l 62,80
62 void mpi_reduce_local_f(char *inbuf, char *inoutbuf, MPI_Fint *count,
63 MPI_Fint *datatype, MPI_Fint *op, MPI_Fint *ierr)
64 {
65 MPI_Datatype c_type;
66 MPI_Op c_op;
67
68 c_type = MPI_Type_f2c(*datatype);
69 c_op = MPI_Op_f2c(*op);
70
71 inbuf = (char *) OMPI_F2C_BOTTOM(inbuf);
72 inoutbuf = (char *) OMPI_F2C_BOTTOM(inoutbuf);
73
74 *ierr = OMPI_INT_2_FINT(MPI_Reduce_local(inbuf, inoutbuf,
75 OMPI_FINT_2_INT(*count),
76 c_type, c_op));
77 }
(gdb) print *datatype
$1 = 11
(gdb) print *op
$2 = 3
(gdb) break 74
Breakpoint 3 at 0xb7fd3c86: file preduce_local_f.c, line 74.
(gdb) cont
Continuing.

Breakpoint 3, mpi_reduce_local_f (inbuf=0x80bcc00 "\001",
    inoutbuf=0x80bcc30 "", count=0x80a9c60, datatype=0x80a9c78, op=0x80a9c60,
    ierr=0xbfffeabc) at preduce_local_f.c:74
74 *ierr = OMPI_INT_2_FINT(MPI_Reduce_local(inbuf, inoutbuf,
(gdb) print *c_type
$3 = {super = {super = {obj_class = 0xb7f9e3a4, obj_reference_count = 1},
    flags = 54070, id = 7, bdt_used = 128, size = 8, true_lb = 0,
    true_ub = 8, lb = 0, ub = 8, align = 1, nbElems = 1,
    name = "OPAL_INT8", '\0' <repeats 54 times>, desc = {length = 1,
      used = 1, desc = 0xb7fab418}, opt_desc = {length = 1, used = 1,
      desc = 0xb7fab418}, btypes = {0, 0, 0, 0, 0, 0, 0, 1,
      0 <repeats 38 times>}}, id = 42, d_f_to_c_index = 11, d_keyhash = 0x0,
  args = 0x0, packed_description = 0x0,
  name = "MPI_INTEGER8", '\0' <repeats 51 times>}
(gdb) print c_op
$4 = <value optimized out>
(gdb) s
PMPI_Reduce_local (inbuf=0x80bcc00, inoutbuf=0x80bcc30, count=3,
    datatype=0xb7f9ad18, op=0xb7faa2c8) at preduce_local.c:48
48 if (MPI_PARAM_CHECK) {
(gdb) print *op
$5 = {super = {obj_class = 0xb7f93e00, obj_reference_count = 1},
  o_name = "MPI_SUM", '\0' <repeats 56 times>, o_flags = 41,
  o_f_to_c_index = 3, o_func = {intrinsic = {fns = {
        0xb7f12461 <ompi_op_base_sum_unsigned_char>,
        0xb7f12440 <ompi_op_base_sum_signed_char>,
        0xb7f12482 <ompi_op_base_sum_int>,
        0xb7f124a3 <ompi_op_base_sum_long>,
        0xb7f124c4 <ompi_op_base_sum_short>,
        0xb7f124e7 <ompi_op_base_sum_unsigned_short>,
        0xb7f1250a <ompi_op_base_sum_unsigned>,
        0xb7f1252b <ompi_op_base_sum_unsigned_long>,
        0xb7f1254c <ompi_op_base_sum_long_long_int>,
        0xb7f1257a <ompi_op_base_sum_unsigned_long_long>,
        0xb7f125a8 <ompi_op_base_sum_fortran_integer>,
        0xb7f125c9 <ompi_op_base_sum_fortran_integer1>,
        0xb7f125ea <ompi_op_base_sum_fortran_integer2>,
        0xb7f1260d <ompi_op_base_sum_fortran_integer4>,
        0xb7f1262e <ompi_op_base_sum_fortran_integer8>, 0,
        0xb7f1265c <ompi_op_base_sum_float>,
        0xb7f1267e <ompi_op_base_sum_double>,
        0xb7f126cb <ompi_op_base_sum_fortran_real>, 0,
        0xb7f1270f <ompi_op_base_sum_fortran_real4>,
        0xb7f12731 <ompi_op_base_sum_fortran_real8>, 0,
        0xb7f126ed <ompi_op_base_sum_fortran_double_precision>,
        0xb7f126a0 <ompi_op_base_sum_long_double>, 0, 0,
        0xb7f12753 <ompi_op_base_sum_fortran_complex>,
        0xb7f12781 <ompi_op_base_sum_fortran_double_complex>,
        0xb7f127b6 <ompi_op_base_sum_fortran_complex8>,
        0xb7f127e4 <ompi_op_base_sum_fortran_complex16>,
        0 <repeats 12 times>}, modules = {0x8112ee8 <repeats 43 times>}},
    c_fn = 0xb7f12461 <ompi_op_base_sum_unsigned_char>,
    fort_fn = 0xb7f12461 <ompi_op_base_sum_unsigned_char>, cxx_data = {
      user_fn = 0xb7f12461 <ompi_op_base_sum_unsigned_char>,
      intercept_fn = 0xb7f12440 <ompi_op_base_sum_signed_char>}},
  o_3buff_intrinsic = {fns = {
      0xb7f147ff <ompi_op_base_3buff_sum_unsigned_char>,
      0xb7f147d8 <ompi_op_base_3buff_sum_signed_char>,
      0xb7f14826 <ompi_op_base_3buff_sum_int>,
      0xb7f1484d <ompi_op_base_3buff_sum_long>,
      0xb7f14874 <ompi_op_base_3buff_sum_short>,
      0xb7f1489e <ompi_op_base_3buff_sum_unsigned_short>,
      0xb7f148c8 <ompi_op_base_3buff_sum_unsigned>,
      0xb7f148ef <ompi_op_base_3buff_sum_unsigned_long>,
      0xb7f14916 <ompi_op_base_3buff_sum_long_long_int>,
      0xb7f14949 <ompi_op_base_3buff_sum_unsigned_long_long>,
      0xb7f1497c <ompi_op_base_3buff_sum_fortran_integer>,
      0xb7f149a3 <ompi_op_base_3buff_sum_fortran_integer1>,
      0xb7f149ca <ompi_op_base_3buff_sum_fortran_integer2>,
      0xb7f149f4 <ompi_op_base_3buff_sum_fortran_integer4>,
      0xb7f14a1b <ompi_op_base_3buff_sum_fortran_integer8>, 0,
      0xb7f14a4e <ompi_op_base_3buff_sum_float>,
      0xb7f14a75 <ompi_op_base_3buff_sum_double>,
      0xb7f14aca <ompi_op_base_3buff_sum_fortran_real>, 0,
      0xb7f14b18 <ompi_op_base_3buff_sum_fortran_real4>,
      0xb7f14b3f <ompi_op_base_3buff_sum_fortran_real8>, 0,
      0xb7f14af1 <ompi_op_base_3buff_sum_fortran_double_precision>,
      0xb7f14a9c <ompi_op_base_3buff_sum_long_double>, 0, 0,
      0xb7f14b66 <ompi_op_base_3buff_sum_fortran_complex>,
      0xb7f14b99 <ompi_op_base_3buff_sum_fortran_double_complex>,
      0xb7f14bd1 <ompi_op_base_3buff_sum_fortran_complex8>,
      0xb7f14c04 <ompi_op_base_3buff_sum_fortran_complex16>,
      0 <repeats 12 times>}, modules = {0x8112ee8 <repeats 43 times>}}}
(gdb) s
43 {
(gdb) s
48 if (MPI_PARAM_CHECK) {
(gdb) s
51 OMPI_ERR_INIT_FINALIZE(FUNC_NAME);
(gdb) s
53 if (MPI_OP_NULL == op || NULL == op) {
(gdb) s
407 if (ompi_op_is_intrinsic(op)) {
(gdb) l
402 - if intrinsic ddt invoked on intrinsic op:
403 - ensure the datatype is defined in the op map
404 - ensure we have a function pointer for that combination
405 */
406
407 if (ompi_op_is_intrinsic(op)) {
408 if (ompi_datatype_is_predefined(ddt)) {
409 /* Intrinsic ddt on intrinsic op */
410 if (-1 == ompi_op_ddt_map[ddt->id] ||
411 NULL == op->o_func.intrinsic.fns[ompi_op_ddt_map[ddt->id]]) {
(gdb) s
408 if (ompi_datatype_is_predefined(ddt)) {
(gdb) s
410 if (-1 == ompi_op_ddt_map[ddt->id] ||
(gdb) whatis ddt
No symbol "ddt" in current context.
(gdb) s
412 asprintf(msg,
(gdb) s
56 int ret = OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_OP, msg);
(gdb) s
ompi_errhandler_invoke (errhandler=0xb7fa2a98, mpi_object=0xb7fa2454,
    object_type=1, err_code=10,
    message=0x8146f78 "MPI_Reduce_local: the reduction operation MPI_SUM is not defined on the MPI_INTEGER8 datatype") at errhandler/errhandler_invoke.c:32
32 {
(gdb) quit
The program is running. Quit anyway (and kill it)? (y or n) y

Can you give me a hint on how to figure out why the combination
of MPI_SUM and MPI_INTEGER8 fails?

Harald