Hi,

I just tried OpenMPI 1.0.1 and this time I had much less warnings related to the C++ API than I had with version 1.0.0 (I compile with g++ -Wall).

I nonetheless looked at the C++ headers and found that those warnings were still related to the use of C-style cast.

Some of them were simply casting away the const type qualifier to call the C API MPI functions. Those casts could easily be converted to the const_cast<>() operator specially designed to do this.

I however found that some others were simply wrong and leading to faulty operations. Those casts are located in Intracomm::Alltoallw() and Intracomm::Spawn_multiple() methods.

In the first method, a pointer to a table of const MPI::Datatype objects is casted into a pointer to a table of MPI_Datatype types and in the second one, a pointer to a table of const MPI::Info objects is casted into a pointer to a table of MPI_Info types.

That is it is assumed that the MPI::Datatype and MPI::Info have respectively the same memory layout as the corresponding C types MPI_Datatype and MPI_Info.

This assumption is incorrect in both cases even if MPI::Datatype class contains only a single data member of type MPI_Datatype and even if MPI::Info class contains only a single data member of type MPI_Info.

It is incorrect because MPI::Datatype and MPI::Info classes are polymorphics. That is each of them contains at least one virtual method. Since polymorphic classes needs to access the virtual methods table (pointer to members and offset to adjust "this"), the C++ compiler needs to insert at least another member. In all the implementation I've seen this is done by adding a member pointing to the virtual table for the exact class (named "__vtbl"). The resulting classes are then larger than they appear (ex: on my IA32 Linux machine sizeof(MPI::Datatype)==8 and sizeof(MPI::Info)==8 even if sizeof(MPI_Datatype)==4 and sizeof(MPI_Info)==4), the memory layout differs and therefore corresponding pointers cannot be converted by simple type casts.

A table of MPI::Datatype object have then to be converted into a table of MPI_Datatype by a temporary table and a small loop. The same is true for MPI::Info and MPI_Info.

I modified errhandler.h, intracomm.h and intracomm_inln.h files to implement those corrections. As expected it removes the warnings during compilation and should correct the conversion problems in Intracomm::Alltoallw() and Intracomm::Spawn_multiple() methods.

Bellow is the difference between my modified version of OpenMPI and the original one.

Please consider this patch for your next release.

Thanks,

Martin Audet, Research Officer
E: martin.audet@imi.cnrc-nrc.gc.ca  T: 450-641-5034
Indstrial Material Institute, National Research Council
75 de Mortagne,
Boucherville, QC
J4B 6Y4, Canada





diff --recursive --unified openmpi-1.0.1/ompi/mpi/cxx/errhandler.h openmpi-1.0.1ma/ompi/mpi/cxx/errhandler.h
--- openmpi-1.0.1/ompi/mpi/cxx/errhandler.h     2005-11-11 14:21:36.000000000 -0500
+++ openmpi-1.0.1ma/ompi/mpi/cxx/errhandler.h   2005-12-14 15:29:56.000000000 -0500
@@ -124,7 +124,7 @@
 #if ! 0 /* OMPI_ENABLE_MPI_PROFILING */
     // $%%@#%# AIX/POE 2.3.0.0 makes us put in this cast here
     (void)MPI_Errhandler_create((MPI_Handler_function*) &ompi_mpi_cxx_throw_excptn_fctn,
-                               (MPI_Errhandler *) &mpi_errhandler);
+                               const_cast<MPI_Errhandler *>(&mpi_errhandler));
 #else
     pmpi_errhandler.init();
 #endif
@@ -134,7 +134,7 @@
   //this is called from MPI::Finalize
   inline void free() const {
 #if ! 0 /* OMPI_ENABLE_MPI_PROFILING */
-    (void)MPI_Errhandler_free((MPI_Errhandler *) &mpi_errhandler);
+    (void)MPI_Errhandler_free(const_cast<MPI_Errhandler *>(&mpi_errhandler));
 #else
     pmpi_errhandler.free();
 #endif
diff --recursive --unified openmpi-1.0.1/ompi/mpi/cxx/intracomm.h openmpi-1.0.1ma/ompi/mpi/cxx/intracomm.h
--- openmpi-1.0.1/ompi/mpi/cxx/intracomm.h      2005-11-11 14:21:36.000000000 -0500
+++ openmpi-1.0.1ma/ompi/mpi/cxx/intracomm.h    2005-12-14 16:09:29.000000000 -0500
@@ -228,6 +228,10 @@
   PMPI::Intracomm pmpi_comm;
 #endif

+  // Convert an array of p_nbr Info object into an array of MPI_Info.
+  // A pointer to the allocated array is returned and must be eventually deleted.
+  static inline MPI_Info *convert_info_to_mpi_info(int p_nbr, const Info p_info_tbl[]);
+
 public: // JGS see above about friend decls
 #if ! 0 /* OMPI_ENABLE_MPI_PROFILING */
   static Op* current_op;
diff --recursive --unified openmpi-1.0.1/ompi/mpi/cxx/intracomm_inln.h openmpi-1.0.1ma/ompi/mpi/cxx/intracomm_inln.h
--- openmpi-1.0.1/ompi/mpi/cxx/intracomm_inln.h 2005-11-30 06:06:07.000000000 -0500
+++ openmpi-1.0.1ma/ompi/mpi/cxx/intracomm_inln.h       2005-12-14 16:09:35.000000000 -0500
@@ -144,13 +144,26 @@
        void *recvbuf, const int recvcounts[],
        const int rdispls[], const Datatype recvtypes[]) const
 {
+  const int comm_size = Get_size();
+  MPI_Datatype *const data_type_tbl = new MPI_Datatype [2*comm_size];
+
+  // This must be done because Datatype arrays cannot be converted directly into
+  // MPI_Datatype arrays. Since Datatype have virtual methods, Datatype is typically
+  // larger.
+  for (int i_rank=0; i_rank < comm_size; i_rank++) {
+      data_type_tbl[i_rank            ] = sendtypes[i_rank];
+      data_type_tbl[i_rank + comm_size] = recvtypes[i_rank];
+  }
+
   (void)MPI_Alltoallw(const_cast<void *>(sendbuf),
                       const_cast<int *>(sendcounts),
                      const_cast<int *>(sdispls),
-                      (MPI_Datatype *)(sendtypes), recvbuf,
+                      data_type_tbl,recvbuf,
                      const_cast<int *>(recvcounts),
                       const_cast<int *>(rdispls),
-                     (MPI_Datatype *)(recvtypes), mpi_comm);
+                     &data_type_tbl[comm_size], mpi_comm);
+
+  delete [] data_type_tbl;
 }

 inline void
@@ -158,7 +171,7 @@
        const MPI::Datatype & datatype, const MPI::Op& op,
        int root) const
 {
-  current_op = (MPI::Op*)&op;
+  current_op = const_cast<MPI::Op*>(&op);
   (void)MPI_Reduce(const_cast<void *>(sendbuf), recvbuf, count, datatype, op, root, mpi_comm);
   current_op = (Op*)0;
 }
@@ -167,7 +180,7 @@
 MPI::Intracomm::Allreduce(const void *sendbuf, void *recvbuf, int count,
          const MPI::Datatype & datatype, const MPI::Op& op) const
 {
-  current_op = (MPI::Op*)&op;
+  current_op = const_cast<MPI::Op*>(&op);
   (void)MPI_Allreduce (const_cast<void *>(sendbuf), recvbuf, count, datatype,  op, mpi_comm);
   current_op = (Op*)0;
 }
@@ -178,7 +191,7 @@
               const MPI::Datatype & datatype,
               const MPI::Op& op) const
 {
-  current_op = (MPI::Op*)&op;
+  current_op = const_cast<MPI::Op*>(&op);
   (void)MPI_Reduce_scatter(const_cast<void *>(sendbuf), recvbuf, recvcounts,
                           datatype, op, mpi_comm);
   current_op = (Op*)0;
@@ -188,7 +201,7 @@
 MPI::Intracomm::Scan(const void *sendbuf, void *recvbuf, int count,
      const MPI::Datatype & datatype, const MPI::Op& op) const
 {
-  current_op = (MPI::Op*)&op;
+  current_op = const_cast<MPI::Op*>(&op);
   (void)MPI_Scan(const_cast<void *>(sendbuf), recvbuf, count, datatype, op, mpi_comm);
   current_op = (Op*)0;
 }
@@ -198,7 +211,7 @@
                              const MPI::Datatype & datatype,
                              const MPI::Op& op) const
 {
-  current_op = (MPI::Op*)&op;
+  current_op = const_cast<MPI::Op*>(&op);
   (void)MPI_Exscan(const_cast<void *>(sendbuf), recvbuf, count, datatype, op, mpi_comm);
   current_op = (Op*)0;
 }
@@ -328,6 +341,17 @@
   return newcomm;
 }

+inline MPI_Info *
+MPI::Intracomm::convert_info_to_mpi_info(int p_nbr, const Info p_info_tbl[])
+{
+   MPI_Info *const mpi_info_tbl = new MPI_Info [p_nbr];
+
+   for (int i_tbl=0; i_tbl < p_nbr; i_tbl++) {
+       mpi_info_tbl[i_tbl] = p_info_tbl[i_tbl];
+   }
+
+   return mpi_info_tbl;
+}

 inline MPI::Intercomm
 MPI::Intracomm::Spawn_multiple(int count,
@@ -337,10 +361,15 @@
                                      const Info array_of_info[], int root)
 {
   MPI_Comm newcomm;
+  MPI_Info *const array_of_mpi_info = convert_info_to_mpi_info(count, array_of_info);
+
   MPI_Comm_spawn_multiple(count, const_cast<char **>(array_of_commands),
                          const_cast<char ***>(array_of_argv), const_cast<int *>(array_of_maxprocs),
-                         (MPI_Info *) array_of_info, root,
+                         array_of_mpi_info, root,
                          mpi_comm, &newcomm, (int *)MPI_ERRCODES_IGNORE);
+
+  delete [] array_of_mpi_info;
+
   return newcomm;
 }

@@ -354,10 +383,15 @@
                                      int array_of_errcodes[])
 {
   MPI_Comm newcomm;
+  MPI_Info *const array_of_mpi_info = convert_info_to_mpi_info(count, array_of_info);
+
   MPI_Comm_spawn_multiple(count, const_cast<char **>(array_of_commands),
                           const_cast<char ***>(array_of_argv), const_cast<int *>(array_of_maxprocs),
-                          (MPI_Info *) array_of_info, root,
+                          array_of_mpi_info, root,
                           mpi_comm, &newcomm, array_of_errcodes);
+
+  delete [] array_of_mpi_info;
+
   return newcomm;
 }