Index: configure.ac =================================================================== --- configure.ac (revision 16431) +++ configure.ac (working copy) @@ -816,6 +816,24 @@ OMPI_CHECK_BROKEN_QSORT +AC_CACHE_CHECK([if word-sized integers must be word-size aligned], + [ompi_cv_c_word_size_align], + [AC_LANG_PUSH(C) + AC_RUN_IFELSE([AC_LANG_PROGRAM([dnl +#include ], [[ long data[2] = {0, 0}; + long *lp; + int *ip; + ip = (int*) data; + ip++; + lp = (long*) ip; + return lp[0]; ]])], + [ompi_cv_c_word_size_align=no], + [ompi_cv_c_word_size_align=yes], + [ompi_cv_c_word_size_align=yes])]) +AS_IF([test $ompi_cv_c_word_size_align = yes], [results=1], [results=0]) +AC_DEFINE_UNQUOTED([OMPI_ALIGN_WORD_SIZE_INTEGERS], [$results], + [set to 1 if word-size integers must be aligned to word-size padding to prevent bus errors]) + # all: SYSV semaphores # all: SYSV shared memory # all: size of FD_SET Index: ompi/mca/btl/gm/btl_gm.c =================================================================== --- ompi/mca/btl/gm/btl_gm.c (revision 16431) +++ ompi/mca/btl/gm/btl_gm.c (working copy) @@ -716,7 +716,7 @@ /* post the put descriptor */ gm_put(gm_btl->port, des->des_src->seg_addr.pval, - des->des_dst->seg_addr.lval, + des->des_dst->seg_addr.pval, des->des_src->seg_len, GM_LOW_PRIORITY, endpoint->endpoint_addr.node_id, @@ -758,7 +758,7 @@ /* post the put descriptor */ gm_put(gm_btl->port, des->des_src->seg_addr.pval, - des->des_dst->seg_addr.lval, + des->des_dst->seg_addr.pval, des->des_src->seg_len, GM_LOW_PRIORITY, endpoint->endpoint_addr.node_id, @@ -861,7 +861,7 @@ /* post get put descriptor */ gm_get(gm_btl->port, - des->des_dst->seg_addr.lval, + des->des_dst->seg_addr.pval, des->des_src->seg_addr.pval, des->des_src->seg_len, GM_LOW_PRIORITY, @@ -904,7 +904,7 @@ /* post get put descriptor */ gm_get(gm_btl->port, - des->des_dst->seg_addr.lval, + des->des_dst->seg_addr.pval, des->des_src->seg_addr.pval, des->des_src->seg_len, GM_LOW_PRIORITY, Index: 
ompi/mca/pml/ob1/pml_ob1_recvfrag.c =================================================================== --- ompi/mca/pml/ob1/pml_ob1_recvfrag.c (revision 16431) +++ ompi/mca/pml/ob1/pml_ob1_recvfrag.c (working copy) @@ -122,8 +122,7 @@ MCA_PML_OB1_ACK_HDR_NTOH(hdr->hdr_ack); } #endif - sendreq = (mca_pml_ob1_send_request_t*) - hdr->hdr_ack.hdr_src_req.pval; + sendreq = (mca_pml_ob1_send_request_t*)hdr->hdr_ack.hdr_src_req.pval; sendreq->req_recv = hdr->hdr_ack.hdr_dst_req; sendreq->req_rdma_offset = (size_t)hdr->hdr_ack.hdr_rdma_offset; if(OPAL_THREAD_ADD32(&sendreq->req_state, 1) == 2 && @@ -144,8 +143,7 @@ MCA_PML_OB1_FRAG_HDR_NTOH(hdr->hdr_frag); } #endif - recvreq = (mca_pml_ob1_recv_request_t*) - hdr->hdr_frag.hdr_dst_req.pval; + recvreq = (mca_pml_ob1_recv_request_t*)hdr->hdr_frag.hdr_dst_req.pval; mca_pml_ob1_recv_request_progress(recvreq,btl,segments,des->des_dst_cnt); break; } @@ -158,8 +156,7 @@ we remember if we ever change the bml. */ assert(0 == (hdr->hdr_common.hdr_flags & MCA_PML_OB1_HDR_FLAGS_NBO)); #endif - sendreq = (mca_pml_ob1_send_request_t*) - hdr->hdr_rdma.hdr_req.pval; + sendreq = (mca_pml_ob1_send_request_t*)hdr->hdr_rdma.hdr_req.pval; mca_pml_ob1_send_request_put(sendreq,btl,&hdr->hdr_rdma); break; } @@ -171,8 +168,7 @@ MCA_PML_OB1_FIN_HDR_NTOH(hdr->hdr_fin); } #endif - rdma = (mca_btl_base_descriptor_t*) - hdr->hdr_fin.hdr_des.pval; + rdma = (mca_btl_base_descriptor_t*)hdr->hdr_fin.hdr_des.pval; rdma->des_cbfunc(btl, NULL, rdma, OMPI_SUCCESS); break; } Index: ompi/datatype/position.c =================================================================== --- ompi/datatype/position.c (revision 16431) +++ ompi/datatype/position.c (working copy) @@ -53,7 +53,7 @@ size_t* SPACE ) { uint32_t _copy_count = *(COUNT); - size_t _copy_blength; + size_t _copy_blength; ddt_elem_desc_t* _elem = &((ELEM)->elem); _copy_blength = ompi_ddt_basicDatatypes[_elem->common.type]->size; @@ -109,7 +109,7 @@ size_t iov_len_local; ptrdiff_t extent = 
pConvertor->pDesc->ub - pConvertor->pDesc->lb; - DUMP( "ompi_convertor_generic_simple_pack( %p, &%ld )\n", (void*)pConvertor, (long)*position ); + DUMP( "ompi_convertor_generic_simple_position( %p, &%ld )\n", (void*)pConvertor, (long)*position ); /* We dont want to have to parse the datatype multiple times. What we are interested in * here is to compute the number of completed datatypes that we can move forward, update Index: ompi/datatype/datatype_pack.c =================================================================== --- ompi/datatype/datatype_pack.c (revision 16431) +++ ompi/datatype/datatype_pack.c (working copy) @@ -71,7 +71,7 @@ if( (size_t)iov[iov_count].iov_len > length ) iov[iov_count].iov_len = length; if( iov[iov_count].iov_base == NULL ) { - iov[iov_count].iov_base = source_base; + iov[iov_count].iov_base = (IOVBASE_TYPE *) source_base; COMPUTE_CSUM( iov[iov_count].iov_base, iov[iov_count].iov_len, pConv ); } else { /* contiguous data just memcpy the smallest data in the user buffer */ @@ -133,7 +133,7 @@ if( (uint32_t)pStack->count < ((*out_size) - iov_count) ) { pStack[1].count = pData->size - (pConv->bConverted % pData->size); for( index = iov_count; i < pConv->count; i++, index++ ) { - iov[index].iov_base = user_memory; + iov[index].iov_base = (IOVBASE_TYPE *) user_memory; iov[index].iov_len = pStack[1].count; pStack[0].disp += extent; total_bytes_converted += pStack[1].count; @@ -156,13 +156,13 @@ for( index = iov_count; (i < pConv->count) && (index < (*out_size)); i++, index++ ) { if( max_allowed < pData->size ) { - iov[index].iov_base = user_memory; + iov[index].iov_base = (IOVBASE_TYPE *) user_memory; iov[index].iov_len = max_allowed; max_allowed = 0; COMPUTE_CSUM( iov[index].iov_base, iov[index].iov_len, pConv ); break; } else { - iov[index].iov_base = user_memory; + iov[index].iov_base = (IOVBASE_TYPE *) user_memory; iov[index].iov_len = pData->size; user_memory += extent; COMPUTE_CSUM( iov[index].iov_base, (size_t)iov[index].iov_len, pConv 
); @@ -249,7 +249,6 @@ dt_stack_t* pStack; /* pointer to the position on the stack */ uint32_t pos_desc; /* actual position in the description of the derived datatype */ uint32_t count_desc; /* the number of items already done in the actual pos_desc */ - uint16_t type; /* type at current position */ size_t total_packed = 0; /* total amount packed this time */ dt_elem_desc_t* description; dt_elem_desc_t* pElem; @@ -295,7 +294,6 @@ UPDATE_INTERNAL_COUNTERS( description, pos_desc, pElem, count_desc ); continue; } - type = pElem->elem.common.type; goto complete_loop; } if( DT_END_LOOP == pElem->elem.common.type ) { /* end of the current loop */ @@ -356,23 +354,6 @@ *max_data = total_packed; *out_size = iov_count; -#if 0 - if( pConvertor->flags & CONVERTOR_WITH_CHECKSUM ) { - uint32_t ui1 = 0, ui2 = 0, csum = OPAL_CSUM_ZERO; - /** - * Check the checksum correctness. - */ - for( iov_count = 0; iov_count < (*out_size); iov_count++ ) { - csum += opal_uicsum_partial( iov[iov_count].iov_base, iov[iov_count].iov_len, - &ui1, &ui2 ); - } - if( csum != pConvertor->checksum ) { - opal_output( 0, "error in the pack function the checksum does not match\n" - "(%d != %d)\n", csum, pConvertor->checksum ); - } - } -#endif - if( pConvertor->bConverted == pConvertor->local_size ) { pConvertor->flags |= CONVERTOR_COMPLETED; return 1; Index: ompi/datatype/datatype_unpack.c =================================================================== --- ompi/datatype/datatype_unpack.c (revision 16431) +++ ompi/datatype/datatype_unpack.c (working copy) @@ -294,9 +294,9 @@ uint32_t i, length, count_desc = 1; size_t data_length = ompi_ddt_basicDatatypes[pElem->elem.common.type]->size; - DO_DEBUG( opal_output( 0, "unpack partial data start %d end %d data_length %lu user %p\n" + DO_DEBUG( opal_output( 0, "unpack partial data start %lu end %lu data_length %lu user %p\n" "\tbConverted %lu total_length %lu count %d\n", - start_position, end_position, (unsigned long)data_length, *user_buffer, + (unsigned 
long)start_position, (unsigned long)end_position, (unsigned long)data_length, *user_buffer, (unsigned long)pConvertor->bConverted, (unsigned long)pConvertor->local_size, pConvertor->count ); ); /* Find a byte that is not used in the partial buffer */ @@ -440,7 +440,7 @@ if( DT_END_LOOP == pElem->elem.common.type ) { /* end of the current loop */ DO_DEBUG( opal_output( 0, "unpack end_loop count %d stack_pos %d pos_desc %d disp %ld space %lu\n", (int)pStack->count, pConvertor->stack_pos, pos_desc, - (long)pStack->disp, (unsigned long)iov_len_local ); ); + (long)pStack->disp, (unsigned long)iov_len_local ); ); if( --(pStack->count) == 0 ) { /* end of loop */ if( pConvertor->stack_pos == 0 ) { /* Force the conversion to stop by lowering the number of iovecs. */ Index: ompi/datatype/datatype_pack.h =================================================================== --- ompi/datatype/datatype_pack.h (revision 16431) +++ ompi/datatype/datatype_pack.h (working copy) @@ -21,7 +21,7 @@ size_t* SPACE ) { uint32_t _copy_count = *(COUNT); - size_t _copy_blength; + size_t _copy_blength; ddt_elem_desc_t* _elem = &((ELEM)->elem); char* _source = (*SOURCE) + _elem->disp; @@ -36,8 +36,8 @@ /* the extent and the size of the basic datatype are equals */ OMPI_DDT_SAFEGUARD_POINTER( _source, _copy_blength, (CONVERTOR)->pBaseBuf, (CONVERTOR)->pDesc, (CONVERTOR)->count ); - DO_DEBUG( opal_output( 0, "pack 1. memcpy( %p, %p, %ld ) => space %lu\n", - *(DESTINATION), _source, _copy_blength, (unsigned long)(*(SPACE)) ); ); + DO_DEBUG( opal_output( 0, "pack 1. 
memcpy( %p, %p, %lu ) => space %lu\n", + *(DESTINATION), _source, (unsigned long)_copy_blength, (unsigned long)(*(SPACE)) ); ); MEMCPY_CSUM( *(DESTINATION), _source, _copy_blength, (CONVERTOR) ); _source += _copy_blength; *(DESTINATION) += _copy_blength; @@ -46,8 +46,8 @@ for( _i = 0; _i < _copy_count; _i++ ) { OMPI_DDT_SAFEGUARD_POINTER( _source, _copy_blength, (CONVERTOR)->pBaseBuf, (CONVERTOR)->pDesc, (CONVERTOR)->count ); - DO_DEBUG( opal_output( 0, "pack 2. memcpy( %p, %p, %ld ) => space %lu\n", - *(DESTINATION), _source, _copy_blength, (unsigned long)(*(SPACE) - (_i * _copy_blength)) ); ); + DO_DEBUG( opal_output( 0, "pack 2. memcpy( %p, %p, %lu ) => space %lu\n", + *(DESTINATION), _source, (unsigned long)_copy_blength, (unsigned long)(*(SPACE) - (_i * _copy_blength)) ); ); MEMCPY_CSUM( *(DESTINATION), _source, _copy_blength, (CONVERTOR) ); *(DESTINATION) += _copy_blength; _source += _elem->extent; @@ -77,8 +77,8 @@ for( _i = 0; _i < _copy_loops; _i++ ) { OMPI_DDT_SAFEGUARD_POINTER( _source, _end_loop->size, (CONVERTOR)->pBaseBuf, (CONVERTOR)->pDesc, (CONVERTOR)->count ); - DO_DEBUG( opal_output( 0, "pack 3. memcpy( %p, %p, %ld ) => space %ld\n", - *(DESTINATION), _source, _end_loop->size, *(SPACE) - _i * _end_loop->size ); ); + DO_DEBUG( opal_output( 0, "pack 3. 
memcpy( %p, %p, %lu ) => space %lu\n", + *(DESTINATION), _source, (unsigned long)_end_loop->size, (unsigned long)(*(SPACE) - _i * _end_loop->size) ); ); MEMCPY_CSUM( *(DESTINATION), _source, _end_loop->size, (CONVERTOR) ); *(DESTINATION) += _end_loop->size; _source += _loop->extent; Index: ompi/datatype/dt_add.c =================================================================== --- ompi/datatype/dt_add.c (revision 16431) +++ ompi/datatype/dt_add.c (working copy) @@ -87,37 +87,48 @@ */ if( extent == -1 ) extent = (pdtAdd->ub - pdtAdd->lb); + /* handle special cases for DT_LB and DT_UB and their duplicate */ + if( DT_LB == pdtAdd->id ) { + pdtBase->bdt_used |= (((uint64_t)1) << DT_LB); + if( pdtBase->flags & DT_FLAG_USER_LB ) { + pdtBase->lb = LMIN( pdtBase->lb, disp ); + } else { + pdtBase->lb = disp; + pdtBase->flags |= DT_FLAG_USER_LB; + } + if( (pdtBase->ub - pdtBase->lb) != (ptrdiff_t)pdtBase->size ) { + pdtBase->flags &= ~DT_FLAG_NO_GAPS; + } + return OMPI_SUCCESS; + } else if( DT_UB == pdtAdd->id ) { + pdtBase->bdt_used |= (((uint64_t)1) << DT_UB); + if( pdtBase->flags & DT_FLAG_USER_UB ) { + pdtBase->ub = LMAX( pdtBase->ub, disp ); + } else { + pdtBase->ub = disp; + pdtBase->flags |= DT_FLAG_USER_UB; + } + if( (pdtBase->ub - pdtBase->lb) != (ptrdiff_t)pdtBase->size ) { + pdtBase->flags &= ~DT_FLAG_NO_GAPS; + } + return OMPI_SUCCESS; + } if( pdtAdd->flags & DT_FLAG_PREDEFINED ) { /* add a basic datatype */ - /* handle special cases for DT_LB and DT_UB */ - if( pdtAdd == ompi_ddt_basicDatatypes[DT_LB] ) { - pdtBase->bdt_used |= (((uint64_t)1) << DT_LB); - if( pdtBase->flags & DT_FLAG_USER_LB ) { - pdtBase->lb = LMIN( pdtBase->lb, disp ); + place_needed = (extent == (ptrdiff_t)pdtAdd->size ? 
1 : 3); + } else { + place_needed = pdtAdd->desc.used; + if( count != 1 ) { + if( place_needed < (MAX_DT_COMPONENT_COUNT - 2) ) { + place_needed += 2; /* for the loop markers */ } else { - pdtBase->lb = disp; - pdtBase->flags |= DT_FLAG_USER_LB; + /* The data-type contains too many elements to be handled, so + * report the limit (an unsigned value, hence %u) and fail. + */ + opal_output( 0, "Too many elements in the datatype. The limit is %u\n", + MAX_DT_COMPONENT_COUNT ); + return OMPI_ERROR; } - if( (pdtBase->ub - pdtBase->lb) != (ptrdiff_t)pdtBase->size ) { - pdtBase->flags &= ~DT_FLAG_NO_GAPS; - } - return OMPI_SUCCESS; - } else if( pdtAdd == ompi_ddt_basicDatatypes[DT_UB] ) { - pdtBase->bdt_used |= (((uint64_t)1) << DT_UB); - if( pdtBase->flags & DT_FLAG_USER_UB ) { - pdtBase->ub = LMAX( pdtBase->ub, disp ); - } else { - pdtBase->ub = disp; - pdtBase->flags |= DT_FLAG_USER_UB; - } - if( (pdtBase->ub - pdtBase->lb) != (ptrdiff_t)pdtBase->size ) { - pdtBase->flags &= ~DT_FLAG_NO_GAPS; - } - return OMPI_SUCCESS; } - place_needed = (extent == (ptrdiff_t)pdtAdd->size ? 1 : 3); - } else { - place_needed = pdtAdd->desc.used; - if( count != 1 ) place_needed += 2; /* for the loop markers */ } /* @@ -167,26 +178,20 @@ ub = LMAX( pdtBase->ub, ub ); } /* While the true_lb and true_ub have to be ordered to have the true_lb lower - * than the true_ub, the ub and lb does not have to be ordered. They should be + * than the true_ub, the ub and lb do not have to be ordered. They should be * as the user define them. 
*/ pdtBase->lb = lb; pdtBase->ub = ub; - if( 0 == pdtBase->nbElems ) old_true_ub = disp; - else old_true_ub = pdtBase->true_ub; - pdtBase->true_lb = LMIN( true_lb, pdtBase->true_lb ); - pdtBase->true_ub = LMAX( true_ub, pdtBase->true_ub ); - /* compute the new memory alignement */ pdtBase->align = IMAX( pdtBase->align, pdtAdd->align ); - pdtBase->size += count * pdtAdd->size; /* Now that we have the new ub and the alignment we should update the ub to match - * the new alignement. We have to add an epsilon that is the least nonnegative increment - * needed to roung the extent to the next multiple of the alignment. This rule - * apply only if there is user specified upper bound as stated in the MPI - * standard MPI 1.2 page 71. + * the new alignment. We have to add an epsilon that is the least nonnegative + * increment needed to round the extent to the next multiple of the alignment. + * This rule applies only if there is no user-specified upper bound, as stated + * in the MPI standard (MPI 1.2, page 71). */ if( !(pdtBase->flags & DT_FLAG_USER_UB) ) { epsilon = (pdtBase->ub - pdtBase->lb) % pdtBase->align; @@ -194,19 +199,33 @@ pdtBase->ub += (pdtBase->align - epsilon); } } + /* now we know it contains some data */ + pdtBase->flags |= DT_FLAG_DATA; /* - * the count == 0 is LEGAL only for MPI_UB and MPI_LB. I accept it just as a nice way to set - * the soft UB for a data (without using a real UB marker). This approach can be used to - * create the subarray and darray datatype. However from the MPI level this function - * should never be called directly with a count set to 0. - * Adding a data-type with a size zero is legal but does not have to go through all the - * stuff below. + * the count == 0 is LEGAL only for MPI_UB and MPI_LB. Therefore we support it + * here in the upper part of this function. As an extension, the count set to + * zero can be used to reset the alignment of the data, but not for changing + * the true_lb and true_ub. 
*/ if( (0 == count) || (0 == pdtAdd->size) ) { return OMPI_SUCCESS; } + /* Now, once we know everything is fine and there are some bytes in + * the data-type we can update the size, true_lb and true_ub. + */ + pdtBase->size += count * pdtAdd->size; + if( 0 == pdtBase->nbElems ) old_true_ub = disp; + else old_true_ub = pdtBase->true_ub; + if( 0 != pdtBase->size ) { + pdtBase->true_lb = LMIN( true_lb, pdtBase->true_lb ); + pdtBase->true_ub = LMAX( true_ub, pdtBase->true_ub ); + } else { + pdtBase->true_lb = true_lb; + pdtBase->true_ub = true_ub; + } + pdtBase->bdt_used |= pdtAdd->bdt_used; newLength = pdtBase->desc.used + place_needed; if( newLength > pdtBase->desc.length ) { Index: ompi/datatype/datatype_unpack.h =================================================================== --- ompi/datatype/datatype_unpack.h (revision 16431) +++ ompi/datatype/datatype_unpack.h (working copy) @@ -21,7 +21,7 @@ size_t* SPACE ) /* the space in the destination buffer */ { uint32_t _copy_count = *(COUNT); - size_t _copy_blength; + size_t _copy_blength; ddt_elem_desc_t* _elem = &((ELEM)->elem); char* _destination = (*DESTINATION) + _elem->disp; @@ -36,8 +36,8 @@ /* the extent and the size of the basic datatype are equals */ OMPI_DDT_SAFEGUARD_POINTER( _destination, _copy_blength, (CONVERTOR)->pBaseBuf, (CONVERTOR)->pDesc, (CONVERTOR)->count ); - DO_DEBUG( opal_output( 0, "unpack 1. memcpy( %p, %p, %ld ) => space %lu\n", - _destination, *(SOURCE), _copy_blength, (unsigned long)(*(SPACE)) ); ); + DO_DEBUG( opal_output( 0, "unpack 1. 
memcpy( %p, %p, %lu ) => space %lu\n", + _destination, *(SOURCE), (unsigned long)_copy_blength, (unsigned long)(*(SPACE)) ); ); MEMCPY_CSUM( _destination, *(SOURCE), _copy_blength, (CONVERTOR) ); *(SOURCE) += _copy_blength; _destination += _copy_blength; @@ -46,8 +46,8 @@ for( _i = 0; _i < _copy_count; _i++ ) { OMPI_DDT_SAFEGUARD_POINTER( _destination, _copy_blength, (CONVERTOR)->pBaseBuf, (CONVERTOR)->pDesc, (CONVERTOR)->count ); - DO_DEBUG( opal_output( 0, "unpack 2. memcpy( %p, %p, %ld ) => space %lu\n", - _destination, *(SOURCE), _copy_blength, (unsigned long)(*(SPACE) - (_i * _copy_blength)) ); ); + DO_DEBUG( opal_output( 0, "unpack 2. memcpy( %p, %p, %lu ) => space %lu\n", + _destination, *(SOURCE), (unsigned long)_copy_blength, (unsigned long)(*(SPACE) - (_i * _copy_blength)) ); ); MEMCPY_CSUM( _destination, *(SOURCE), _copy_blength, (CONVERTOR) ); *(SOURCE) += _copy_blength; _destination += _elem->extent; @@ -77,8 +77,8 @@ for( _i = 0; _i < _copy_loops; _i++ ) { OMPI_DDT_SAFEGUARD_POINTER( _destination, _end_loop->size, (CONVERTOR)->pBaseBuf, (CONVERTOR)->pDesc, (CONVERTOR)->count ); - DO_DEBUG( opal_output( 0, "unpack 3. memcpy( %p, %p, %ld ) => space %ld\n", - _destination, *(SOURCE), _end_loop->size, *(SPACE) - _i * _end_loop->size ); ); + DO_DEBUG( opal_output( 0, "unpack 3. 
memcpy( %p, %p, %lu ) => space %lu\n", + _destination, *(SOURCE), (unsigned long)_end_loop->size, (unsigned long)(*(SPACE) - _i * _end_loop->size) ); ); MEMCPY_CSUM( _destination, *(SOURCE), _end_loop->size, (CONVERTOR) ); *(SOURCE) += _end_loop->size; _destination += _loop->extent; Index: ompi/datatype/dt_sndrcv.c =================================================================== --- ompi/datatype/dt_sndrcv.c (revision 16431) +++ ompi/datatype/dt_sndrcv.c (working copy) @@ -47,14 +47,10 @@ uint32_t iov_count; size_t max_data; - /* First check if we really have something to do */ - if (0 == rcount) { - if (0 == scount) { - return MPI_SUCCESS; - } else { - return MPI_ERR_TRUNCATE; - } - } + /* First check if we really have something to do */ + if (0 == rcount) { + return ((0 == scount) ? MPI_SUCCESS : MPI_ERR_TRUNCATE); + } /* If same datatypes used, just copy. */ if (sdtype == rdtype) { Index: ompi/datatype/dt_copy.c =================================================================== --- ompi/datatype/dt_copy.c (revision 16431) +++ ompi/datatype/dt_copy.c (working copy) @@ -60,8 +60,8 @@ OMPI_DDT_SAFEGUARD_POINTER( _source, _copy_blength, (SOURCE_BASE), (DATATYPE), (TOTAL_COUNT) ); /* the extent and the size of the basic datatype are equals */ - DO_DEBUG( opal_output( 0, "copy 1. memcpy( %p, %p, %ld ) => space %lu\n", - _destination, _source, _copy_blength, (unsigned long)(*(SPACE)) ); ); + DO_DEBUG( opal_output( 0, "copy 1. memcpy( %p, %p, %lu ) => space %lu\n", + _destination, _source, (unsigned long)_copy_blength, (unsigned long)(*(SPACE)) ); ); MEMCPY( _destination, _source, _copy_blength ); _source += _copy_blength; _destination += _copy_blength; @@ -70,8 +70,8 @@ for( _i = 0; _i < _copy_count; _i++ ) { OMPI_DDT_SAFEGUARD_POINTER( _source, _copy_blength, (SOURCE_BASE), (DATATYPE), (TOTAL_COUNT) ); - DO_DEBUG( opal_output( 0, "copy 2. 
memcpy( %p, %p, %ld ) => space %lu\n", - _destination, _source, _copy_blength, (unsigned long)(*(SPACE) - (_i * _copy_blength)) ); ); + DO_DEBUG( opal_output( 0, "copy 2. memcpy( %p, %p, %lu ) => space %lu\n", + _destination, _source, (unsigned long)_copy_blength, (unsigned long)(*(SPACE) - (_i * _copy_blength)) ); ); MEMCPY( _destination, _source, _copy_blength ); _source += _elem->extent; _destination += _elem->extent; @@ -106,8 +106,8 @@ for( _i = 0; _i < _copy_loops; _i++ ) { OMPI_DDT_SAFEGUARD_POINTER( _source, _end_loop->size, (SOURCE_BASE), (DATATYPE), (TOTAL_COUNT) ); - DO_DEBUG( opal_output( 0, "copy 3. memcpy( %p, %p, %ld ) => space %ld\n", - _destination, _source, _end_loop->size, *(SPACE) - _i * _end_loop->size ); ); + DO_DEBUG( opal_output( 0, "copy 3. memcpy( %p, %p, %lu ) => space %lu\n", + _destination, _source, (unsigned long)_end_loop->size, (unsigned long)(*(SPACE) - _i * _end_loop->size) ); ); MEMCPY( _destination, _source, _end_loop->size ); _source += _loop->extent; _destination += _loop->extent; Index: ompi/datatype/dt_create_dup.c =================================================================== --- ompi/datatype/dt_create_dup.c (revision 16431) +++ ompi/datatype/dt_create_dup.c (working copy) @@ -3,7 +3,7 @@ * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University + * Copyright (c) 2004-2007 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, @@ -60,7 +60,8 @@ memcpy( pdt->opt_desc.desc, oldType->opt_desc.desc, desc_length * sizeof(dt_elem_desc_t) ); } } - pdt->id = 0; + pdt->id = oldType->id; /* preserve the default id. This allow us to + * copy predefined types. 
*/ pdt->args = NULL; *newType = pdt; return OMPI_SUCCESS; Index: ompi/datatype/dt_module.c =================================================================== --- ompi/datatype/dt_module.c (revision 16431) +++ ompi/datatype/dt_module.c (working copy) @@ -3,7 +3,7 @@ * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University + * Copyright (c) 2004-2007 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, @@ -127,7 +127,7 @@ OMPI_DECLSPEC ompi_datatype_t ompi_mpi_wchar = INIT_BASIC_DATA( wchar_t, OMPI_ALIGNMENT_WCHAR, WCHAR, DT_FLAG_DATA_C ); #else OMPI_DECLSPEC ompi_datatype_t ompi_mpi_wchar = INIT_UNAVAILABLE_DATA( WCHAR ); -#endif /* FTMPI_HAVE_WCHAR_T */ +#endif /* OMPI_ALIGNMENT_WCHAR */ OMPI_DECLSPEC ompi_datatype_t ompi_mpi_cxx_bool = INIT_BASIC_DATA( bool, OMPI_ALIGNMENT_CXX_BOOL, CXX_BOOL, DT_FLAG_DATA_CPP ); OMPI_DECLSPEC ompi_datatype_t ompi_mpi_logic = INIT_BASIC_FORTRAN_TYPE( DT_LOGIC, LOGIC, OMPI_SIZEOF_FORTRAN_LOGICAL, OMPI_ALIGNMENT_FORTRAN_LOGICAL, 0 ); @@ -411,7 +411,7 @@ datatype->desc.length = 1; datatype->desc.used = 1; - /* By default the optimized descritption is the same as the default + /* By default the optimized description is the same as the default * description for predefined datatypes. */ datatype->opt_desc = datatype->desc; @@ -639,6 +639,10 @@ ompi_ddt_number_of_predefined_data = (ompi_mpi_##name).d_f_to_c_index; \ } + /* + * This MUST match the order of ompi/include/mpif-common.h + * Any change will break binary compatability of Fortran programs. 
+ */ MOOG(datatype_null); MOOG(byte); MOOG(packed); @@ -769,7 +773,7 @@ static int _dump_data_flags( unsigned short usflags, char* ptr, size_t length ) { - if( length < 21 ) return 0; - sprintf( ptr, "-----------[---][---]" ); /* set everything to - */ + if( length < 22 ) return 0; /* 21 visible characters plus the terminating NUL */ + snprintf( ptr, 22, "-----------[---][---]" ); /* set everything to - */ if( usflags & DT_FLAG_DESTROYED ) ptr[0] = 'd'; if( usflags & DT_FLAG_COMMITED ) ptr[1] = 'c'; if( usflags & DT_FLAG_CONTIGUOUS ) ptr[2] = 'C'; @@ -811,13 +815,13 @@ static int __dump_data_desc( dt_elem_desc_t* pDesc, int nbElems, char* ptr, size_t length ) { int i; - size_t index = 0; + int32_t index = 0; for( i = 0; i < nbElems; i++ ) { index += _dump_data_flags( pDesc->elem.common.flags, ptr + index, length ); - if( length <= index ) break; + if( length <= (size_t)index ) break; index += snprintf( ptr + index, length - index, "%15s ", ompi_ddt_basicDatatypes[pDesc->elem.common.type]->name ); - if( length <= index ) break; + if( length <= (size_t)index ) break; if( DT_LOOP == pDesc->elem.common.type ) index += snprintf( ptr + index, length - index, "%d times the next %d elements extent %d\n", (int)pDesc->loop.loops, (int)pDesc->loop.items, @@ -832,7 +836,7 @@ (int)pDesc->elem.extent, (long)(pDesc->elem.count * ompi_ddt_basicDatatypes[pDesc->elem.common.type]->size) ); pDesc++; - if( length <= index ) break; + if( length <= (size_t)index ) break; } return index; } @@ -840,7 +844,7 @@ static inline int __dt_contain_basic_datatypes( const ompi_datatype_t* pData, char* ptr, size_t length ) { int i; - size_t index = 0; + int32_t index = 0; uint64_t mask = 1; if( pData->flags & DT_FLAG_USER_LB ) index += snprintf( ptr, length - index, "lb " ); @@ -849,7 +853,7 @@ if( pData->bdt_used & mask ) index += snprintf( ptr + index, length - index, "%s ", ompi_ddt_basicDatatypes[i]->name ); mask <<= 1; - if( length <= index ) break; + if( length <= (size_t)index ) break; } return index; } Index: ompi/datatype/dt_destroy.c 
=================================================================== --- ompi/datatype/dt_destroy.c (revision 16431) +++ ompi/datatype/dt_destroy.c (working copy) @@ -3,7 +3,7 @@ * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University + * Copyright (c) 2004-2007 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, @@ -25,7 +25,7 @@ { ompi_datatype_t* pData = *dt; - if( pData->flags & DT_FLAG_PREDEFINED ) + if( (pData->flags & DT_FLAG_PREDEFINED) && (pData->super.obj_reference_count <= 1) ) return OMPI_ERROR; OBJ_RELEASE( pData ); Index: ompi/datatype/datatype.h =================================================================== --- ompi/datatype/datatype.h (revision 16431) +++ ompi/datatype/datatype.h (working copy) @@ -89,9 +89,16 @@ typedef union dt_elem_desc dt_elem_desc_t; +/** + * The number of supported entries in the data-type definition and the + * associated type. 
+ */ +#define MAX_DT_COMPONENT_COUNT UINT_MAX +typedef uint32_t opal_ddt_count_t; + typedef struct dt_type_desc { - uint32_t length; /* the maximum number of elements in the description array */ - uint32_t used; /* the number of used elements in the description array */ + opal_ddt_count_t length; /* the maximum number of elements in the description array */ + opal_ddt_count_t used; /* the number of used elements in the description array */ dt_elem_desc_t* desc; } dt_type_desc_t; @@ -129,11 +136,11 @@ OMPI_DECLSPEC OBJ_CLASS_DECLARATION( ompi_datatype_t ); int ompi_ddt_register_params(void); -int32_t ompi_ddt_init( void ); -int32_t ompi_ddt_finalize( void ); +OMPI_DECLSPEC int32_t ompi_ddt_init( void ); +OMPI_DECLSPEC int32_t ompi_ddt_finalize( void ); ompi_datatype_t* ompi_ddt_create( int32_t expectedSize ); -int32_t ompi_ddt_commit( ompi_datatype_t** ); -int32_t ompi_ddt_destroy( ompi_datatype_t** ); +OMPI_DECLSPEC int32_t ompi_ddt_commit( ompi_datatype_t** ); +OMPI_DECLSPEC int32_t ompi_ddt_destroy( ompi_datatype_t** ); static inline int32_t ompi_ddt_is_committed( const ompi_datatype_t* type ) { return ((type->flags & DT_FLAG_COMMITED) == DT_FLAG_COMMITED); } static inline int32_t ompi_ddt_is_overlapped( const ompi_datatype_t* type ) Index: ompi/datatype/convertor.c =================================================================== --- ompi/datatype/convertor.c (revision 16431) +++ ompi/datatype/convertor.c (working copy) @@ -499,7 +499,7 @@ { /* Here I should check that the data is not overlapping */ - convertor->flags |= CONVERTOR_RECV; + convertor->flags |= CONVERTOR_RECV; OMPI_CONVERTOR_PREPARE( convertor, datatype, count, pUserBuf ); @@ -535,7 +535,7 @@ int32_t count, const void* pUserBuf ) { - convertor->flags |= CONVERTOR_SEND; + convertor->flags |= CONVERTOR_SEND; OMPI_CONVERTOR_PREPARE( convertor, datatype, count, pUserBuf ); Index: ompi/datatype/dt_args.c =================================================================== --- ompi/datatype/dt_args.c 
(revision 16431) +++ ompi/datatype/dt_args.c (working copy) @@ -1,9 +1,9 @@ /* -*- Mode: C; c-basic-offset:4 ; -*- */ /* - * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana + * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University + * Copyright (c) 2004-2007 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, @@ -45,6 +45,20 @@ } ompi_ddt_args_t; /** + * Compute the next value which is a multiple of PWROF2. Works fine + * only for power of 2 alignments. + */ +#define ALIGN_INT_TO( VALUE, PWROF2 ) \ + do { \ + int _align = (int)((PWROF2) - 1); \ + int _val = (int)(VALUE) + _align; \ + (VALUE) = (_val & (~_align)); \ + } while(0) +/* Assert that VALUE is a multiple of PWROF2 (which must be a power of 2). */ +#define CHECK_ALIGN_TO( VALUE, PWROF2 ) \ + assert( 0 == ((VALUE) & ((PWROF2) - 1)) ) + +/** * Some architecture require that 64 bits pointers (to pointers) has to * be 64 bits aligned. As in the ompi_ddt_args_t structure we have 2 such * pointers and one to an array of ints, if we start by setting the 64 @@ -76,6 +90,7 @@ pArgs->ref_count = 1; \ pArgs->total_pack_size = (4 + (IC)) * sizeof(int) + \ (AC) * sizeof(MPI_Aint) + (DC) * sizeof(int); \ + ALIGN_INT_TO( pArgs->total_pack_size, sizeof(MPI_Aint) ); \ (PDATA)->args = (void*)pArgs; \ (PDATA)->packed_description = NULL; \ } while(0) @@ -207,6 +222,10 @@ */ OBJ_RETAIN( d[pos] ); pArgs->total_pack_size += ((ompi_ddt_args_t*)d[pos]->args)->total_pack_size; + /* as total_pack_size is always aligned to MPI_Aint size their sum + * will be aligned to ... 
+ */ + CHECK_ALIGN_TO( pArgs->total_pack_size, sizeof(MPI_Aint) ); } } return MPI_SUCCESS; @@ -375,14 +394,14 @@ if( datatype->flags & DT_FLAG_PREDEFINED ) { return sizeof(int) * 2; } + assert( NULL != (ompi_ddt_args_t*)datatype->args ); return ((ompi_ddt_args_t*)datatype->args)->total_pack_size; } static inline int __ompi_ddt_pack_description( ompi_datatype_t* datatype, void** packed_buffer, int* next_index ) { - int* position = (int*)*packed_buffer; - int local_index = 0, i; + int i, *position = (int*)*packed_buffer; ompi_ddt_args_t* args = (ompi_ddt_args_t*)datatype->args; char* next_packed = (char*)*packed_buffer; @@ -393,23 +412,37 @@ } /* For duplicated datatype we don't have to store all the information */ if( MPI_COMBINER_DUP == args->create_type ) { - position[local_index++] = args->create_type; - position[local_index++] = args->d[0]->id; + position[0] = args->create_type; + position[1] = args->d[0]->id; return OMPI_SUCCESS; } - position[local_index++] = args->create_type; - position[local_index++] = args->ci; - position[local_index++] = args->ca; - position[local_index++] = args->cd; - memcpy( &(position[local_index]), args->i, sizeof(int) * args->ci ); - next_packed += ( 4 + args->ci) * sizeof(int); - local_index += args->ci; + position[0] = args->create_type; + position[1] = args->ci; + position[2] = args->ca; + position[3] = args->cd; + next_packed += (4 * sizeof(int)); + /* So far there are 4 integers in the array, so we're still 64 bits aligned + * if we suppose that the original buffer was 64 bits aligned. + * + * In order to solve issues with the Sparc 64 which require 64 bits pointers + * to be correctly aligned, we have to start adding the data in a smart way, + * just to keep everything as aligned as possible. Therefore, the first + * array we have to copy is the array of displacements, followed by the + * array of datatypes (both of them might be arrays of pointers) and then + * finally the array of counts. 
+ */ if( 0 < args->ca ) { - memcpy( &(position[local_index]), args->a, sizeof(MPI_Aint) * args->ca ); + memcpy( next_packed, args->a, sizeof(MPI_Aint) * args->ca ); next_packed += sizeof(MPI_Aint) * args->ca; } position = (int*)next_packed; next_packed += sizeof(int) * args->cd; + + /* copy the array of counts (32 bits aligned) */ + memcpy( next_packed, args->i, sizeof(int) * args->ci ); + next_packed += args->ci * sizeof(int); + + /* copy the rest of the data */ for( i = 0; i < args->cd; i++ ) { ompi_datatype_t* temp_data = args->d[i]; if( temp_data->flags & DT_FLAG_PREDEFINED ) { @@ -436,6 +469,8 @@ if( NULL == datatype->packed_description ) { if( datatype->flags & DT_FLAG_PREDEFINED ) { datatype->packed_description = malloc( 2 * sizeof(int) ); + } else if( NULL == args ) { + return OMPI_ERROR; } else { datatype->packed_description = malloc( args->total_pack_size ); } @@ -448,86 +483,114 @@ static ompi_datatype_t* __ompi_ddt_create_from_packed_description( void** packed_buffer, - struct ompi_proc_t* remote_processor ) + const struct ompi_proc_t* remote_processor ) { - int* position = (int*)*packed_buffer; + int* position; ompi_datatype_t* datatype = NULL; ompi_datatype_t** array_of_datatype; MPI_Aint* array_of_disp; int* array_of_length; - int number_of_length, number_of_disp, number_of_datatype; + int number_of_length, number_of_disp, number_of_datatype, data_id; int create_type, i; - char* next_buffer = (char*)*packed_buffer; + char* next_buffer; + bool free_array_of_disp = false; + #if OMPI_ENABLE_HETEROGENEOUS_SUPPORT bool need_swap = false; - if ((remote_processor->proc_arch & OMPI_ARCH_ISBIGENDIAN) != - (ompi_proc_local()->proc_arch & OMPI_ARCH_ISBIGENDIAN)) { - need_swap = true; + if( (remote_processor->proc_arch ^ ompi_proc_local()->proc_arch) & + OMPI_ARCH_ISBIGENDIAN ) { + need_swap = true; } #endif + next_buffer = (char*)*packed_buffer; + position = (int*)next_buffer; + + create_type = position[0]; #if OMPI_ENABLE_HETEROGENEOUS_SUPPORT if (need_swap) {
- create_type = opal_swap_bytes4(position[0]); - } else + create_type = opal_swap_bytes4(create_type); + } #endif - { - create_type = position[0]; - } if( MPI_COMBINER_DUP == create_type ) { /* there we have a simple predefined datatype */ + data_id = position[1]; #if OMPI_ENABLE_HETEROGENEOUS_SUPPORT if (need_swap) { - position[1] = opal_swap_bytes4(position[1]); + data_id = opal_swap_bytes4(data_id); } #endif - assert( position[1] < DT_MAX_PREDEFINED ); + assert( data_id < DT_MAX_PREDEFINED ); *packed_buffer = position + 2; - return (ompi_datatype_t*)ompi_ddt_basicDatatypes[position[1]]; + return (ompi_datatype_t*)ompi_ddt_basicDatatypes[data_id]; } + + number_of_length = position[1]; + number_of_disp = position[2]; + number_of_datatype = position[3]; #if OMPI_ENABLE_HETEROGENEOUS_SUPPORT if (need_swap) { - number_of_length = opal_swap_bytes4(position[1]); - number_of_disp = opal_swap_bytes4(position[2]); - number_of_datatype = opal_swap_bytes4(position[3]); - } else + number_of_length = opal_swap_bytes4(number_of_length); + number_of_disp = opal_swap_bytes4(number_of_disp); + number_of_datatype = opal_swap_bytes4(number_of_datatype); + } #endif - { - number_of_length = position[1]; - number_of_disp = position[2]; - number_of_datatype = position[3]; - } array_of_datatype = (ompi_datatype_t**)malloc( sizeof(ompi_datatype_t*) * number_of_datatype ); -#if OMPI_ENABLE_HETEROGENEOUS_SUPPORT - if (need_swap) { - position[4] = opal_swap_bytes4(position[4]); - } -#endif - array_of_length = &(position[4]); - next_buffer += (4 + number_of_length) * sizeof(int); - array_of_disp = (MPI_Aint*)next_buffer; - next_buffer += number_of_disp * sizeof(MPI_Aint); - position = (int*)next_buffer; - next_buffer += number_of_datatype * sizeof(int); + next_buffer += (4 * sizeof(int)); /* move after the header */ + + array_of_disp = (MPI_Aint*)next_buffer; + next_buffer += number_of_disp * sizeof(MPI_Aint); + /* the other datatypes */ + position = (int*)next_buffer; + next_buffer += 
number_of_datatype * sizeof(int); + /* the array of lengths (32 bits aligned) */ + array_of_length = (int*)next_buffer; + next_buffer += (number_of_length * sizeof(int)); + for( i = 0; i < number_of_datatype; i++ ) { + data_id = position[i]; #if OMPI_ENABLE_HETEROGENEOUS_SUPPORT if (need_swap) { - position[i] = opal_swap_bytes4(position[i]); + data_id = opal_swap_bytes4(data_id); } #endif - if( position[i] < DT_MAX_PREDEFINED ) { - assert( position[i] < DT_MAX_PREDEFINED ); - array_of_datatype[i] = (ompi_datatype_t*)ompi_ddt_basicDatatypes[position[i]]; - } else { - array_of_datatype[i] = - __ompi_ddt_create_from_packed_description( (void**)&next_buffer, - remote_processor ); - if( NULL == array_of_datatype[i] ) - goto cleanup_and_exit; + if( data_id < DT_MAX_PREDEFINED ) { + array_of_datatype[i] = (ompi_datatype_t*)ompi_ddt_basicDatatypes[data_id]; + continue; } + array_of_datatype[i] = + __ompi_ddt_create_from_packed_description( (void**)&next_buffer, + remote_processor ); + if( NULL == array_of_datatype[i] ) { + /* don't cleanup more than required. We can now modify these + * values as we already know we have failed to rebuild the + * datatype. + */ + array_of_datatype[i] = (ompi_datatype_t*)ompi_ddt_basicDatatypes[DT_BYTE]; + number_of_datatype = i; + goto cleanup_and_exit; + } } + +#if OMPI_ALIGN_WORD_SIZE_INTEGERS + /** + * some architectures really don't like having unaligned + * accesses. We'll be int aligned, because any sane system will + * require that. But we might not be long aligned, and some + * architectures will complain if a long is accessed on int + * alignment (but not long alignment). On those architectures, + * copy the buffer into an aligned buffer first. 
+ */ + if( 0 != number_of_disp ) { + char* ptr = array_of_disp; + free_array_of_disp = true; + array_of_disp = malloc(sizeof(MPI_Aint) * number_of_disp); + memcpy(array_of_disp, ptr, sizeof(MPI_Aint) * number_of_disp); + } +#endif + #if OMPI_ENABLE_HETEROGENEOUS_SUPPORT if (need_swap) { for (i = 0 ; i < number_of_length ; ++i) { @@ -537,7 +600,7 @@ #if SIZEOF_PTRDIFF_T == 4 array_of_disp[i] = opal_swap_bytes4(array_of_disp[i]); #elif SIZEOF_PTRDIFF_T == 8 - array_of_disp[i] = opal_swap_bytes8(array_of_disp[i]); + array_of_disp[i] = (MPI_Aint)opal_swap_bytes8(array_of_disp[i]); #else #error "Unknown size of ptrdiff_t" #endif @@ -553,6 +616,7 @@ OBJ_RELEASE(array_of_datatype[i]); } } + if (free_array_of_disp) free(array_of_disp); free( array_of_datatype ); return datatype; } Index: ompi/datatype/convertor.h =================================================================== --- ompi/datatype/convertor.h (revision 16431) +++ ompi/datatype/convertor.h (working copy) @@ -63,7 +63,7 @@ struct ompi_convertor_master_t; typedef struct dt_stack { - int16_t index; /**< index in the element description */ + int32_t index; /**< index in the element description */ int16_t type; /**< the type used for the last pack/unpack (original or DT_BYTE) */ size_t count; /**< number of times we still have to do it */ ptrdiff_t disp; /**< actual displacement depending on the count field */ @@ -204,6 +204,7 @@ convertor->remoteArch = pSrcConv->remoteArch; convertor->flags = (pSrcConv->flags | flags); convertor->master = pSrcConv->master; + return ompi_convertor_prepare_for_send( convertor, datatype, count, pUserBuf ); } @@ -261,8 +262,7 @@ if( !(convertor->flags & CONVERTOR_WITH_CHECKSUM) && (convertor->flags & DT_FLAG_NO_GAPS) && - ((convertor->flags & CONVERTOR_SEND) || - (convertor->flags & CONVERTOR_HOMOGENEOUS)) ) { + (convertor->flags & (CONVERTOR_SEND | CONVERTOR_HOMOGENEOUS)) ) { /* Contiguous and no checkpoint and no homogeneous unpack */ convertor->bConverted = *position; return 
OMPI_SUCCESS; Index: ompi/datatype/dt_optimize.c =================================================================== --- ompi/datatype/dt_optimize.c (revision 16431) +++ ompi/datatype/dt_optimize.c (working copy) @@ -279,8 +279,8 @@ dt_elem_desc_t* pElem = pData->desc.desc; index = GET_FIRST_NON_LOOP( pElem ); - assert( pData->desc.desc[index].elem.common.flags & DT_FLAG_DATA ); - first_elem_disp = pData->desc.desc[index].elem.disp; + assert( pElem[index].elem.common.flags & DT_FLAG_DATA ); + first_elem_disp = pElem[index].elem.disp; } /* let's add a fake element at the end just to avoid useless comparaisons Index: ompi/datatype/datatype_prototypes.h =================================================================== --- ompi/datatype/datatype_prototypes.h (revision 16431) +++ ompi/datatype/datatype_prototypes.h (working copy) @@ -15,51 +15,51 @@ #include "ompi_config.h" -OMPI_DECLSPEC int32_t +int32_t ompi_pack_homogeneous_contig( ompi_convertor_t* pConv, struct iovec* iov, uint32_t* out_size, size_t* max_data ); -OMPI_DECLSPEC int32_t +int32_t ompi_pack_homogeneous_contig_checksum( ompi_convertor_t* pConv, struct iovec* iov, uint32_t* out_size, size_t* max_data ); -OMPI_DECLSPEC int32_t +int32_t ompi_pack_homogeneous_contig_with_gaps( ompi_convertor_t* pConv, struct iovec* iov, uint32_t* out_size, size_t* max_data ); -OMPI_DECLSPEC int32_t +int32_t ompi_pack_homogeneous_contig_with_gaps_checksum( ompi_convertor_t* pConv, struct iovec* iov, uint32_t* out_size, size_t* max_data ); -OMPI_DECLSPEC int32_t +int32_t ompi_generic_simple_pack( ompi_convertor_t* pConvertor, struct iovec* iov, uint32_t* out_size, size_t* max_data ); -OMPI_DECLSPEC int32_t +int32_t ompi_generic_simple_pack_checksum( ompi_convertor_t* pConvertor, struct iovec* iov, uint32_t* out_size, size_t* max_data ); -OMPI_DECLSPEC int32_t +int32_t ompi_unpack_general( ompi_convertor_t* pConvertor, struct iovec* iov, uint32_t* out_size, size_t* max_data ); -OMPI_DECLSPEC int32_t +int32_t 
ompi_unpack_general_checksum( ompi_convertor_t* pConvertor, struct iovec* iov, uint32_t* out_size, size_t* max_data ); -OMPI_DECLSPEC int32_t +int32_t ompi_unpack_homogeneous_contig( ompi_convertor_t* pConv, struct iovec* iov, uint32_t* out_size, size_t* max_data ); -OMPI_DECLSPEC int32_t +int32_t ompi_unpack_homogeneous_contig_checksum( ompi_convertor_t* pConv, struct iovec* iov, uint32_t* out_size, size_t* max_data ); -OMPI_DECLSPEC int32_t +int32_t ompi_generic_simple_unpack( ompi_convertor_t* pConvertor, struct iovec* iov, uint32_t* out_size, size_t* max_data ); -OMPI_DECLSPEC int32_t +int32_t ompi_generic_simple_unpack_checksum( ompi_convertor_t* pConvertor, struct iovec* iov, uint32_t* out_size, size_t* max_data );