Open MPI Development Mailing List Archives


Subject: Re: [OMPI devel] [OMPI svn-full] svn:open-mpi r24977
From: Jeff Squyres (jsquyres) (jsquyres_at_[hidden])
Date: 2011-08-02 10:49:24


The question that needs to be answered in the README is: when should one use openib/ob1 vs. MXM? Users will need to know this.
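For example, I'd expect the answer to come down to which command line a user should run - something like the following (just a sketch on my part; the exact BTL list is a guess, and the cm/mxm line is from the README hunk below):

  shell$ mpirun --mca pml ob1 --mca btl openib,self,sm ...
  shell$ mpirun --mca pml cm --mca mtl mxm ...

I.e., when do I want the first (openib BTL under the ob1 PML) and when do I want the second (MXM MTL under the cm PML)?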

Also see the part in the README about the different PMLs - you might want to write more there.

Sent from my phone. No type good.

On Aug 2, 2011, at 10:30 AM, "miked_at_[hidden]" <miked_at_[hidden]> wrote:

> Author: miked
> Date: 2011-08-02 10:30:11 EDT (Tue, 02 Aug 2011)
> New Revision: 24977
> URL: https://svn.open-mpi.org/trac/ompi/changeset/24977
>
> Log:
> code and readme updates, some refactoring
> Text files modified:
> trunk/NEWS | 1
> trunk/README | 5 ++
> trunk/ompi/mca/mtl/mxm/mtl_mxm_cancel.c | 4 +-
> trunk/ompi/mca/mtl/mxm/mtl_mxm_probe.c | 16 ++++----
> trunk/ompi/mca/mtl/mxm/mtl_mxm_recv.c | 54 ++++++++++++++--------------
> trunk/ompi/mca/mtl/mxm/mtl_mxm_request.h | 2
> trunk/ompi/mca/mtl/mxm/mtl_mxm_send.c | 77 ++++++++++++++++-----------------------
> 7 files changed, 74 insertions(+), 85 deletions(-)
>
> Modified: trunk/NEWS
> ==============================================================================
> --- trunk/NEWS (original)
> +++ trunk/NEWS 2011-08-02 10:30:11 EDT (Tue, 02 Aug 2011)
> @@ -62,6 +62,7 @@
> OPAL levels - intended for use when configuring without MPI support
> - Modified paffinity system to provide warning when bindings result in
> being "bound to all", which is equivalent to "not bound"
> +- Added Mellanox MTL layer implementation (mxm)
>
>
> 1.5.3
>
> Modified: trunk/README
> ==============================================================================
> --- trunk/README (original)
> +++ trunk/README 2011-08-02 10:30:11 EDT (Tue, 02 Aug 2011)
> @@ -509,6 +509,9 @@
> or
> shell$ mpirun --mca pml cm ...
>
> +- MXM MTL is a transport layer utilizing various Mellanox proprietary
> +  technologies and providing better scalability and performance for large-scale jobs
> +
> - Myrinet MX (and Open-MX) support is shared between the 2 internal
> devices, the MTL and the BTL. The design of the BTL interface in
> Open MPI assumes that only naive one-sided communication
> @@ -707,7 +710,7 @@
> --with-mxm=<directory>
> Specify the directory where the Mellanox MXM library and
> header files are located. This option is generally only necessary
> - if the InfiniPath headers and libraries are not in default
> + if the MXM headers and libraries are not in default
> compiler/linker search paths.
>
> MXM is the support library for Mellanox network adapters.
>
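FWIW, it might help to show a complete example in this part of the README. A sketch, assuming a typical install prefix (the path below is illustrative only - point it at wherever MXM is actually installed):

  shell$ ./configure --with-mxm=/opt/mellanox/mxm ...
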
> Modified: trunk/ompi/mca/mtl/mxm/mtl_mxm_cancel.c
> ==============================================================================
> --- trunk/ompi/mca/mtl/mxm/mtl_mxm_cancel.c (original)
> +++ trunk/ompi/mca/mtl/mxm/mtl_mxm_cancel.c 2011-08-02 10:30:11 EDT (Tue, 02 Aug 2011)
> @@ -18,9 +18,9 @@
> mxm_error_t err;
> mca_mtl_mxm_request_t *mtl_mxm_request = (mca_mtl_mxm_request_t*) mtl_request;
>
> - err = mxm_req_cancel(&mtl_mxm_request->mxm_request);
> + err = mxm_req_cancel(mtl_mxm_request->mxm_base_request);
> if (MXM_OK == err) {
> - err = mxm_req_test(&mtl_mxm_request->mxm_request);
> + err = mxm_req_test(mtl_mxm_request->mxm_base_request);
> if (MXM_OK == err) {
> mtl_request->ompi_req->req_status._cancelled = true;
> mtl_mxm_request->super.completion_callback(&mtl_mxm_request->super);
>
> Modified: trunk/ompi/mca/mtl/mxm/mtl_mxm_probe.c
> ==============================================================================
> --- trunk/ompi/mca/mtl/mxm/mtl_mxm_probe.c (original)
> +++ trunk/ompi/mca/mtl/mxm/mtl_mxm_probe.c 2011-08-02 10:30:11 EDT (Tue, 02 Aug 2011)
> @@ -18,21 +18,21 @@
> int *flag, struct ompi_status_public_t *status)
> {
> mxm_error_t err;
> - mxm_req_t req;
> + mxm_recv_req_t req;
>
> - req.state = MXM_REQ_NEW;
> - req.mq = (mxm_mq_h)comm->c_pml_comm;
> - req.tag = tag;
> - req.tag_mask = (tag == MPI_ANY_TAG) ? 0 : 0xffffffffU;
> - req.conn = (src == MPI_ANY_SOURCE) ? NULL : ompi_mtl_mxm_conn_lookup(comm, src);
> + req.base.state = MXM_REQ_NEW;
> + req.base.mq = (mxm_mq_h)comm->c_pml_comm;
> + req.tag = tag;
> + req.tag_mask = (tag == MPI_ANY_TAG) ? 0 : 0xffffffffU;
> + req.base.conn = (src == MPI_ANY_SOURCE) ? NULL : ompi_mtl_mxm_conn_lookup(comm, src);
>
> err = mxm_req_probe(&req);
> if (MXM_OK == err) {
> *flag = 1;
> if (MPI_STATUS_IGNORE != status) {
> - status->MPI_SOURCE = *(int *)mxm_conn_get_context(req.conn);
> + status->MPI_SOURCE = *(int *)mxm_conn_get_context(req.base.conn);
> status->MPI_TAG = req.completion.sender_tag;
> - status->MPI_ERROR = ompi_mtl_mxm_to_mpi_status(req.completion.status);
> + status->MPI_ERROR = ompi_mtl_mxm_to_mpi_status(err);
> status->_ucount = req.completion.actual_len;
> }
> return OMPI_SUCCESS;
>
> Modified: trunk/ompi/mca/mtl/mxm/mtl_mxm_recv.c
> ==============================================================================
> --- trunk/ompi/mca/mtl/mxm/mtl_mxm_recv.c (original)
> +++ trunk/ompi/mca/mtl/mxm/mtl_mxm_recv.c 2011-08-02 10:30:11 EDT (Tue, 02 Aug 2011)
> @@ -18,26 +18,27 @@
> #include "mtl_mxm_request.h"
>
>
> -static void ompi_mtl_mxm_recv_completion_cb(mxm_req_t *req)
> +static void ompi_mtl_mxm_recv_completion_cb(void *context)
> {
> - mca_mtl_mxm_request_t *mtl_mxm_request = (mca_mtl_mxm_request_t *) req->context;
> - struct ompi_request_t *ompi_req = mtl_mxm_request->super.ompi_req;
> + mca_mtl_mxm_request_t *req = (mca_mtl_mxm_request_t *) context;
> + struct ompi_request_t *ompi_req = req->super.ompi_req;
> + mxm_recv_req_t *mxm_recv_req = (mxm_recv_req_t *)req->mxm_base_request;
>
> /* Set completion status and envelope */
> - ompi_req->req_status.MPI_TAG = req->completion.sender_tag;
> - ompi_req->req_status.MPI_SOURCE = req->completion.sender_imm;
> - ompi_req->req_status.MPI_ERROR = ompi_mtl_mxm_to_mpi_status(req->completion.status);
> - ompi_req->req_status._ucount = req->completion.actual_len;
> + ompi_req->req_status.MPI_TAG = mxm_recv_req->completion.sender_tag;
> + ompi_req->req_status.MPI_SOURCE = mxm_recv_req->completion.sender_imm;
> + ompi_req->req_status.MPI_ERROR = ompi_mtl_mxm_to_mpi_status(req->mxm_base_request->error);
> + ompi_req->req_status._ucount = mxm_recv_req->completion.actual_len;
>
> /* Copy data */
> - ompi_mtl_datatype_unpack(mtl_mxm_request->convertor, mtl_mxm_request->buf,
> - req->completion.actual_len);
> + ompi_mtl_datatype_unpack(req->convertor, req->buf,
> + mxm_recv_req->completion.actual_len);
>
> - if (mtl_mxm_request->free_after) {
> - free(mtl_mxm_request->buf);
> + if (req->free_after) {
> + free(req->buf);
> }
>
> - mtl_mxm_request->super.completion_callback(&mtl_mxm_request->super);
> + req->super.completion_callback(&req->super);
> }
>
>
> @@ -47,9 +48,8 @@
> struct mca_mtl_request_t *mtl_request)
> {
> mca_mtl_mxm_request_t * mtl_mxm_request;
> - mca_mtl_mxm_endpoint_t* mxm_endpoint;
> - ompi_proc_t* ompi_proc;
> mxm_error_t err;
> + mxm_recv_req_t *mxm_recv_req;
> int ret;
>
> mtl_mxm_request = (mca_mtl_mxm_request_t*) mtl_request;
> @@ -63,22 +63,22 @@
> return ret;
> }
>
> + mxm_recv_req = (mxm_recv_req_t *)mtl_mxm_request->mxm_base_request;
> +
> /* prepare a receive request embedded in the MTL request */
> - mtl_mxm_request->mxm_request.state = MXM_REQ_NEW;
> - mtl_mxm_request->mxm_request.mq = (mxm_mq_h)comm->c_pml_comm;
> - mtl_mxm_request->mxm_request.tag = tag;
> - mtl_mxm_request->mxm_request.tag_mask = (tag == MPI_ANY_TAG) ? 0 : 0xffffffffU;
> - mtl_mxm_request->mxm_request.conn = (src == MPI_ANY_SOURCE) ? NULL :
> - ompi_mtl_mxm_conn_lookup(comm, src);
> -
> - mtl_mxm_request->mxm_request.data.buf.ptr = mtl_mxm_request->buf;
> - mtl_mxm_request->mxm_request.data.buf.len = mtl_mxm_request->length;
> - mtl_mxm_request->mxm_request.completed_cb = ompi_mtl_mxm_recv_completion_cb;
> - mtl_mxm_request->mxm_request.context = mtl_mxm_request;
> - mtl_mxm_request->mxm_request.flags = MXM_REQ_FLAG_NONBLOCK;
> + mxm_recv_req->base.state = MXM_REQ_NEW;
> + mxm_recv_req->base.mq = (mxm_mq_h)comm->c_pml_comm;
> + mxm_recv_req->tag = tag;
> + mxm_recv_req->tag_mask = (tag == MPI_ANY_TAG) ? 0 : 0xffffffffU;
> + mxm_recv_req->base.conn = (src == MPI_ANY_SOURCE) ? NULL : ompi_mtl_mxm_conn_lookup(comm, src);
> +
> + mxm_recv_req->base.data.buffer.ptr = mtl_mxm_request->buf;
> + mxm_recv_req->base.data.buffer.length = mtl_mxm_request->length;
> + mxm_recv_req->base.completed_cb = ompi_mtl_mxm_recv_completion_cb;
> + mxm_recv_req->base.context = mtl_mxm_request;
>
> /* post-recv */
> - err = mxm_req_recv(&mtl_mxm_request->mxm_request);
> + err = mxm_req_recv(mxm_recv_req);
> if (MXM_OK != err) {
> orte_show_help("help-mtl-mxm.txt", "error posting receive", true,
> mxm_error_string(err), mtl_mxm_request->buf, mtl_mxm_request->length);
>
> Modified: trunk/ompi/mca/mtl/mxm/mtl_mxm_request.h
> ==============================================================================
> --- trunk/ompi/mca/mtl/mxm/mtl_mxm_request.h (original)
> +++ trunk/ompi/mca/mtl/mxm/mtl_mxm_request.h 2011-08-02 10:30:11 EDT (Tue, 02 Aug 2011)
> @@ -16,7 +16,7 @@
>
> struct mca_mtl_mxm_request_t {
> struct mca_mtl_request_t super;
> - mxm_req_t mxm_request;
> + mxm_req_base_t *mxm_base_request;
> /* mxm_segment_t mxm_segment[1]; */
> void *buf;
> size_t length;
>
> Modified: trunk/ompi/mca/mtl/mxm/mtl_mxm_send.c
> ==============================================================================
> --- trunk/ompi/mca/mtl/mxm/mtl_mxm_send.c (original)
> +++ trunk/ompi/mca/mtl/mxm/mtl_mxm_send.c 2011-08-02 10:30:11 EDT (Tue, 02 Aug 2011)
> @@ -17,30 +17,15 @@
> #include "ompi/mca/mtl/base/mtl_base_datatype.h"
>
>
> -static void ompi_mtl_mxm_send_completion_cb(mxm_req_t *req)
> +static void ompi_mtl_mxm_send_completion_cb(void *context)
> {
> -
> - mca_mtl_mxm_request_t *mtl_mxm_request;
> - mtl_mxm_request = (mca_mtl_mxm_request_t *) req->context;
> + mca_mtl_mxm_request_t *mtl_mxm_request = context;
>
> if (mtl_mxm_request->free_after) {
> free(mtl_mxm_request->buf);
> }
>
> - switch (req->completion.status) {
> - case MXM_OK:
> - mtl_mxm_request->super.ompi_req->req_status.MPI_ERROR
> - = OMPI_SUCCESS;
> - break;
> - case MXM_ERR_MESSAGE_TRUNCATED:
> - mtl_mxm_request->super.ompi_req->req_status.MPI_ERROR
> - = MPI_ERR_TRUNCATE;
> - break;
> - default:
> - mtl_mxm_request->super.ompi_req->req_status.MPI_ERROR
> - = MPI_ERR_INTERN;
> - break;
> - }
> + mtl_mxm_request->super.ompi_req->req_status.MPI_ERROR = ompi_mtl_mxm_to_mpi_status(mtl_mxm_request->mxm_base_request->error);
>
> mtl_mxm_request->super.completion_callback(&mtl_mxm_request->super);
> }
> @@ -50,41 +35,38 @@
> struct opal_convertor_t *convertor,
> mca_pml_base_send_mode_t mode)
> {
> - mxm_req_t mxm_req;
> + mxm_send_req_t mxm_send_req;
> bool free_after;
> mxm_error_t err;
> int ret;
>
> /* prepare local send request */
> - mxm_req.state = MXM_REQ_NEW;
> - mxm_req.mq = ompi_mtl_mxm_mq_lookup(comm);
> - mxm_req.conn = ompi_mtl_mxm_conn_lookup(comm, dest);
> - mxm_req.tag = tag;
> - mxm_req.imm_data = ompi_comm_rank(comm);
> - mxm_req.completed_cb = NULL;
> - mxm_req.flags = 0;
> + mxm_send_req.base.state = MXM_REQ_NEW;
> + mxm_send_req.base.mq = ompi_mtl_mxm_mq_lookup(comm);
> + mxm_send_req.base.conn = ompi_mtl_mxm_conn_lookup(comm, dest);
> + mxm_send_req.op.send.tag = tag;
> + mxm_send_req.op.send.imm_data = ompi_comm_rank(comm);
> + mxm_send_req.base.completed_cb = NULL;
> + mxm_send_req.base.flags = MXM_REQ_FLAG_WAIT;
> +
> if (mode == MCA_PML_BASE_SEND_SYNCHRONOUS) {
> - mxm_req.flags |= MXM_REQ_FLAG_SEND_SYNC;
> + mxm_send_req.base.flags |= MXM_REQ_FLAG_SEND_SYNC;
> }
> - ret = ompi_mtl_datatype_pack(convertor, &mxm_req.data.buf.ptr, &mxm_req.data.buf.len,
> + ret = ompi_mtl_datatype_pack(convertor, &mxm_send_req.base.data.buffer.ptr, &mxm_send_req.base.data.buffer.length,
> &free_after);
> if (OMPI_SUCCESS != ret) {
> return ret;
> }
>
> /* post-send */
> - err = mxm_req_send(&mxm_req);
> + err = mxm_req_send(&mxm_send_req);
> if (MXM_OK != err) {
> orte_show_help("help-mtl-mxm.txt", "error posting send", true, 0, mxm_error_string(err));
> return OMPI_ERROR;
> }
>
> /* wait for request completion */
> - err = mxm_req_wait(&mxm_req);
> - if (MXM_OK != err) {
> - orte_show_help("help-mtl-mxm.txt", "error while waiting in send", true, mxm_error_string(err));
> - return OMPI_ERROR;
> - }
> + mxm_req_wait(&mxm_send_req.base);
>
> return OMPI_SUCCESS;
> }
> @@ -96,6 +78,7 @@
> mca_mtl_request_t * mtl_request)
> {
> mca_mtl_mxm_request_t *mtl_mxm_request = (mca_mtl_mxm_request_t *)mtl_request;
> + mxm_send_req_t *mxm_send_req;
> mxm_error_t err;
> int ret;
>
> @@ -110,23 +93,25 @@
> return ret;
> }
>
> + mxm_send_req = (mxm_send_req_t *) mtl_mxm_request->mxm_base_request;
> +
> /* prepare a send request embedded in the MTL request */
> - mtl_mxm_request->mxm_request.state = MXM_REQ_NEW;
> - mtl_mxm_request->mxm_request.mq = ompi_mtl_mxm_mq_lookup(comm);
> - mtl_mxm_request->mxm_request.conn = ompi_mtl_mxm_conn_lookup(comm, dest);
> - mtl_mxm_request->mxm_request.tag = tag;
> - mtl_mxm_request->mxm_request.imm_data = ompi_comm_rank(comm);
> - mtl_mxm_request->mxm_request.data.buf.ptr = mtl_mxm_request->buf;
> - mtl_mxm_request->mxm_request.data.buf.len = mtl_mxm_request->length;
> - mtl_mxm_request->mxm_request.completed_cb = ompi_mtl_mxm_send_completion_cb;
> - mtl_mxm_request->mxm_request.context = mtl_mxm_request;
> - mtl_mxm_request->mxm_request.flags = MXM_REQ_FLAG_NONBLOCK;
> + mxm_send_req->base.state = MXM_REQ_NEW;
> + mxm_send_req->base.mq = ompi_mtl_mxm_mq_lookup(comm);
> + mxm_send_req->base.conn = ompi_mtl_mxm_conn_lookup(comm, dest);
> + mxm_send_req->op.send.tag = tag;
> + mxm_send_req->op.send.imm_data = ompi_comm_rank(comm);
> + mxm_send_req->base.data.buffer.ptr = mtl_mxm_request->buf;
> + mxm_send_req->base.data.buffer.length = mtl_mxm_request->length;
> + mxm_send_req->base.completed_cb = ompi_mtl_mxm_send_completion_cb;
> + mxm_send_req->base.context = mtl_mxm_request;
> +
> if (mode == MCA_PML_BASE_SEND_SYNCHRONOUS) {
> - mtl_mxm_request->mxm_request.flags |= MXM_REQ_FLAG_SEND_SYNC;
> + mxm_send_req->base.flags |= MXM_REQ_FLAG_SEND_SYNC;
> }
>
> /* post-send */
> - err = mxm_req_send(&mtl_mxm_request->mxm_request);
> + err = mxm_req_send(mxm_send_req);
> if (MXM_OK != err) {
> orte_show_help("help-mtl-mxm.txt", "error posting send", true, 1, mxm_error_string(err));
> return OMPI_ERROR;
> _______________________________________________
> svn-full mailing list
> svn-full_at_[hidden]
> http://www.open-mpi.org/mailman/listinfo.cgi/svn-full