Open MPI logo

Open MPI Development Mailing List Archives

  |   Home   |   Support   |   FAQ   |   all Development mailing list

Subject: Re: [OMPI devel] [OMPI svn-full] svn:open-mpi r25005
From: Mike Dubman (mike.ompi_at_[hidden])
Date: 2011-08-09 03:11:44


Thanks for the comment.
Fixed it.

On Mon, Aug 8, 2011 at 6:28 PM, Jeff Squyres <jsquyres_at_[hidden]> wrote:

> Mike --
>
> Does mxm_init() do Reasonable Things to check to see if the local
> OpenFabrics-capable devices are unsuitable for MXM? E.g., does it check to
> see if the local OpenFabrics devices are MXM-capable, and if not, fail
> gracefully?
>
> Also, I would suggest NOT showing a show_help message if there are OF
> devices available such that CM/MXM can (probably) fail over to OB1/openib.
> I.e., only show a show_help message if devices are available for MXM, but
> an actual error occurs during the MXM initialization.
>
> Otherwise, if I mpirun (with the MXM MTL installed) on a system with only
> RoCE or iWARP devices present, MXM will complain but then fail over to
> OB1/openib. That would probably be confusing.
>
>
>
> On Aug 7, 2011, at 8:06 AM, miked_at_[hidden] wrote:
>
> > Author: miked
> > Date: 2011-08-07 08:06:49 EDT (Sun, 07 Aug 2011)
> > New Revision: 25005
> > URL: https://svn.open-mpi.org/trac/ompi/changeset/25005
> >
> > Log:
> > better mxm selection mechanism, some refactoring
> > Text files modified:
> > trunk/ompi/mca/mtl/mxm/mtl_mxm_cancel.c | 4 ++--
> > trunk/ompi/mca/mtl/mxm/mtl_mxm_component.c | 32 ++++++++++++++------------------
> > trunk/ompi/mca/mtl/mxm/mtl_mxm_recv.c | 6 +++---
> > trunk/ompi/mca/mtl/mxm/mtl_mxm_request.h | 6 +++++-
> > trunk/ompi/mca/mtl/mxm/mtl_mxm_send.c | 4 ++--
> > 5 files changed, 26 insertions(+), 26 deletions(-)
> >
> > Modified: trunk/ompi/mca/mtl/mxm/mtl_mxm_cancel.c
> >
> ==============================================================================
> > --- trunk/ompi/mca/mtl/mxm/mtl_mxm_cancel.c (original)
> > +++ trunk/ompi/mca/mtl/mxm/mtl_mxm_cancel.c 2011-08-07 08:06:49 EDT
> (Sun, 07 Aug 2011)
> > @@ -18,9 +18,9 @@
> > mxm_error_t err;
> > mca_mtl_mxm_request_t *mtl_mxm_request = (mca_mtl_mxm_request_t*)
> mtl_request;
> >
> > - err = mxm_req_cancel(mtl_mxm_request->mxm_base_request);
> > + err = mxm_req_cancel(&mtl_mxm_request->mxm.base);
> > if (MXM_OK == err) {
> > - err = mxm_req_test(mtl_mxm_request->mxm_base_request);
> > + err = mxm_req_test(&mtl_mxm_request->mxm.base);
> > if (MXM_OK == err) {
> > mtl_request->ompi_req->req_status._cancelled = true;
> >
> mtl_mxm_request->super.completion_callback(&mtl_mxm_request->super);
> >
> > Modified: trunk/ompi/mca/mtl/mxm/mtl_mxm_component.c
> >
> ==============================================================================
> > --- trunk/ompi/mca/mtl/mxm/mtl_mxm_component.c (original)
> > +++ trunk/ompi/mca/mtl/mxm/mtl_mxm_component.c 2011-08-07 08:06:49
> EDT (Sun, 07 Aug 2011)
> > @@ -72,18 +72,27 @@
> >
> > static int ompi_mtl_mxm_component_open(void)
> > {
> > - struct stat st;
> >
> > - /* Component available only if IB hardware is present */
> > - if (0 == stat("/dev/infiniband/uverbs0", &st)) {
> > - return OMPI_SUCCESS;
> > - } else {
> > + mxm_context_opts_t mxm_opts;
> > + mxm_error_t err;
> > +
> > + mca_mtl_mxm_output = opal_output_open(NULL);
> > + opal_output_set_verbosity(mca_mtl_mxm_output, ompi_mtl_mxm.verbose);
> > +
> > + mxm_fill_context_opts(&mxm_opts);
> > + err = mxm_init(&mxm_opts, &ompi_mtl_mxm.mxm_context);
> > + if (MXM_OK != err) {
> > + orte_show_help("help-mtl-mxm.txt", "mxm init", true,
> > + mxm_error_string(err));
> > return OPAL_ERR_NOT_AVAILABLE;
> > }
> > + return OMPI_SUCCESS;
> > }
> >
> > static int ompi_mtl_mxm_component_close(void)
> > {
> > + mxm_cleanup(ompi_mtl_mxm.mxm_context);
> > + ompi_mtl_mxm.mxm_context = NULL;
> > return OMPI_SUCCESS;
> > }
> >
> > @@ -91,21 +100,8 @@
> > ompi_mtl_mxm_component_init(bool enable_progress_threads,
> > bool enable_mpi_threads)
> > {
> > - mxm_context_opts_t mxm_opts;
> > - mxm_error_t err;
> > int rc;
> >
> > - mca_mtl_mxm_output = opal_output_open(NULL);
> > - opal_output_set_verbosity(mca_mtl_mxm_output, ompi_mtl_mxm.verbose);
> > -
> > - mxm_fill_context_opts(&mxm_opts);
> > - err = mxm_init(&mxm_opts, &ompi_mtl_mxm.mxm_context);
> > - if (MXM_OK != err) {
> > - orte_show_help("help-mtl-mxm.txt", "mxm init", true,
> > - mxm_error_string(err));
> > - return NULL;
> > - }
> > -
> > rc = ompi_mtl_mxm_module_init();
> > if (OMPI_SUCCESS != rc) {
> > return NULL;
> >
> > Modified: trunk/ompi/mca/mtl/mxm/mtl_mxm_recv.c
> >
> ==============================================================================
> > --- trunk/ompi/mca/mtl/mxm/mtl_mxm_recv.c (original)
> > +++ trunk/ompi/mca/mtl/mxm/mtl_mxm_recv.c 2011-08-07 08:06:49 EDT
> (Sun, 07 Aug 2011)
> > @@ -22,12 +22,12 @@
> > {
> > mca_mtl_mxm_request_t *req = (mca_mtl_mxm_request_t *) context;
> > struct ompi_request_t *ompi_req = req->super.ompi_req;
> > - mxm_recv_req_t *mxm_recv_req = (mxm_recv_req_t
> *)req->mxm_base_request;
> > + mxm_recv_req_t *mxm_recv_req = &req->mxm.recv;
> >
> > /* Set completion status and envelope */
> > ompi_req->req_status.MPI_TAG =
> mxm_recv_req->completion.sender_tag;
> > ompi_req->req_status.MPI_SOURCE =
> mxm_recv_req->completion.sender_imm;
> > - ompi_req->req_status.MPI_ERROR =
> ompi_mtl_mxm_to_mpi_status(req->mxm_base_request->error);
> > + ompi_req->req_status.MPI_ERROR =
> ompi_mtl_mxm_to_mpi_status(mxm_recv_req->base.error);
> > ompi_req->req_status._ucount =
> mxm_recv_req->completion.actual_len;
> >
> > /* Copy data */
> > @@ -63,7 +63,7 @@
> > return ret;
> > }
> >
> > - mxm_recv_req = (mxm_recv_req_t *)mtl_mxm_request->mxm_base_request;
> > + mxm_recv_req = &mtl_mxm_request->mxm.recv;
> >
> > /* prepare a receive request embedded in the MTL request */
> > mxm_recv_req->base.state = MXM_REQ_NEW;
> >
> > Modified: trunk/ompi/mca/mtl/mxm/mtl_mxm_request.h
> >
> ==============================================================================
> > --- trunk/ompi/mca/mtl/mxm/mtl_mxm_request.h (original)
> > +++ trunk/ompi/mca/mtl/mxm/mtl_mxm_request.h 2011-08-07 08:06:49 EDT
> (Sun, 07 Aug 2011)
> > @@ -16,7 +16,11 @@
> >
> > struct mca_mtl_mxm_request_t {
> > struct mca_mtl_request_t super;
> > - mxm_req_base_t *mxm_base_request;
> > + union {
> > + mxm_req_base_t base;
> > + mxm_send_req_t send;
> > + mxm_recv_req_t recv;
> > + } mxm;
> > /* mxm_segment_t mxm_segment[1]; */
> > void *buf;
> > size_t length;
> >
> > Modified: trunk/ompi/mca/mtl/mxm/mtl_mxm_send.c
> >
> ==============================================================================
> > --- trunk/ompi/mca/mtl/mxm/mtl_mxm_send.c (original)
> > +++ trunk/ompi/mca/mtl/mxm/mtl_mxm_send.c 2011-08-07 08:06:49 EDT
> (Sun, 07 Aug 2011)
> > @@ -25,7 +25,7 @@
> > free(mtl_mxm_request->buf);
> > }
> >
> > - mtl_mxm_request->super.ompi_req->req_status.MPI_ERROR =
> ompi_mtl_mxm_to_mpi_status(mtl_mxm_request->mxm_base_request->error);
> > + mtl_mxm_request->super.ompi_req->req_status.MPI_ERROR =
> ompi_mtl_mxm_to_mpi_status(mtl_mxm_request->mxm.base.error);
> >
> > mtl_mxm_request->super.completion_callback(&mtl_mxm_request->super);
> > }
> > @@ -93,7 +93,7 @@
> > return ret;
> > }
> >
> > - mxm_send_req = (mxm_send_req_t *) mtl_mxm_request->mxm_base_request;
> > + mxm_send_req = &mtl_mxm_request->mxm.send;
> >
> > /* prepare a send request embedded in the MTL request */
> > mxm_send_req->base.state = MXM_REQ_NEW;
> > _______________________________________________
> > svn-full mailing list
> > svn-full_at_[hidden]
> > http://www.open-mpi.org/mailman/listinfo.cgi/svn-full
>
>
> --
> Jeff Squyres
> jsquyres_at_[hidden]
> For corporate legal information go to:
> http://www.cisco.com/web/about/doing_business/legal/cri/
>
>
> _______________________________________________
> devel mailing list
> devel_at_[hidden]
> http://www.open-mpi.org/mailman/listinfo.cgi/devel
>