Open MPI logo

Open MPI Development Mailing List Archives

  |   Home   |   Support   |   FAQ   |   all Development mailing list

Subject: Re: [OMPI devel] [OMPI svn] svn:open-mpi r29644 - trunk/orte/mca/rmaps/mindist
From: Ralph Castain (rhc_at_[hidden])
Date: 2013-11-07 23:49:56


Ummm...you can't do that, Josh. You are violating the abstraction barrier rather badly by searching for specific IB devices down in ORTE.

Please revert this and let's talk about what you are actually trying to do.

On Nov 7, 2013, at 8:28 PM, svn-commit-mailer_at_[hidden] wrote:

> Author: jladd (Joshua Ladd)
> Date: 2013-11-07 23:28:53 EST (Thu, 07 Nov 2013)
> New Revision: 29644
> URL: https://svn.open-mpi.org/trac/ompi/changeset/29644
>
> Log:
> Adds a check in the mindist mapper for whether or not the user asks for a specific device. This patch was submitted by Elena Elkina and reviewed by Josh Ladd and should be added to v1.7.4.
>
> cmr=v1.7.4:reviewer=jladd
>
> Text files modified:
> trunk/orte/mca/rmaps/mindist/rmaps_mindist_module.c | 65 ++++++++++++++++++++++++++++++++++++---
> 1 files changed, 60 insertions(+), 5 deletions(-)
>
> Modified: trunk/orte/mca/rmaps/mindist/rmaps_mindist_module.c
> ==============================================================================
> --- trunk/orte/mca/rmaps/mindist/rmaps_mindist_module.c Thu Nov 7 23:21:05 2013 (r29643)
> +++ trunk/orte/mca/rmaps/mindist/rmaps_mindist_module.c 2013-11-07 23:28:53 EST (Thu, 07 Nov 2013) (r29644)
> @@ -47,6 +47,52 @@
> mindist_map
> };
>
> +static int num_devices_in_list(char *list)
> +{
> + int count = 0;
> + list = strtok(list, ",");
> + while (NULL != list) {
> + ++count;
> + list = strtok(NULL, ",");
> + }
> + return count;
> +}
> +
> +static char* get_hca_name(orte_app_context_t *app)
> +{
> + int found_ind = -1;
> + char** env = app->env;
> + int i;
> + for (i = 0; env[i]; i++) {
> + if (strstr(env[i], "OMPI_MCA_btl_openib_if_include") != NULL) {
> + found_ind = i;
> + break;
> + }
> + }
> + if (found_ind == -1) {
> + for (i = 0; env[i]; i++) {
> + if (strstr(env[i], "MXM_RDMA_PORTS") != NULL) {
> + found_ind = i;
> + break;
> + }
> + }
> + }
> + if (found_ind != -1) {
> + char* start = strstr(env[found_ind], "=");
> + if (start != NULL) {
> + start = strdup(start+sizeof(char));
> + if (num_devices_in_list(start) == 1) {
> + return strtok(start, ":");
> + }
> + else {
> + free(start);
> + return NULL;
> + }
> + }
> + }
> + return NULL;
> +}
> +
> /*
> * Create a round-robin mapping for the job.
> */
> @@ -248,10 +294,17 @@
> OBJ_CONSTRUCT(&numa_list, opal_list_t);
> ret = opal_hwloc_get_sorted_numa_list(node->topology, orte_rmaps_base.device, &numa_list);
> if (ret > 1) {
> - orte_show_help("help-orte-rmaps-md.txt", "orte-rmaps-mindist:several-hca-devices",
> - true, ret, node->name);
> - rc = ORTE_ERR_SILENT;
> - goto error;
> + /* check if hca device is specified via openib or mxm parameter */
> + free(orte_rmaps_base.device);
> + orte_rmaps_base.device = get_hca_name(app);
> + if (orte_rmaps_base.device != NULL) {
> + ret = opal_hwloc_get_sorted_numa_list(node->topology, orte_rmaps_base.device, &numa_list);
> + } else {
> + orte_show_help("help-orte-rmaps-md.txt", "orte-rmaps-mindist:several-hca-devices",
> + true, ret, node->name);
> + rc = ORTE_ERR_SILENT;
> + goto error;
> + }
> } else if (ret < 0) {
> orte_show_help("help-orte-rmaps-md.txt", "orte-rmaps-mindist:device-not-found",
> true, orte_rmaps_base.device, node->name);
> @@ -402,7 +455,9 @@
> }
> OBJ_DESTRUCT(&node_list);
> }
> - free(orte_rmaps_base.device);
> + if (orte_rmaps_base.device != NULL) {
> + free(orte_rmaps_base.device);
> + }
> return ORTE_SUCCESS;
>
> error:
> _______________________________________________
> svn mailing list
> svn_at_[hidden]
> http://www.open-mpi.org/mailman/listinfo.cgi/svn