Open MPI logo

MTT Devel Mailing List Archives

  |   Home   |   Support   |   FAQ   |   all MTT Devel mailing list

Subject: [MTT devel] Fwd: [OMPI svn-full] svn:open-mpi r28142 - trunk/ompi/mca/common/cuda
From: Jeff Squyres (jsquyres) (jsquyres_at_[hidden])
Date: 2013-03-01 17:17:40


Mike -

Can you verify that this commit is ok?

Sent from my phone. No type good.

Begin forwarded message:

From: <svn-commit-mailer_at_[hidden]<mailto:svn-commit-mailer_at_[hidden]>>
Date: March 1, 2013, 4:50:56 PM EST
To: <svn-full_at_[hidden]<mailto:svn-full_at_[hidden]>>
Subject: [OMPI svn-full] svn:open-mpi r28142 - trunk/ompi/mca/common/cuda
Reply-To: <devel_at_[hidden]<mailto:devel_at_[hidden]>>

Author: rolfv (Rolf Vandevaart)
Date: 2013-03-01 16:50:56 EST (Fri, 01 Mar 2013)
New Revision: 28142
URL: https://svn.open-mpi.org/trac/ompi/changeset/28142

Log:
Add a search path. Refactor code.

Text files modified:
  trunk/ompi/mca/common/cuda/common_cuda.c | 116 +++++++++++++++++----------------------
  1 files changed, 52 insertions(+), 64 deletions(-)

Modified: trunk/ompi/mca/common/cuda/common_cuda.c
==============================================================================
--- trunk/ompi/mca/common/cuda/common_cuda.c Fri Mar 1 14:13:06 2013 (r28141)
+++ trunk/ompi/mca/common/cuda/common_cuda.c 2013-03-01 16:50:56 EST (Fri, 01 Mar 2013) (r28142)
@@ -463,11 +463,15 @@
 * This function will open and load the symbols needed from the CUDA driver
 * library. Any failure will result in a message and we will return 1.
 */
+#define NUMLIBS 2
static int mca_common_cuda_load_libcuda(void)
{
    opal_lt_dladvise advise;
- int retval;
+ int retval, i;
    int advise_support = 1;
+ bool loaded = true;
+ char *errs[NUMLIBS] = {NULL, NULL};
+ char *cudalibs[NUMLIBS] = {"libcuda.so", "libcuda.so.1"};

    if (0 != (retval = opal_lt_dlinit())) {
        if (OPAL_ERR_NOT_SUPPORTED == retval) {
@@ -491,6 +495,14 @@
        }
    }

+ /* Make sure we check in lib64 also in the case where there are both
+ * 32 and 64 bit libraries installed. Otherwise, we may fail trying to
+ * load the 32 bit library. */
+ opal_lt_dladdsearchdir("/usr/lib64");
+
+ /* Now walk through all the potential names of libcuda and find one
+ * that works. If it does, all is good. If not, print out all
+ * the messages about why things failed. */
    if (advise_support) {
        if (0 != (retval = opal_lt_dladvise_global(&advise))) {
            opal_show_help("help-mpi-common-cuda.txt", "unknown ltdl error", true,
@@ -498,84 +510,60 @@
            opal_lt_dladvise_destroy(&advise);
            return 1;
        }
-
- /*
- * Try and open libcuda.so and libcuda.so.1. Note that we are not using
- * opal_lt_dladvise_ext() as we do not need ltdl to add any suffixes to
- * the library names being handed in.
- */
- libcuda_handle = opal_lt_dlopenadvise("libcuda.so", advise);
-
- /* If the first open fails, save the error message so that it can be printed
- * out of the second open fails as well. If the second open succeeds, then
- * we do not caer that the first open failed. */
- if (NULL == libcuda_handle) {
- char *err1;
- const char *str1 = opal_lt_dlerror();
- if (NULL != str1) {
- err1 = strdup(str1);
- } else {
- err1 = strdup("lt_dlerror() returned NULL.");
- }
- libcuda_handle = opal_lt_dlopenadvise("libcuda.so.1", advise);
+ for (i = 0; i < NUMLIBS; i++) {
+ const char *str;
+ libcuda_handle = opal_lt_dlopenadvise(cudalibs[i], advise);
            if (NULL == libcuda_handle) {
- char *err2;
- const char *str2 = opal_lt_dlerror();
- if (NULL != str2) {
- err2 = strdup(str2);
+ str = opal_lt_dlerror();
+ if (NULL != str) {
+ errs[i] = strdup(str);
                } else {
- err2 = strdup("lt_dlerror() returned NULL.");
+ errs[i] = strdup("lt_dlerror() returned NULL.");
                }
- opal_show_help("help-mpi-common-cuda.txt", "dlopen failed", true,
- "libcuda.so", err1, "libcuda.so.1", err2);
- free(err1);
- free(err2);
- opal_lt_dladvise_destroy(&advise);
- return 1;
+ opal_output_verbose(10, mca_common_cuda_output,
+ "CUDA: Library open error: %s",
+ errs[i]);
+ } else {
+ loaded = true;
+ break;
            }
- free(err1);
        }
-
        opal_lt_dladvise_destroy(&advise);
    } else {
        /* No lt_dladvise support. This should rarely happen. */
- /*
- * Try and open libcuda.so and libcuda.so.1. Note that we are not using
- * opal_lt_dladvise_ext() as we do not need ltdl to add any suffixes to
- * the library names being handed in.
- */
- libcuda_handle = opal_lt_dlopen("libcuda.so");
-
- /* If the first open fails, save the error message so that it can be printed
- * out of the second open fails as well. If the second open succeeds, then
- * we do not caer that the first open failed. */
- if (NULL == libcuda_handle) {
- char *err1;
- const char *str1 = opal_lt_dlerror();
- if (NULL != str1) {
- err1 = strdup(str1);
- } else {
- err1 = strdup("lt_dlerror() returned NULL.");
- }
- libcuda_handle = opal_lt_dlopen("libcuda.so.1");
+ for (i = 0; i < NUMLIBS; i++) {
+ const char *str;
+ libcuda_handle = opal_lt_dlopen(cudalibs[i]);
            if (NULL == libcuda_handle) {
- char *err2;
- const char *str2 = opal_lt_dlerror();
- if (NULL != str2) {
- err2 = strdup(str2);
+ str = opal_lt_dlerror();
+ if (NULL != str) {
+ errs[i] = strdup(str);
                } else {
- err2 = strdup("lt_dlerror() returned NULL.");
+ errs[i] = strdup("lt_dlerror() returned NULL.");
                }
- opal_show_help("help-mpi-common-cuda.txt", "dlopen failed", true,
- "libcuda.so", err1, "libcuda.so.1", err2);
- free(err1);
- free(err2);
- return 1;
+ } else {
+ loaded = true;
+ break;
            }
- free(err1);
        }
    }

+ if (loaded != true) {
+ opal_show_help("help-mpi-common-cuda.txt", "dlopen failed", true,
+ cudalibs[0], errs[0], cudalibs[1], errs[1]);
+ }
+
+ /* Cleanup error messages. Need to do this after printing them. */
+ for (i = 0; i < NUMLIBS; i++) {
+ if (NULL != errs[i]) {
+ free(errs[i]);
+ }
+ }
+
+ if (loaded != true) {
+ return 1;
+ }
+
    /* Map in the functions that we need */
    OMPI_CUDA_DLSYM(libcuda_handle, cuStreamCreate);
    OMPI_CUDA_DLSYM(libcuda_handle, cuCtxGetCurrent);
_______________________________________________
svn-full mailing list
svn-full_at_[hidden]<mailto:svn-full_at_[hidden]>
http://www.open-mpi.org/mailman/listinfo.cgi/svn-full