Mike -
Can you verify that this commit is ok?
Sent from my phone. No type good.
Begin forwarded message:
From: <svn-commit-mailer_at_[hidden]<mailto:svn-commit-mailer_at_[hidden]>>
Date: March 1, 2013, 4:50:56 PM EST
To: <svn-full_at_[hidden]<mailto:svn-full_at_[hidden]>>
Subject: [OMPI svn-full] svn:open-mpi r28142 - trunk/ompi/mca/common/cuda
Reply-To: <devel_at_[hidden]<mailto:devel_at_[hidden]>>
Author: rolfv (Rolf Vandevaart)
Date: 2013-03-01 16:50:56 EST (Fri, 01 Mar 2013)
New Revision: 28142
URL: https://svn.open-mpi.org/trac/ompi/changeset/28142
Log:
Add a search path. Refactor code.
Text files modified:
trunk/ompi/mca/common/cuda/common_cuda.c | 116 +++++++++++++++++----------------------
1 files changed, 52 insertions(+), 64 deletions(-)
Modified: trunk/ompi/mca/common/cuda/common_cuda.c
==============================================================================
--- trunk/ompi/mca/common/cuda/common_cuda.c Fri Mar 1 14:13:06 2013 (r28141)
+++ trunk/ompi/mca/common/cuda/common_cuda.c 2013-03-01 16:50:56 EST (Fri, 01 Mar 2013) (r28142)
@@ -463,11 +463,15 @@
* This function will open and load the symbols needed from the CUDA driver
* library. Any failure will result in a message and we will return 1.
*/
+#define NUMLIBS 2
static int mca_common_cuda_load_libcuda(void)
{
opal_lt_dladvise advise;
- int retval;
+ int retval, i;
int advise_support = 1;
+ bool loaded = true;
+ char *errs[NUMLIBS] = {NULL, NULL};
+ char *cudalibs[NUMLIBS] = {"libcuda.so", "libcuda.so.1"};
if (0 != (retval = opal_lt_dlinit())) {
if (OPAL_ERR_NOT_SUPPORTED == retval) {
@@ -491,6 +495,14 @@
}
}
+ /* Make sure we check in lib64 also in the case where there are both
+ * 32 and 64 bit libraries installed. Otherwise, we may fail trying to
+ * load the 32 bit library. */
+ opal_lt_dladdsearchdir("/usr/lib64");
+
+ /* Now walk through all the potential names of libcuda and find one
+ * that works. If it does, all is good. If not, print out all
+ * the messages about why things failed. */
if (advise_support) {
if (0 != (retval = opal_lt_dladvise_global(&advise))) {
opal_show_help("help-mpi-common-cuda.txt", "unknown ltdl error", true,
@@ -498,84 +510,60 @@
opal_lt_dladvise_destroy(&advise);
return 1;
}
-
- /*
- * Try and open libcuda.so and libcuda.so.1. Note that we are not using
- * opal_lt_dladvise_ext() as we do not need ltdl to add any suffixes to
- * the library names being handed in.
- */
- libcuda_handle = opal_lt_dlopenadvise("libcuda.so", advise);
-
- /* If the first open fails, save the error message so that it can be printed
- * out of the second open fails as well. If the second open succeeds, then
- * we do not caer that the first open failed. */
- if (NULL == libcuda_handle) {
- char *err1;
- const char *str1 = opal_lt_dlerror();
- if (NULL != str1) {
- err1 = strdup(str1);
- } else {
- err1 = strdup("lt_dlerror() returned NULL.");
- }
- libcuda_handle = opal_lt_dlopenadvise("libcuda.so.1", advise);
+ for (i = 0; i < NUMLIBS; i++) {
+ const char *str;
+ libcuda_handle = opal_lt_dlopenadvise(cudalibs[i], advise);
if (NULL == libcuda_handle) {
- char *err2;
- const char *str2 = opal_lt_dlerror();
- if (NULL != str2) {
- err2 = strdup(str2);
+ str = opal_lt_dlerror();
+ if (NULL != str) {
+ errs[i] = strdup(str);
} else {
- err2 = strdup("lt_dlerror() returned NULL.");
+ errs[i] = strdup("lt_dlerror() returned NULL.");
}
- opal_show_help("help-mpi-common-cuda.txt", "dlopen failed", true,
- "libcuda.so", err1, "libcuda.so.1", err2);
- free(err1);
- free(err2);
- opal_lt_dladvise_destroy(&advise);
- return 1;
+ opal_output_verbose(10, mca_common_cuda_output,
+ "CUDA: Library open error: %s",
+ errs[i]);
+ } else {
+ loaded = true;
+ break;
}
- free(err1);
}
-
opal_lt_dladvise_destroy(&advise);
} else {
/* No lt_dladvise support. This should rarely happen. */
- /*
- * Try and open libcuda.so and libcuda.so.1. Note that we are not using
- * opal_lt_dladvise_ext() as we do not need ltdl to add any suffixes to
- * the library names being handed in.
- */
- libcuda_handle = opal_lt_dlopen("libcuda.so");
-
- /* If the first open fails, save the error message so that it can be printed
- * out of the second open fails as well. If the second open succeeds, then
- * we do not caer that the first open failed. */
- if (NULL == libcuda_handle) {
- char *err1;
- const char *str1 = opal_lt_dlerror();
- if (NULL != str1) {
- err1 = strdup(str1);
- } else {
- err1 = strdup("lt_dlerror() returned NULL.");
- }
- libcuda_handle = opal_lt_dlopen("libcuda.so.1");
+ for (i = 0; i < NUMLIBS; i++) {
+ const char *str;
+ libcuda_handle = opal_lt_dlopen(cudalibs[i]);
if (NULL == libcuda_handle) {
- char *err2;
- const char *str2 = opal_lt_dlerror();
- if (NULL != str2) {
- err2 = strdup(str2);
+ str = opal_lt_dlerror();
+ if (NULL != str) {
+ errs[i] = strdup(str);
} else {
- err2 = strdup("lt_dlerror() returned NULL.");
+ errs[i] = strdup("lt_dlerror() returned NULL.");
}
- opal_show_help("help-mpi-common-cuda.txt", "dlopen failed", true,
- "libcuda.so", err1, "libcuda.so.1", err2);
- free(err1);
- free(err2);
- return 1;
+ } else {
+ loaded = true;
+ break;
}
- free(err1);
}
}
+ if (loaded != true) {
+ opal_show_help("help-mpi-common-cuda.txt", "dlopen failed", true,
+ cudalibs[0], errs[0], cudalibs[1], errs[1]);
+ }
+
+ /* Cleanup error messages. Need to do this after printing them. */
+ for (i = 0; i < NUMLIBS; i++) {
+ if (NULL != errs[i]) {
+ free(errs[i]);
+ }
+ }
+
+ if (loaded != true) {
+ return 1;
+ }
+
/* Map in the functions that we need */
OMPI_CUDA_DLSYM(libcuda_handle, cuStreamCreate);
OMPI_CUDA_DLSYM(libcuda_handle, cuCtxGetCurrent);
_______________________________________________
svn-full mailing list
svn-full_at_[hidden]<mailto:svn-full_at_[hidden]>
http://www.open-mpi.org/mailman/listinfo.cgi/svn-full
|