1 files changed, 92 insertions, 0 deletions
diff --git a/Tests/Cuda/ProperDeviceLibraries/main.cu b/Tests/Cuda/ProperDeviceLibraries/main.cu
new file mode 100644
index 0000000..8ceb0cc
--- /dev/null
+++ b/Tests/Cuda/ProperDeviceLibraries/main.cu
@@ -0,0 +1,92 @@
+
+#include <cublas_v2.h>
+#include <cuda_runtime.h>
+#include <iostream>
+
+#if defined(USE_THREADS_POSIX) && defined(HAVE_PTHREAD_H)
+
+#  include <pthread.h>
+// Calls into the pthread API so that this translation unit references it.
+// Nothing in this file calls the function.
+//
+// Returns 0 when the calling thread's id compares equal to itself, which is
+// the expected outcome, and 1 otherwise.
+//
+// pthread_t is an opaque type that may be a pointer or a struct, so the id
+// is compared with pthread_equal() instead of being cast to int; such a cast
+// is ill-formed wherever pthread_t is not an arithmetic type.
+static int verify_linking_to_pthread()
+{
+  pthread_t const self = pthread_self();
+  return pthread_equal(self, self) ? 0 : 1;
+}
+#endif
+
+// This test only makes sense for versions of CUDA that ship
+// static libraries that have separable compilation device symbols.
+#if __CUDACC_VER_MAJOR__ <= 9
+// Kernel that creates a cuBLAS handle on the device, issues one SGEMM through
+// it and destroys the handle again.
+//
+//   n            passed as m, n, k and as every leading dimension
+//   alpha, beta  scalars, handed to cublasSgemm by address
+//   d_A, d_B     forwarded as the A and B matrix arguments
+//   d_C          forwarded as the C matrix argument
+//
+// The kernel returns early when handle creation fails. The status returned
+// by cublasSgemm is not inspected.
+__global__ void deviceCublasSgemm(int n, float alpha, float beta,
+                                  const float* d_A, const float* d_B,
+                                  float* d_C)
+{
+  cublasHandle_t handle;
+  if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) {
+    return;
+  }
+
+  // cublasSgemm is defined in the cublas_device system static library.
+  // Calling it verifies that system libraries are properly passed to the
+  // device link line.
+  cublasSgemm(handle, CUBLAS_OP_N, CUBLAS_OP_N, n, n, n, &alpha, d_A, n, d_B,
+              n, &beta, d_C, n);
+
+  cublasDestroy(handle);
+}
+#endif
+
+// Select the first CUDA device whose compute capability is at least 3.5.
+//
+// Returns 0 as soon as cudaSetDevice() succeeds on such a device.
+// Returns 1 when the device count or a device's properties cannot be
+// queried, or when no suitable device could be selected. A diagnostic that
+// includes the CUDA error string, where one exists, is printed in each case.
+int choose_cuda_device()
+{
+  int nDevices = 0;
+  cudaError_t err = cudaGetDeviceCount(&nDevices);
+  if (err != cudaSuccess) {
+    std::cerr << "Failed to retrieve the number of CUDA enabled devices: "
+              << cudaGetErrorString(err) << std::endl;
+    return 1;
+  }
+
+  for (int i = 0; i < nDevices; ++i) {
+    cudaDeviceProp prop;
+    // Reuse the single status variable instead of shadowing it in the loop.
+    err = cudaGetDeviceProperties(&prop, i);
+    if (err != cudaSuccess) {
+      std::cerr << "Could not retrieve properties from CUDA device " << i
+                << ": " << cudaGetErrorString(err) << std::endl;
+      return 1;
+    }
+
+    // Skip devices below compute capability 3.5.
+    if (prop.major < 3 || (prop.major == 3 && prop.minor < 5)) {
+      continue;
+    }
+
+    err = cudaSetDevice(i);
+    if (err == cudaSuccess) {
+      return 0;
+    }
+    // Selection failed: report it and keep trying the remaining devices.
+    std::cout << "Could not select CUDA device " << i << ": "
+              << cudaGetErrorString(err) << std::endl;
+  }
+
+  std::cout << "Could not find a CUDA enabled card supporting compute >=3.5"
+            << std::endl;
+  return 1;
+}
+
+// Test entry point.
+//
+// Exits with 0 without launching anything when choose_cuda_device() cannot
+// select a device. When compiled with CUDA <= 9 it launches
+// deviceCublasSgemm on a single thread and exits with 1 if either the launch
+// or the kernel's execution reports an error. Exits with 0 otherwise.
+int main(int argc, char** argv)
+{
+  // The command-line arguments are not used.
+  (void)argc;
+  (void)argv;
+
+  int ret = choose_cuda_device();
+  if (ret) {
+    return 0;
+  }
+
+#if __CUDACC_VER_MAJOR__ <= 9
+  // Initial values that make sure that cublasSgemm won't actually do any
+  // work.
+  int n = 0;
+  float alpha = 1.0f;
+  float beta = 1.0f;
+  float* d_A = nullptr;
+  float* d_B = nullptr;
+  float* d_C = nullptr;
+  deviceCublasSgemm<<<1, 1>>>(n, alpha, beta, d_A, d_B, d_C);
+
+  // A kernel launch does not return a status, so query it explicitly.
+  cudaError_t err = cudaGetLastError();
+  if (err != cudaSuccess) {
+    std::cerr << "deviceCublasSgemm launch failed: "
+              << cudaGetErrorString(err) << std::endl;
+    return 1;
+  }
+
+  // Wait for the kernel so that execution errors surface before the process
+  // exits.
+  err = cudaDeviceSynchronize();
+  if (err != cudaSuccess) {
+    std::cerr << "deviceCublasSgemm execution failed: "
+              << cudaGetErrorString(err) << std::endl;
+    return 1;
+  }
+#endif
+
+  return 0;
+}