summaryrefslogtreecommitdiffstats
path: root/Tests/Cuda
diff options
context:
space:
mode:
authorRobert Maynard <robert.maynard@kitware.com>2018-10-22 14:54:44 (GMT)
committerBrad King <brad.king@kitware.com>2018-10-24 14:15:48 (GMT)
commit2cc050b53b4afb1ed62621360b860e25b7c46015 (patch)
treec2446498b1c4db63138987a8d62390c5c1478e34 /Tests/Cuda
parent83c13ca44f661ba22acf4abe63d84fd5651b4dbc (diff)
downloadCMake-2cc050b53b4afb1ed62621360b860e25b7c46015.zip
CMake-2cc050b53b4afb1ed62621360b860e25b7c46015.tar.gz
CMake-2cc050b53b4afb1ed62621360b860e25b7c46015.tar.bz2
CUDA: Add test for device linking when host linking uses threads
Convert the `CudaOnly.LinkSystemDeviceLibraries` test to a new `Cuda.ProperDeviceLibraries` test. The former covered only the `cublas_device` library which is removed by CUDA 10. Extend the new test to also cover various cases of using threads. Issue: #18008
Diffstat (limited to 'Tests/Cuda')
-rw-r--r--Tests/Cuda/CMakeLists.txt1
-rw-r--r--Tests/Cuda/ProperDeviceLibraries/CMakeLists.txt45
-rw-r--r--Tests/Cuda/ProperDeviceLibraries/main.cu92
-rw-r--r--Tests/Cuda/ProperDeviceLibraries/use_pthreads.cu9
-rw-r--r--Tests/Cuda/ProperDeviceLibraries/use_pthreads.cxx9
5 files changed, 156 insertions, 0 deletions
diff --git a/Tests/Cuda/CMakeLists.txt b/Tests/Cuda/CMakeLists.txt
index 8a43df5..1b3daa6 100644
--- a/Tests/Cuda/CMakeLists.txt
+++ b/Tests/Cuda/CMakeLists.txt
@@ -4,5 +4,6 @@ ADD_TEST_MACRO(Cuda.ConsumeCompileFeatures CudaConsumeCompileFeatures)
ADD_TEST_MACRO(Cuda.ObjectLibrary CudaObjectLibrary)
ADD_TEST_MACRO(Cuda.MixedStandardLevels MixedStandardLevels)
ADD_TEST_MACRO(Cuda.ToolkitInclude CudaToolkitInclude)
+ADD_TEST_MACRO(Cuda.ProperDeviceLibraries ProperDeviceLibraries)
ADD_TEST_MACRO(Cuda.ProperLinkFlags ProperLinkFlags)
ADD_TEST_MACRO(Cuda.WithC CudaWithC)
diff --git a/Tests/Cuda/ProperDeviceLibraries/CMakeLists.txt b/Tests/Cuda/ProperDeviceLibraries/CMakeLists.txt
new file mode 100644
index 0000000..cb47b09
--- /dev/null
+++ b/Tests/Cuda/ProperDeviceLibraries/CMakeLists.txt
@@ -0,0 +1,45 @@
+cmake_minimum_required(VERSION 3.13)
+project(ProperDeviceLibraries CXX CUDA)
+
+string(APPEND CMAKE_CUDA_FLAGS " -gencode arch=compute_35,code=compute_35 -gencode arch=compute_35,code=sm_35")
+set(CMAKE_CUDA_STANDARD 11)
+
+set(THREADS_PREFER_PTHREAD_FLAG ON)
+find_package(Threads)
+
+add_executable(ProperDeviceLibraries main.cu)
+set_target_properties(ProperDeviceLibraries
+ PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
+
+add_library(UseThreadsMixed SHARED use_pthreads.cxx use_pthreads.cu)
+target_link_libraries(UseThreadsMixed Threads::Threads)
+
+add_library(UseThreadsCuda SHARED use_pthreads.cu)
+target_link_libraries(UseThreadsCuda Threads::Threads)
+
+target_link_libraries(ProperDeviceLibraries PRIVATE UseThreadsMixed UseThreadsCuda)
+
+if(THREADS_HAVE_PTHREAD_ARG AND CMAKE_USE_PTHREADS_INIT)
+ add_library(UseExplicitPThreadsFlag SHARED use_pthreads.cu)
+ target_compile_options(UseExplicitPThreadsFlag PUBLIC "-Xcompiler=-pthread")
+ target_link_libraries(UseExplicitPThreadsFlag PUBLIC "-pthread")
+
+ add_library(UseExplicitLThreadsFlag SHARED use_pthreads.cu)
+ target_compile_options(UseExplicitLThreadsFlag PUBLIC "-Xcompiler=-pthread")
+ target_link_libraries(UseExplicitLThreadsFlag PUBLIC "-lpthread")
+
+ add_library(UseExplicitLongThreadsFlag SHARED use_pthreads.cu)
+ target_link_libraries(UseExplicitLongThreadsFlag PUBLIC "--library pthread")
+
+ target_link_libraries(ProperDeviceLibraries PRIVATE UseExplicitPThreadsFlag UseExplicitLThreadsFlag UseExplicitLongThreadsFlag)
+endif()
+
+if(CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 10.0.0)
+ #CUDA 10 removed the cublas_device library
+ target_link_libraries(ProperDeviceLibraries PRIVATE cublas_device)
+endif()
+
+if(APPLE)
+ # Help the static cuda runtime find the driver (libcuda.dyllib) at runtime.
+ set_property(TARGET ProperDeviceLibraries PROPERTY BUILD_RPATH ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES})
+endif()
diff --git a/Tests/Cuda/ProperDeviceLibraries/main.cu b/Tests/Cuda/ProperDeviceLibraries/main.cu
new file mode 100644
index 0000000..8ceb0cc
--- /dev/null
+++ b/Tests/Cuda/ProperDeviceLibraries/main.cu
@@ -0,0 +1,92 @@
+
+#include <cublas_v2.h>
+#include <cuda_runtime.h>
+#include <iostream>
+
+#if defined(USE_THREADS_POSIX) && defined(HAVE_PTHREAD_H)
+
+# include <pthread.h>
+static int verify_linking_to_pthread()
+{
+ return static_cast<int>(pthread_self());
+}
+#endif
+
+// this test only makes sense for versions of CUDA that ships
+// static libraries that have separable compilation device symbols
+#if __CUDACC_VER_MAJOR__ <= 9
+__global__ void deviceCublasSgemm(int n, float alpha, float beta,
+ const float* d_A, const float* d_B,
+ float* d_C)
+{
+ cublasHandle_t cnpHandle;
+ cublasStatus_t status = cublasCreate(&cnpHandle);
+
+ if (status != CUBLAS_STATUS_SUCCESS) {
+ return;
+ }
+
+ // Call function defined in the cublas_device system static library.
+ // This way we can verify that we properly pass system libraries to the
+ // device link line
+ status = cublasSgemm(cnpHandle, CUBLAS_OP_N, CUBLAS_OP_N, n, n, n, &alpha,
+ d_A, n, d_B, n, &beta, d_C, n);
+
+ cublasDestroy(cnpHandle);
+}
+#endif
+
+int choose_cuda_device()
+{
+ int nDevices = 0;
+ cudaError_t err = cudaGetDeviceCount(&nDevices);
+ if (err != cudaSuccess) {
+ std::cerr << "Failed to retrieve the number of CUDA enabled devices"
+ << std::endl;
+ return 1;
+ }
+ for (int i = 0; i < nDevices; ++i) {
+ cudaDeviceProp prop;
+ cudaError_t err = cudaGetDeviceProperties(&prop, i);
+ if (err != cudaSuccess) {
+ std::cerr << "Could not retrieve properties from CUDA device " << i
+ << std::endl;
+ return 1;
+ }
+
+ if (prop.major > 3 || (prop.major == 3 && prop.minor >= 5)) {
+ err = cudaSetDevice(i);
+ if (err != cudaSuccess) {
+ std::cout << "Could not select CUDA device " << i << std::endl;
+ } else {
+ return 0;
+ }
+ }
+ }
+
+ std::cout << "Could not find a CUDA enabled card supporting compute >=3.5"
+ << std::endl;
+ return 1;
+}
+
+int main(int argc, char** argv)
+{
+ int ret = choose_cuda_device();
+ if (ret) {
+ return 0;
+ }
+
+#if __CUDACC_VER_MAJOR__ <= 9
+ // initial values that will make sure that the cublasSgemm won't actually
+ // do any work
+ int n = 0;
+ float alpha = 1;
+ float beta = 1;
+ float* d_A = nullptr;
+ float* d_B = nullptr;
+ float* d_C = nullptr;
+ deviceCublasSgemm<<<1, 1>>>(n, alpha, beta, d_A, d_B, d_C);
+#endif
+
+ return 0;
+}
diff --git a/Tests/Cuda/ProperDeviceLibraries/use_pthreads.cu b/Tests/Cuda/ProperDeviceLibraries/use_pthreads.cu
new file mode 100644
index 0000000..c57b8a8
--- /dev/null
+++ b/Tests/Cuda/ProperDeviceLibraries/use_pthreads.cu
@@ -0,0 +1,9 @@
+
+#if defined(USE_THREADS_POSIX) && defined(HAVE_PTHREAD_H)
+
+# include <pthread.h>
+static int verify_linking_to_pthread_cuda()
+{
+ return static_cast<int>(pthread_self());
+}
+#endif
diff --git a/Tests/Cuda/ProperDeviceLibraries/use_pthreads.cxx b/Tests/Cuda/ProperDeviceLibraries/use_pthreads.cxx
new file mode 100644
index 0000000..dc7c208
--- /dev/null
+++ b/Tests/Cuda/ProperDeviceLibraries/use_pthreads.cxx
@@ -0,0 +1,9 @@
+
+#if defined(USE_THREADS_POSIX) && defined(HAVE_PTHREAD_H)
+
+# include <pthread.h>
+static int verify_linking_to_pthread_cxx()
+{
+ return static_cast<int>(pthread_self());
+}
+#endif