diff options
Diffstat (limited to 'Tests/Cuda/Complex')
-rw-r--r-- | Tests/Cuda/Complex/CMakeLists.txt | 8 | ||||
-rw-r--r-- | Tests/Cuda/Complex/dynamic.cu | 43 | ||||
-rw-r--r-- | Tests/Cuda/Complex/file3.cu | 36 | ||||
-rw-r--r-- | Tests/Cuda/Complex/main.cpp | 12 | ||||
-rw-r--r-- | Tests/Cuda/Complex/mixed.cu | 36 |
5 files changed, 119 insertions, 16 deletions
diff --git a/Tests/Cuda/Complex/CMakeLists.txt b/Tests/Cuda/Complex/CMakeLists.txt index bff7d07..450ef48 100644 --- a/Tests/Cuda/Complex/CMakeLists.txt +++ b/Tests/Cuda/Complex/CMakeLists.txt @@ -32,9 +32,17 @@ add_library(CudaComplexSharedLib SHARED dynamic.cu) target_link_libraries(CudaComplexSharedLib PUBLIC CudaComplexCppBase) add_library(CudaComplexMixedLib SHARED mixed.cpp mixed.cu) +set_target_properties(CudaComplexMixedLib + PROPERTIES CUDA_SEPARABLE_COMPILATION ON) target_link_libraries(CudaComplexMixedLib PUBLIC CudaComplexSharedLib PRIVATE CudaComplexSeperableLib) add_executable(CudaComplex main.cpp) target_link_libraries(CudaComplex PUBLIC CudaComplexMixedLib) + +if(APPLE) + # We need to add the default path to the driver (libcuda.dylib) as an rpath, so that + # the static cuda runtime can find it at runtime. + target_link_libraries(CudaComplex PRIVATE -Wl,-rpath,/usr/local/cuda/lib) +endif() diff --git a/Tests/Cuda/Complex/dynamic.cu b/Tests/Cuda/Complex/dynamic.cu index 82255c5..f677868 100644 --- a/Tests/Cuda/Complex/dynamic.cu +++ b/Tests/Cuda/Complex/dynamic.cu @@ -20,11 +20,50 @@ static __global__ void DetermineIfValidCudaDevice() { } +EXPORT int choose_cuda_device() +{ + int nDevices = 0; + cudaError_t err = cudaGetDeviceCount(&nDevices); + if (err != cudaSuccess) { + std::cerr << "Failed to retrieve the number of CUDA enabled devices" + << std::endl; + return 1; + } + for (int i = 0; i < nDevices; ++i) { + cudaDeviceProp prop; + cudaError_t err = cudaGetDeviceProperties(&prop, i); + if (err != cudaSuccess) { + std::cerr << "Could not retrieve properties from CUDA device " << i + << std::endl; + return 1; + } + if (prop.major >= 4) { + err = cudaSetDevice(i); + if (err != cudaSuccess) { + std::cout << "Could not select CUDA device " << i << std::endl; + } else { + return 0; + } + } + } + + std::cout << "Could not find a CUDA enabled card supporting compute >=3.0" + << std::endl; + + return 1; +} + EXPORT void cuda_dynamic_lib_func() { DetermineIfValidCudaDevice<<<1, 1>>>(); cudaError_t err = cudaGetLastError(); - if (err == cudaSuccess) { - std::cerr << cudaGetErrorString(err) << std::endl; + if (err != cudaSuccess) { + std::cerr << "DetermineIfValidCudaDevice [SYNC] failed: " + << cudaGetErrorString(err) << std::endl; + } + err = cudaDeviceSynchronize(); + if (err != cudaSuccess) { + std::cerr << "DetermineIfValidCudaDevice [ASYNC] failed: " + << cudaGetErrorString(cudaGetLastError()) << std::endl; } } diff --git a/Tests/Cuda/Complex/file3.cu b/Tests/Cuda/Complex/file3.cu index 7c37d66..d055b42 100644 --- a/Tests/Cuda/Complex/file3.cu +++ b/Tests/Cuda/Complex/file3.cu @@ -7,20 +7,42 @@ result_type __device__ file1_func(int x); result_type_dynamic __device__ file2_func(int x); -static __global__ void file3_kernel(result_type& r, int x) +static __global__ void file3_kernel(result_type* r, int x) { - r = file1_func(x); + *r = file1_func(x); result_type_dynamic rd = file2_func(x); } int file3_launch_kernel(int x) { - result_type r; + result_type* r; + cudaError_t err = cudaMallocManaged(&r, sizeof(result_type)); + if (err != cudaSuccess) { + std::cerr << "file3_launch_kernel: cudaMallocManaged failed: " + << cudaGetErrorString(err) << std::endl; + return x; + } + file3_kernel<<<1, 1>>>(r, x); - cudaError_t err = cudaGetLastError(); - if (err == cudaSuccess) { - std::cerr << cudaGetErrorString(err) << std::endl; + err = cudaGetLastError(); + if (err != cudaSuccess) { + std::cerr << "file3_kernel [SYNC] failed: " << cudaGetErrorString(err) + << std::endl; + return x; + } + err = cudaDeviceSynchronize(); + if (err != cudaSuccess) { + std::cerr << "file3_kernel [ASYNC] failed: " + << cudaGetErrorString(cudaGetLastError()) << std::endl; + return x; + } + int result = r->sum; + err = cudaFree(r); + if (err != cudaSuccess) { + std::cerr << "file3_launch_kernel: cudaFree failed: " + << cudaGetErrorString(err) << std::endl; return x; } - return r.sum; + + return result; } diff --git a/Tests/Cuda/Complex/main.cpp b/Tests/Cuda/Complex/main.cpp index 5a3f820..2498235 100644 --- a/Tests/Cuda/Complex/main.cpp +++ b/Tests/Cuda/Complex/main.cpp @@ -9,12 +9,18 @@ #define IMPORT #endif +IMPORT int choose_cuda_device(); IMPORT int call_cuda_seperable_code(int x); IMPORT int mixed_launch_kernel(int x); int main(int argc, char** argv) { - call_cuda_seperable_code(42); - mixed_launch_kernel(42); - return 0; + int ret = choose_cuda_device(); + if (ret) { + return 0; + } + + int r1 = call_cuda_seperable_code(42); + int r2 = mixed_launch_kernel(42); + return (r1 == 42 || r2 == 42) ? 1 : 0; } diff --git a/Tests/Cuda/Complex/mixed.cu b/Tests/Cuda/Complex/mixed.cu index 4bba07c..a81ccb7 100644 --- a/Tests/Cuda/Complex/mixed.cu +++ b/Tests/Cuda/Complex/mixed.cu @@ -17,9 +17,9 @@ result_type_dynamic __device__ file2_func(int x); IMPORT void __host__ cuda_dynamic_lib_func(); -static __global__ void mixed_kernel(result_type& r, int x) +static __global__ void mixed_kernel(result_type* r, int x) { - r = file1_func(x); + *r = file1_func(x); result_type_dynamic rd = file2_func(x); } @@ -27,7 +27,35 @@ EXPORT int mixed_launch_kernel(int x) { cuda_dynamic_lib_func(); - result_type r; + result_type* r; + cudaError_t err = cudaMallocManaged(&r, sizeof(result_type)); + if (err != cudaSuccess) { + std::cerr << "mixed_launch_kernel: cudaMallocManaged failed: " + << cudaGetErrorString(err) << std::endl; + return x; + } + mixed_kernel<<<1, 1>>>(r, x); - return r.sum; + err = cudaGetLastError(); + if (err != cudaSuccess) { + std::cerr << "mixed_kernel [SYNC] failed: " << cudaGetErrorString(err) + << std::endl; + return x; + } + err = cudaDeviceSynchronize(); + if (err != cudaSuccess) { + std::cerr << "mixed_kernel [ASYNC] failed: " + << cudaGetErrorString(cudaGetLastError()) << std::endl; + return x; + } + + int result = r->sum; + err = cudaFree(r); + if (err != cudaSuccess) { + std::cerr << "mixed_launch_kernel: cudaFree failed: " + << cudaGetErrorString(err) << std::endl; + return x; + } + + return result; } |