diff options
| field | value | date |
|---|---|---|
| author | Robert Maynard <rmaynard@nvidia.com> | 2022-04-22 16:51:26 (GMT) |
| committer | Robert Maynard <rmaynard@nvidia.com> | 2022-07-22 14:34:45 (GMT) |
| commit | 96bc59b1ca01be231347404d178445263687dd22 (patch) | |
| tree | d9c015f30a1e43f0d5ded6dc75a638471f085ed6 /Tests/Module | |
| parent | 1527d48cd0071e3e1737b51db3738f7f76ddbf80 (diff) | |
| download | CMake-96bc59b1ca01be231347404d178445263687dd22.zip, CMake-96bc59b1ca01be231347404d178445263687dd22.tar.gz, CMake-96bc59b1ca01be231347404d178445263687dd22.tar.bz2 | |
CUDA: Add Device LTO support for nvcc
Fixes #22200
Diffstat (limited to 'Tests/Module')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | Tests/Module/CheckIPOSupported-CUDA/CMakeLists.txt | 32 |
| -rw-r--r-- | Tests/Module/CheckIPOSupported-CUDA/bar.cu | 12 |
| -rw-r--r-- | Tests/Module/CheckIPOSupported-CUDA/foo.cu | 4 |
| -rw-r--r-- | Tests/Module/CheckIPOSupported-CUDA/main.cu | 62 |
4 files changed, 110 insertions, 0 deletions
```diff
diff --git a/Tests/Module/CheckIPOSupported-CUDA/CMakeLists.txt b/Tests/Module/CheckIPOSupported-CUDA/CMakeLists.txt
new file mode 100644
index 0000000..9dd670e
--- /dev/null
+++ b/Tests/Module/CheckIPOSupported-CUDA/CMakeLists.txt
@@ -0,0 +1,32 @@
+cmake_minimum_required(VERSION 3.8)
+project(CheckIPOSupported-CUDA LANGUAGES CUDA)
+
+cmake_policy(SET CMP0069 NEW)
+
+include(CheckIPOSupported)
+check_ipo_supported(RESULT ipo_supported OUTPUT ipo_output)
+if(ipo_supported)
+  set(CMAKE_INTERPROCEDURAL_OPTIMIZATION ON)
+endif()
+
+if(NOT ipo_supported AND CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA"
+   AND CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 11.2)
+  message(FATAL_ERROR "CheckIPOSupported failed to correctly identify NVIDIA CUDA IPO support")
+endif()
+
+set(CMAKE_CUDA_SEPARABLE_COMPILATION ON)
+
+add_library(foo STATIC foo.cu)
+set_target_properties(foo PROPERTIES
+  WINDOWS_EXPORT_ALL_SYMBOLS ON
+  POSITION_INDEPENDENT_CODE ON)
+
+add_library(bar SHARED bar.cu)
+set_target_properties(bar PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS ON)
+target_link_libraries(bar PRIVATE foo)
+
+add_executable(CheckIPOSupported-CUDA main.cu)
+target_link_libraries(CheckIPOSupported-CUDA PUBLIC bar)
+
+enable_testing()
+add_test(NAME CheckIPOSupported-CUDA COMMAND CheckIPOSupported-CUDA)
diff --git a/Tests/Module/CheckIPOSupported-CUDA/bar.cu b/Tests/Module/CheckIPOSupported-CUDA/bar.cu
new file mode 100644
index 0000000..79b276d
--- /dev/null
+++ b/Tests/Module/CheckIPOSupported-CUDA/bar.cu
@@ -0,0 +1,12 @@
+__device__ int foo_func(int);
+
+void __global__ bar_kernel(int x)
+{
+  foo_func(x);
+}
+
+int launch_kernel(int x)
+{
+  bar_kernel<<<1, 1>>>(x);
+  return x;
+}
diff --git a/Tests/Module/CheckIPOSupported-CUDA/foo.cu b/Tests/Module/CheckIPOSupported-CUDA/foo.cu
new file mode 100644
index 0000000..416607b
--- /dev/null
+++ b/Tests/Module/CheckIPOSupported-CUDA/foo.cu
@@ -0,0 +1,4 @@
+extern __device__ int foo_func(int a)
+{
+  return a * 42 + 9;
+}
diff --git a/Tests/Module/CheckIPOSupported-CUDA/main.cu b/Tests/Module/CheckIPOSupported-CUDA/main.cu
new file mode 100644
index 0000000..8ef4873
--- /dev/null
+++ b/Tests/Module/CheckIPOSupported-CUDA/main.cu
@@ -0,0 +1,62 @@
+#include <iostream>
+
+#include "cuda.h"
+
+#ifdef _WIN32
+# define IMPORT __declspec(dllimport)
+#else
+# define IMPORT
+#endif
+
+IMPORT int launch_kernel(int x);
+
+int choose_cuda_device()
+{
+  int nDevices = 0;
+  cudaError_t err = cudaGetDeviceCount(&nDevices);
+  if (err != cudaSuccess) {
+    std::cerr << "Failed to retrieve the number of CUDA enabled devices"
+              << std::endl;
+    return 1;
+  }
+  for (int i = 0; i < nDevices; ++i) {
+    cudaDeviceProp prop;
+    cudaError_t err = cudaGetDeviceProperties(&prop, i);
+    if (err != cudaSuccess) {
+      std::cerr << "Could not retrieve properties from CUDA device " << i
+                << std::endl;
+      return 1;
+    }
+    std::cout << "prop.major: " << prop.major << std::endl;
+    err = cudaSetDevice(i);
+    if (err != cudaSuccess) {
+      std::cout << "Could not select CUDA device " << i << std::endl;
+    } else {
+      return 0;
+    }
+  }
+
+  std::cout << "Could not find a CUDA enabled card" << std::endl;
+
+  return 1;
+}
+
+int main()
+{
+  int ret = choose_cuda_device();
+  if (ret) {
+    return 0;
+  }
+
+  cudaError_t err;
+  launch_kernel(1);
+  err = cudaGetLastError();
+  if (err != cudaSuccess) {
+    std::cerr << "launch_kernel: kernel launch should have passed.\n "
+                 "Error message: "
+              << cudaGetErrorString(err) << std::endl;
+    return 1;
+  }
+
+  return 0;
+}
```
