diff options
Diffstat (limited to 'Tests')
-rw-r--r-- | Tests/CMakeLists.txt | 5 | ||||
-rw-r--r-- | Tests/CudaOnly/CMakeLists.txt | 24 | ||||
-rw-r--r-- | Tests/CudaOnly/DeviceLTO/CMakeLists.txt | 37 | ||||
-rw-r--r-- | Tests/CudaOnly/DeviceLTO/file1.cu | 17 | ||||
-rw-r--r-- | Tests/CudaOnly/DeviceLTO/file2.cu | 5 | ||||
-rw-r--r-- | Tests/CudaOnly/DeviceLTO/file3.cu | 4 | ||||
-rw-r--r-- | Tests/CudaOnly/DeviceLTO/main.cu | 62 | ||||
-rw-r--r-- | Tests/Module/CheckIPOSupported-CUDA/CMakeLists.txt | 32 | ||||
-rw-r--r-- | Tests/Module/CheckIPOSupported-CUDA/bar.cu | 12 | ||||
-rw-r--r-- | Tests/Module/CheckIPOSupported-CUDA/foo.cu | 4 | ||||
-rw-r--r-- | Tests/Module/CheckIPOSupported-CUDA/main.cu | 62 | ||||
-rw-r--r-- | Tests/RunCMake/CheckIPOSupported/default-lang-none-stderr.txt | 4 |
12 files changed, 255 insertions, 13 deletions
diff --git a/Tests/CMakeLists.txt b/Tests/CMakeLists.txt index d2ded37..f98e7e9 100644 --- a/Tests/CMakeLists.txt +++ b/Tests/CMakeLists.txt @@ -618,6 +618,11 @@ if(BUILD_TESTING) set(Module.CheckIPOSupported-CXX_BUILD_OPTIONS -DCMake_TEST_IPO_WORKS_CXX=${CMake_TEST_IPO_WORKS_CXX}) ADD_TEST_MACRO(Module.CheckIPOSupported-CXX CheckIPOSupported-CXX) + if(CMake_TEST_CUDA) + ADD_TEST_MACRO(Module.CheckIPOSupported-CUDA CheckIPOSupported-CUDA) + set_property(TEST Module.CheckIPOSupported-CUDA APPEND PROPERTY LABELS "CUDA") + endif() + if(CMAKE_Fortran_COMPILER) set(Module.CheckIPOSupported-Fortran_BUILD_OPTIONS -DCMake_TEST_IPO_WORKS_Fortran=${CMake_TEST_IPO_WORKS_Fortran}) ADD_TEST_MACRO(Module.CheckIPOSupported-Fortran CheckIPOSupported-Fortran) diff --git a/Tests/CudaOnly/CMakeLists.txt b/Tests/CudaOnly/CMakeLists.txt index aa4755d..091872d 100644 --- a/Tests/CudaOnly/CMakeLists.txt +++ b/Tests/CudaOnly/CMakeLists.txt @@ -7,7 +7,6 @@ endmacro () add_cuda_test_macro(CudaOnly.Architecture Architecture) add_cuda_test_macro(CudaOnly.ArchSpecial CudaOnlyArchSpecial) add_cuda_test_macro(CudaOnly.CompileFlags CudaOnlyCompileFlags) - add_cuda_test_macro(CudaOnly.EnableStandard CudaOnlyEnableStandard) add_cuda_test_macro(CudaOnly.ExportPTX CudaOnlyExportPTX) add_cuda_test_macro(CudaOnly.SharedRuntimePlusToolkit CudaOnlySharedRuntimePlusToolkit) @@ -28,6 +27,19 @@ if(CMake_TEST_CUDA AND NOT CMake_TEST_CUDA STREQUAL "Clang") add_cuda_test_macro(CudaOnly.GPUDebugFlag CudaOnlyGPUDebugFlag) endif() +# The CUDA only ships the shared version of the toolkit libraries +# on windows +if(NOT WIN32) + add_cuda_test_macro(CudaOnly.StaticRuntimePlusToolkit CudaOnlyStaticRuntimePlusToolkit) +endif() + +add_cuda_test_macro(CudaOnly.DeviceLTO CudaOnlyDeviceLTO) + +if(MSVC) + # Tests for features that only work with MSVC + add_cuda_test_macro(CudaOnly.PDB CudaOnlyPDB) +endif() + add_test(NAME CudaOnly.DontResolveDeviceSymbols COMMAND ${CMAKE_CTEST_COMMAND} -C $<CONFIGURATION> --build-and-test @@ -41,16 +53,6 @@ add_test(NAME CudaOnly.DontResolveDeviceSymbols COMMAND set_property(TEST "CudaOnly.DontResolveDeviceSymbols" APPEND PROPERTY LABELS "CUDA") -# The CUDA only ships the shared version of the toolkit libraries -# on windows -if(NOT WIN32) - add_cuda_test_macro(CudaOnly.StaticRuntimePlusToolkit CudaOnlyStaticRuntimePlusToolkit) -endif() - -if(MSVC) - add_cuda_test_macro(CudaOnly.PDB CudaOnlyPDB) -endif() - add_test(NAME CudaOnly.RuntimeControls COMMAND ${CMAKE_CTEST_COMMAND} -C $<CONFIGURATION> --build-and-test diff --git a/Tests/CudaOnly/DeviceLTO/CMakeLists.txt b/Tests/CudaOnly/DeviceLTO/CMakeLists.txt new file mode 100644 index 0000000..653b35d --- /dev/null +++ b/Tests/CudaOnly/DeviceLTO/CMakeLists.txt @@ -0,0 +1,37 @@ +cmake_minimum_required(VERSION 3.18) +project(DeviceLTO CUDA) + +# Goal: +# Verify that we correctly compile with device LTO +# Verify that device LTO requirements are propagated to +# the final device link line + +add_library(CUDA_dlto STATIC file1.cu file2.cu file3.cu) +add_executable(CudaOnlyDeviceLTO main.cu) + +set_target_properties(CUDA_dlto + PROPERTIES + CUDA_ARCHITECTURES "${CMAKE_CUDA_ARCHITECTURES_ALL}" + CUDA_SEPARABLE_COMPILATION ON + POSITION_INDEPENDENT_CODE ON) + +set_target_properties(CudaOnlyDeviceLTO + PROPERTIES + CUDA_SEPARABLE_COMPILATION ON + CUDA_ARCHITECTURES "${CMAKE_CUDA_ARCHITECTURES_ALL}" + ) + +target_link_libraries(CudaOnlyDeviceLTO PRIVATE CUDA_dlto) + +include(CheckIPOSupported) +check_ipo_supported(LANGUAGES CUDA RESULT ipo_supported) +if(ipo_supported) + set_target_properties(CUDA_dlto + PROPERTIES + INTERPROCEDURAL_OPTIMIZATION ON) + + # When non-LTO variants (i.e. virtual) are built together with LTO ones the + # linker warns about missing device LTO for the virtual architectures. + # Ignore these warnings. + target_link_options(CudaOnlyDeviceLTO PRIVATE "$<DEVICE_LINK:-w>") +endif() diff --git a/Tests/CudaOnly/DeviceLTO/file1.cu b/Tests/CudaOnly/DeviceLTO/file1.cu new file mode 100644 index 0000000..703927c --- /dev/null +++ b/Tests/CudaOnly/DeviceLTO/file1.cu @@ -0,0 +1,17 @@ +#ifdef _WIN32 +# define EXPORT __declspec(dllexport) +#else +# define EXPORT +#endif + +extern __device__ int file2_func(int); +void __global__ kernel(int x) +{ + file2_func(x); +} + +EXPORT int launch_kernel(int x) +{ + kernel<<<1, 1>>>(x); + return x; +} diff --git a/Tests/CudaOnly/DeviceLTO/file2.cu b/Tests/CudaOnly/DeviceLTO/file2.cu new file mode 100644 index 0000000..73d6468 --- /dev/null +++ b/Tests/CudaOnly/DeviceLTO/file2.cu @@ -0,0 +1,5 @@ +extern __device__ int file3_func(int); +int __device__ file2_func(int x) +{ + return x + file3_func(x); +} diff --git a/Tests/CudaOnly/DeviceLTO/file3.cu b/Tests/CudaOnly/DeviceLTO/file3.cu new file mode 100644 index 0000000..235ac06 --- /dev/null +++ b/Tests/CudaOnly/DeviceLTO/file3.cu @@ -0,0 +1,4 @@ +int __device__ file3_func(int x) +{ + return x * x * x; +} diff --git a/Tests/CudaOnly/DeviceLTO/main.cu b/Tests/CudaOnly/DeviceLTO/main.cu new file mode 100644 index 0000000..8ef4873 --- /dev/null +++ b/Tests/CudaOnly/DeviceLTO/main.cu @@ -0,0 +1,62 @@ +#include <iostream> + +#include "cuda.h" + +#ifdef _WIN32 +# define IMPORT __declspec(dllimport) +#else +# define IMPORT +#endif + +IMPORT int launch_kernel(int x); + +int choose_cuda_device() +{ + int nDevices = 0; + cudaError_t err = cudaGetDeviceCount(&nDevices); + if (err != cudaSuccess) { + std::cerr << "Failed to retrieve the number of CUDA enabled devices" + << std::endl; + return 1; + } + for (int i = 0; i < nDevices; ++i) { + cudaDeviceProp prop; + cudaError_t err = cudaGetDeviceProperties(&prop, i); + if (err != cudaSuccess) { + std::cerr << "Could not retrieve properties from CUDA device " << i + << std::endl; + return 1; + } + std::cout << "prop.major: " << prop.major << std::endl; + err = cudaSetDevice(i); + if (err != cudaSuccess) { + std::cout << "Could not select CUDA device " << i << std::endl; + } else { + return 0; + } + } + + std::cout << "Could not find a CUDA enabled card" << std::endl; + + return 1; +} + +int main() +{ + int ret = choose_cuda_device(); + if (ret) { + return 0; + } + + cudaError_t err; + launch_kernel(1); + err = cudaGetLastError(); + if (err != cudaSuccess) { + std::cerr << "launch_kernel: kernel launch should have passed.\n " + "Error message: " + << cudaGetErrorString(err) << std::endl; + return 1; + } + + return 0; +} diff --git a/Tests/Module/CheckIPOSupported-CUDA/CMakeLists.txt b/Tests/Module/CheckIPOSupported-CUDA/CMakeLists.txt new file mode 100644 index 0000000..9dd670e --- /dev/null +++ b/Tests/Module/CheckIPOSupported-CUDA/CMakeLists.txt @@ -0,0 +1,32 @@ +cmake_minimum_required(VERSION 3.8) +project(CheckIPOSupported-CUDA LANGUAGES CUDA) + +cmake_policy(SET CMP0069 NEW) + +include(CheckIPOSupported) +check_ipo_supported(RESULT ipo_supported OUTPUT ipo_output) +if(ipo_supported) + set(CMAKE_INTERPROCEDURAL_OPTIMIZATION ON) +endif() + +if(NOT ipo_supported AND CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA" + AND CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 11.2) + message(FATAL_ERROR "CheckIPOSupported failed to correctly identify NVIDIA CUDA IPO support") +endif() + +set(CMAKE_CUDA_SEPARABLE_COMPILATION ON) + +add_library(foo STATIC foo.cu) +set_target_properties(foo PROPERTIES + WINDOWS_EXPORT_ALL_SYMBOLS ON + POSITION_INDEPENDENT_CODE ON) + +add_library(bar SHARED bar.cu) +set_target_properties(bar PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS ON) +target_link_libraries(bar PRIVATE foo) + +add_executable(CheckIPOSupported-CUDA main.cu) +target_link_libraries(CheckIPOSupported-CUDA PUBLIC bar) + +enable_testing() +add_test(NAME CheckIPOSupported-CUDA COMMAND CheckIPOSupported-CUDA) diff --git a/Tests/Module/CheckIPOSupported-CUDA/bar.cu b/Tests/Module/CheckIPOSupported-CUDA/bar.cu new file mode 100644 index 0000000..79b276d --- /dev/null +++ b/Tests/Module/CheckIPOSupported-CUDA/bar.cu @@ -0,0 +1,12 @@ +__device__ int foo_func(int); + +void __global__ bar_kernel(int x) +{ + foo_func(x); +} + +int launch_kernel(int x) +{ + bar_kernel<<<1, 1>>>(x); + return x; +} diff --git a/Tests/Module/CheckIPOSupported-CUDA/foo.cu b/Tests/Module/CheckIPOSupported-CUDA/foo.cu new file mode 100644 index 0000000..416607b --- /dev/null +++ b/Tests/Module/CheckIPOSupported-CUDA/foo.cu @@ -0,0 +1,4 @@ +extern __device__ int foo_func(int a) +{ + return a * 42 + 9; +} diff --git a/Tests/Module/CheckIPOSupported-CUDA/main.cu b/Tests/Module/CheckIPOSupported-CUDA/main.cu new file mode 100644 index 0000000..8ef4873 --- /dev/null +++ b/Tests/Module/CheckIPOSupported-CUDA/main.cu @@ -0,0 +1,62 @@ +#include <iostream> + +#include "cuda.h" + +#ifdef _WIN32 +# define IMPORT __declspec(dllimport) +#else +# define IMPORT +#endif + +IMPORT int launch_kernel(int x); + +int choose_cuda_device() +{ + int nDevices = 0; + cudaError_t err = cudaGetDeviceCount(&nDevices); + if (err != cudaSuccess) { + std::cerr << "Failed to retrieve the number of CUDA enabled devices" + << std::endl; + return 1; + } + for (int i = 0; i < nDevices; ++i) { + cudaDeviceProp prop; + cudaError_t err = cudaGetDeviceProperties(&prop, i); + if (err != cudaSuccess) { + std::cerr << "Could not retrieve properties from CUDA device " << i + << std::endl; + return 1; + } + std::cout << "prop.major: " << prop.major << std::endl; + err = cudaSetDevice(i); + if (err != cudaSuccess) { + std::cout << "Could not select CUDA device " << i << std::endl; + } else { + return 0; + } + } + + std::cout << "Could not find a CUDA enabled card" << std::endl; + + return 1; +} + +int main() +{ + int ret = choose_cuda_device(); + if (ret) { + return 0; + } + + cudaError_t err; + launch_kernel(1); + err = cudaGetLastError(); + if (err != cudaSuccess) { + std::cerr << "launch_kernel: kernel launch should have passed.\n " + "Error message: " + << cudaGetErrorString(err) << std::endl; + return 1; + } + + return 0; +} diff --git a/Tests/RunCMake/CheckIPOSupported/default-lang-none-stderr.txt b/Tests/RunCMake/CheckIPOSupported/default-lang-none-stderr.txt index dc2c3ad..9a1ba04 100644 --- a/Tests/RunCMake/CheckIPOSupported/default-lang-none-stderr.txt +++ b/Tests/RunCMake/CheckIPOSupported/default-lang-none-stderr.txt @@ -1,6 +1,6 @@ ^CMake Error at .*/Modules/CheckIPOSupported\.cmake:[0-9]+ \(message\): - IPO is not supported \(no C/CXX/Fortran languages found in ENABLED_LANGUAGES - global property\)\. + IPO is not supported \(no C/CXX/CUDA/Fortran languages found in + ENABLED_LANGUAGES global property\)\. Call Stack \(most recent call first\): .*/Modules/CheckIPOSupported\.cmake:[0-9]+ \(_ipo_not_supported\) default-lang-none\.cmake:[0-9]+ \(check_ipo_supported\) |