summaryrefslogtreecommitdiffstats
path: root/Tests
diff options
context:
space:
mode:
Diffstat (limited to 'Tests')
-rw-r--r--Tests/CMakeLists.txt5
-rw-r--r--Tests/CudaOnly/CMakeLists.txt24
-rw-r--r--Tests/CudaOnly/DeviceLTO/CMakeLists.txt37
-rw-r--r--Tests/CudaOnly/DeviceLTO/file1.cu17
-rw-r--r--Tests/CudaOnly/DeviceLTO/file2.cu5
-rw-r--r--Tests/CudaOnly/DeviceLTO/file3.cu4
-rw-r--r--Tests/CudaOnly/DeviceLTO/main.cu62
-rw-r--r--Tests/Module/CheckIPOSupported-CUDA/CMakeLists.txt32
-rw-r--r--Tests/Module/CheckIPOSupported-CUDA/bar.cu12
-rw-r--r--Tests/Module/CheckIPOSupported-CUDA/foo.cu4
-rw-r--r--Tests/Module/CheckIPOSupported-CUDA/main.cu62
-rw-r--r--Tests/RunCMake/CheckIPOSupported/default-lang-none-stderr.txt4
12 files changed, 255 insertions, 13 deletions
diff --git a/Tests/CMakeLists.txt b/Tests/CMakeLists.txt
index d2ded37..f98e7e9 100644
--- a/Tests/CMakeLists.txt
+++ b/Tests/CMakeLists.txt
@@ -618,6 +618,11 @@ if(BUILD_TESTING)
set(Module.CheckIPOSupported-CXX_BUILD_OPTIONS -DCMake_TEST_IPO_WORKS_CXX=${CMake_TEST_IPO_WORKS_CXX})
ADD_TEST_MACRO(Module.CheckIPOSupported-CXX CheckIPOSupported-CXX)
+ if(CMake_TEST_CUDA)
+ ADD_TEST_MACRO(Module.CheckIPOSupported-CUDA CheckIPOSupported-CUDA)
+ set_property(TEST Module.CheckIPOSupported-CUDA APPEND PROPERTY LABELS "CUDA")
+ endif()
+
if(CMAKE_Fortran_COMPILER)
set(Module.CheckIPOSupported-Fortran_BUILD_OPTIONS -DCMake_TEST_IPO_WORKS_Fortran=${CMake_TEST_IPO_WORKS_Fortran})
ADD_TEST_MACRO(Module.CheckIPOSupported-Fortran CheckIPOSupported-Fortran)
diff --git a/Tests/CudaOnly/CMakeLists.txt b/Tests/CudaOnly/CMakeLists.txt
index aa4755d..091872d 100644
--- a/Tests/CudaOnly/CMakeLists.txt
+++ b/Tests/CudaOnly/CMakeLists.txt
@@ -7,7 +7,6 @@ endmacro ()
add_cuda_test_macro(CudaOnly.Architecture Architecture)
add_cuda_test_macro(CudaOnly.ArchSpecial CudaOnlyArchSpecial)
add_cuda_test_macro(CudaOnly.CompileFlags CudaOnlyCompileFlags)
-
add_cuda_test_macro(CudaOnly.EnableStandard CudaOnlyEnableStandard)
add_cuda_test_macro(CudaOnly.ExportPTX CudaOnlyExportPTX)
add_cuda_test_macro(CudaOnly.SharedRuntimePlusToolkit CudaOnlySharedRuntimePlusToolkit)
@@ -28,6 +27,19 @@ if(CMake_TEST_CUDA AND NOT CMake_TEST_CUDA STREQUAL "Clang")
add_cuda_test_macro(CudaOnly.GPUDebugFlag CudaOnlyGPUDebugFlag)
endif()
+# The CUDA only ships the shared version of the toolkit libraries
+# on windows
+if(NOT WIN32)
+ add_cuda_test_macro(CudaOnly.StaticRuntimePlusToolkit CudaOnlyStaticRuntimePlusToolkit)
+endif()
+
+add_cuda_test_macro(CudaOnly.DeviceLTO CudaOnlyDeviceLTO)
+
+if(MSVC)
+ # Tests for features that only work with MSVC
+ add_cuda_test_macro(CudaOnly.PDB CudaOnlyPDB)
+endif()
+
add_test(NAME CudaOnly.DontResolveDeviceSymbols COMMAND
${CMAKE_CTEST_COMMAND} -C $<CONFIGURATION>
--build-and-test
@@ -41,16 +53,6 @@ add_test(NAME CudaOnly.DontResolveDeviceSymbols COMMAND
set_property(TEST "CudaOnly.DontResolveDeviceSymbols" APPEND
PROPERTY LABELS "CUDA")
-# The CUDA only ships the shared version of the toolkit libraries
-# on windows
-if(NOT WIN32)
- add_cuda_test_macro(CudaOnly.StaticRuntimePlusToolkit CudaOnlyStaticRuntimePlusToolkit)
-endif()
-
-if(MSVC)
- add_cuda_test_macro(CudaOnly.PDB CudaOnlyPDB)
-endif()
-
add_test(NAME CudaOnly.RuntimeControls COMMAND
${CMAKE_CTEST_COMMAND} -C $<CONFIGURATION>
--build-and-test
diff --git a/Tests/CudaOnly/DeviceLTO/CMakeLists.txt b/Tests/CudaOnly/DeviceLTO/CMakeLists.txt
new file mode 100644
index 0000000..653b35d
--- /dev/null
+++ b/Tests/CudaOnly/DeviceLTO/CMakeLists.txt
@@ -0,0 +1,37 @@
+cmake_minimum_required(VERSION 3.18)
+project(DeviceLTO CUDA)
+
+# Goal:
+# Verify that we correctly compile with device LTO
+# Verify that device LTO requirements are propagated to
+# the final device link line
+
+add_library(CUDA_dlto STATIC file1.cu file2.cu file3.cu)
+add_executable(CudaOnlyDeviceLTO main.cu)
+
+set_target_properties(CUDA_dlto
+ PROPERTIES
+ CUDA_ARCHITECTURES "${CMAKE_CUDA_ARCHITECTURES_ALL}"
+ CUDA_SEPARABLE_COMPILATION ON
+ POSITION_INDEPENDENT_CODE ON)
+
+set_target_properties(CudaOnlyDeviceLTO
+ PROPERTIES
+ CUDA_SEPARABLE_COMPILATION ON
+ CUDA_ARCHITECTURES "${CMAKE_CUDA_ARCHITECTURES_ALL}"
+ )
+
+target_link_libraries(CudaOnlyDeviceLTO PRIVATE CUDA_dlto)
+
+include(CheckIPOSupported)
+check_ipo_supported(LANGUAGES CUDA RESULT ipo_supported)
+if(ipo_supported)
+ set_target_properties(CUDA_dlto
+ PROPERTIES
+ INTERPROCEDURAL_OPTIMIZATION ON)
+
+ # When non-LTO variants (i.e. virtual) are built together with LTO ones the
+ # linker warns about missing device LTO for the virtual architectures.
+ # Ignore these warnings.
+ target_link_options(CudaOnlyDeviceLTO PRIVATE "$<DEVICE_LINK:-w>")
+endif()
diff --git a/Tests/CudaOnly/DeviceLTO/file1.cu b/Tests/CudaOnly/DeviceLTO/file1.cu
new file mode 100644
index 0000000..703927c
--- /dev/null
+++ b/Tests/CudaOnly/DeviceLTO/file1.cu
@@ -0,0 +1,17 @@
+#ifdef _WIN32
+# define EXPORT __declspec(dllexport)
+#else
+# define EXPORT
+#endif
+
+extern __device__ int file2_func(int);
+void __global__ kernel(int x)
+{
+ file2_func(x);
+}
+
+EXPORT int launch_kernel(int x)
+{
+ kernel<<<1, 1>>>(x);
+ return x;
+}
diff --git a/Tests/CudaOnly/DeviceLTO/file2.cu b/Tests/CudaOnly/DeviceLTO/file2.cu
new file mode 100644
index 0000000..73d6468
--- /dev/null
+++ b/Tests/CudaOnly/DeviceLTO/file2.cu
@@ -0,0 +1,5 @@
+extern __device__ int file3_func(int);
+int __device__ file2_func(int x)
+{
+ return x + file3_func(x);
+}
diff --git a/Tests/CudaOnly/DeviceLTO/file3.cu b/Tests/CudaOnly/DeviceLTO/file3.cu
new file mode 100644
index 0000000..235ac06
--- /dev/null
+++ b/Tests/CudaOnly/DeviceLTO/file3.cu
@@ -0,0 +1,4 @@
+int __device__ file3_func(int x)
+{
+ return x * x * x;
+}
diff --git a/Tests/CudaOnly/DeviceLTO/main.cu b/Tests/CudaOnly/DeviceLTO/main.cu
new file mode 100644
index 0000000..8ef4873
--- /dev/null
+++ b/Tests/CudaOnly/DeviceLTO/main.cu
@@ -0,0 +1,62 @@
+#include <iostream>
+
+#include "cuda.h"
+
+#ifdef _WIN32
+# define IMPORT __declspec(dllimport)
+#else
+# define IMPORT
+#endif
+
+IMPORT int launch_kernel(int x);
+
+int choose_cuda_device()
+{
+ int nDevices = 0;
+ cudaError_t err = cudaGetDeviceCount(&nDevices);
+ if (err != cudaSuccess) {
+ std::cerr << "Failed to retrieve the number of CUDA enabled devices"
+ << std::endl;
+ return 1;
+ }
+ for (int i = 0; i < nDevices; ++i) {
+ cudaDeviceProp prop;
+ cudaError_t err = cudaGetDeviceProperties(&prop, i);
+ if (err != cudaSuccess) {
+ std::cerr << "Could not retrieve properties from CUDA device " << i
+ << std::endl;
+ return 1;
+ }
+ std::cout << "prop.major: " << prop.major << std::endl;
+ err = cudaSetDevice(i);
+ if (err != cudaSuccess) {
+ std::cout << "Could not select CUDA device " << i << std::endl;
+ } else {
+ return 0;
+ }
+ }
+
+ std::cout << "Could not find a CUDA enabled card" << std::endl;
+
+ return 1;
+}
+
+int main()
+{
+ int ret = choose_cuda_device();
+ if (ret) {
+ return 0;
+ }
+
+ cudaError_t err;
+ launch_kernel(1);
+ err = cudaGetLastError();
+ if (err != cudaSuccess) {
+ std::cerr << "launch_kernel: kernel launch should have passed.\n "
+ "Error message: "
+ << cudaGetErrorString(err) << std::endl;
+ return 1;
+ }
+
+ return 0;
+}
diff --git a/Tests/Module/CheckIPOSupported-CUDA/CMakeLists.txt b/Tests/Module/CheckIPOSupported-CUDA/CMakeLists.txt
new file mode 100644
index 0000000..9dd670e
--- /dev/null
+++ b/Tests/Module/CheckIPOSupported-CUDA/CMakeLists.txt
@@ -0,0 +1,32 @@
+cmake_minimum_required(VERSION 3.8)
+project(CheckIPOSupported-CUDA LANGUAGES CUDA)
+
+cmake_policy(SET CMP0069 NEW)
+
+include(CheckIPOSupported)
+check_ipo_supported(RESULT ipo_supported OUTPUT ipo_output)
+if(ipo_supported)
+ set(CMAKE_INTERPROCEDURAL_OPTIMIZATION ON)
+endif()
+
+if(NOT ipo_supported AND CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA"
+ AND CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 11.2)
+ message(FATAL_ERROR "CheckIPOSupported failed to correctly identify NVIDIA CUDA IPO support")
+endif()
+
+set(CMAKE_CUDA_SEPARABLE_COMPILATION ON)
+
+add_library(foo STATIC foo.cu)
+set_target_properties(foo PROPERTIES
+ WINDOWS_EXPORT_ALL_SYMBOLS ON
+ POSITION_INDEPENDENT_CODE ON)
+
+add_library(bar SHARED bar.cu)
+set_target_properties(bar PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS ON)
+target_link_libraries(bar PRIVATE foo)
+
+add_executable(CheckIPOSupported-CUDA main.cu)
+target_link_libraries(CheckIPOSupported-CUDA PUBLIC bar)
+
+enable_testing()
+add_test(NAME CheckIPOSupported-CUDA COMMAND CheckIPOSupported-CUDA)
diff --git a/Tests/Module/CheckIPOSupported-CUDA/bar.cu b/Tests/Module/CheckIPOSupported-CUDA/bar.cu
new file mode 100644
index 0000000..79b276d
--- /dev/null
+++ b/Tests/Module/CheckIPOSupported-CUDA/bar.cu
@@ -0,0 +1,12 @@
+__device__ int foo_func(int);
+
+void __global__ bar_kernel(int x)
+{
+ foo_func(x);
+}
+
+int launch_kernel(int x)
+{
+ bar_kernel<<<1, 1>>>(x);
+ return x;
+}
diff --git a/Tests/Module/CheckIPOSupported-CUDA/foo.cu b/Tests/Module/CheckIPOSupported-CUDA/foo.cu
new file mode 100644
index 0000000..416607b
--- /dev/null
+++ b/Tests/Module/CheckIPOSupported-CUDA/foo.cu
@@ -0,0 +1,4 @@
+extern __device__ int foo_func(int a)
+{
+ return a * 42 + 9;
+}
diff --git a/Tests/Module/CheckIPOSupported-CUDA/main.cu b/Tests/Module/CheckIPOSupported-CUDA/main.cu
new file mode 100644
index 0000000..8ef4873
--- /dev/null
+++ b/Tests/Module/CheckIPOSupported-CUDA/main.cu
@@ -0,0 +1,62 @@
+#include <iostream>
+
+#include "cuda.h"
+
+#ifdef _WIN32
+# define IMPORT __declspec(dllimport)
+#else
+# define IMPORT
+#endif
+
+IMPORT int launch_kernel(int x);
+
+int choose_cuda_device()
+{
+ int nDevices = 0;
+ cudaError_t err = cudaGetDeviceCount(&nDevices);
+ if (err != cudaSuccess) {
+ std::cerr << "Failed to retrieve the number of CUDA enabled devices"
+ << std::endl;
+ return 1;
+ }
+ for (int i = 0; i < nDevices; ++i) {
+ cudaDeviceProp prop;
+ cudaError_t err = cudaGetDeviceProperties(&prop, i);
+ if (err != cudaSuccess) {
+ std::cerr << "Could not retrieve properties from CUDA device " << i
+ << std::endl;
+ return 1;
+ }
+ std::cout << "prop.major: " << prop.major << std::endl;
+ err = cudaSetDevice(i);
+ if (err != cudaSuccess) {
+ std::cout << "Could not select CUDA device " << i << std::endl;
+ } else {
+ return 0;
+ }
+ }
+
+ std::cout << "Could not find a CUDA enabled card" << std::endl;
+
+ return 1;
+}
+
+int main()
+{
+ int ret = choose_cuda_device();
+ if (ret) {
+ return 0;
+ }
+
+ cudaError_t err;
+ launch_kernel(1);
+ err = cudaGetLastError();
+ if (err != cudaSuccess) {
+ std::cerr << "launch_kernel: kernel launch should have passed.\n "
+ "Error message: "
+ << cudaGetErrorString(err) << std::endl;
+ return 1;
+ }
+
+ return 0;
+}
diff --git a/Tests/RunCMake/CheckIPOSupported/default-lang-none-stderr.txt b/Tests/RunCMake/CheckIPOSupported/default-lang-none-stderr.txt
index dc2c3ad..9a1ba04 100644
--- a/Tests/RunCMake/CheckIPOSupported/default-lang-none-stderr.txt
+++ b/Tests/RunCMake/CheckIPOSupported/default-lang-none-stderr.txt
@@ -1,6 +1,6 @@
^CMake Error at .*/Modules/CheckIPOSupported\.cmake:[0-9]+ \(message\):
- IPO is not supported \(no C/CXX/Fortran languages found in ENABLED_LANGUAGES
- global property\)\.
+ IPO is not supported \(no C/CXX/CUDA/Fortran languages found in
+ ENABLED_LANGUAGES global property\)\.
Call Stack \(most recent call first\):
.*/Modules/CheckIPOSupported\.cmake:[0-9]+ \(_ipo_not_supported\)
default-lang-none\.cmake:[0-9]+ \(check_ipo_supported\)